From b73f1b6f971987df43a3e917246f407c6fc4d682 Mon Sep 17 00:00:00 2001 From: David Snyder Date: Tue, 8 Jan 2019 16:56:33 -0500 Subject: [PATCH 1/3] [egs] minor fixes related to python2 vs python3 differences --- egs/callhome_diarization/v1/local/make_musan.py | 6 +++--- egs/callhome_diarization/v2/run.sh | 8 ++++---- egs/sitw/v1/run.sh | 8 ++++---- egs/sitw/v2/run.sh | 8 ++++---- egs/sre16/v1/run.sh | 8 ++++---- egs/sre16/v2/run.sh | 8 ++++---- egs/voxceleb/v2/run.sh | 8 ++++---- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/egs/callhome_diarization/v1/local/make_musan.py b/egs/callhome_diarization/v1/local/make_musan.py index 974e73e0777..7c50adf7c83 100755 --- a/egs/callhome_diarization/v1/local/make_musan.py +++ b/egs/callhome_diarization/v1/local/make_musan.py @@ -45,7 +45,7 @@ def prepare_music(root_dir, use_vocals): else: print("Missing file: {}".format(utt)) num_bad_files += 1 - print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files) + print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_speech(root_dir): @@ -71,7 +71,7 @@ def prepare_speech(root_dir): else: print("Missing file: {}".format(utt)) num_bad_files += 1 - print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files) + print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_noise(root_dir): @@ -97,7 +97,7 @@ def prepare_noise(root_dir): else: print("Missing file: {}".format(utt)) num_bad_files += 1 - print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files) + print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def main(): diff --git a/egs/callhome_diarization/v2/run.sh b/egs/callhome_diarization/v2/run.sh index 4f730d4753c..ae05dd9da1c 100755 --- a/egs/callhome_diarization/v2/run.sh +++ b/egs/callhome_diarization/v2/run.sh @@ -115,7 +115,7 @@ if [ $stage -le 2 ]; then # Make a reverberated version of the SWBD+SRE list. Note that we don't add any # additive noise here. - python steps/data/reverberate_data_dir.py \ + steps/data/reverberate_data_dir.py \ "${rvb_opts[@]}" \ --speech-rvb-probability 1 \ --pointsource-noise-addition-probability 0 \ @@ -140,11 +140,11 @@ if [ $stage -le 2 ]; then done # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train data/train_noise + steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train data/train_noise # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train data/train_music + steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train data/train_music # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train data/train_babble + steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train data/train_babble # Combine reverb, noise, music, and babble into one directory. utils/combine_data.sh data/train_aug data/train_reverb data/train_noise data/train_music data/train_babble diff --git a/egs/sitw/v1/run.sh b/egs/sitw/v1/run.sh index 68d08dfc170..e016f8a4752 100755 --- a/egs/sitw/v1/run.sh +++ b/egs/sitw/v1/run.sh @@ -122,7 +122,7 @@ if [ $stage -le 4 ]; then # Make a reverberated version of the VoxCeleb2 list. Note that we don't add any # additive noise here. - python steps/data/reverberate_data_dir.py \ + steps/data/reverberate_data_dir.py \ "${rvb_opts[@]}" \ --speech-rvb-probability 1 \ --pointsource-noise-addition-probability 0 \ @@ -147,11 +147,11 @@ if [ $stage -le 4 ]; then done # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_100k data/train_100k_noise + steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train_100k data/train_100k_noise # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_100k data/train_100k_music + steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train_100k data/train_100k_music # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_100k data/train_100k_babble + steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train_100k data/train_100k_babble # Combine reverb, noise, music, and babble into one directory. utils/combine_data.sh data/train_aug data/train_100k_reverb data/train_100k_noise data/train_100k_music data/train_100k_babble diff --git a/egs/sitw/v2/run.sh b/egs/sitw/v2/run.sh index 499d436366a..8aeecc18b3f 100755 --- a/egs/sitw/v2/run.sh +++ b/egs/sitw/v2/run.sh @@ -88,7 +88,7 @@ if [ $stage -le 2 ]; then # Make a reverberated version of the VoxCeleb2 list. Note that we don't add any # additive noise here. - python steps/data/reverberate_data_dir.py \ + steps/data/reverberate_data_dir.py \ "${rvb_opts[@]}" \ --speech-rvb-probability 1 \ --pointsource-noise-addition-probability 0 \ @@ -113,11 +113,11 @@ if [ $stage -le 2 ]; then done # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train data/train_noise + steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train data/train_noise # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train data/train_music + steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train data/train_music # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train data/train_babble + steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train data/train_babble # Combine reverb, noise, music, and babble into one directory. utils/combine_data.sh data/train_aug data/train_reverb data/train_noise data/train_music data/train_babble diff --git a/egs/sre16/v1/run.sh b/egs/sre16/v1/run.sh index 52ee86ec5b2..28481e27c3a 100755 --- a/egs/sre16/v1/run.sh +++ b/egs/sre16/v1/run.sh @@ -130,7 +130,7 @@ if [ $stage -le 4 ]; then # Make a reverberated version of the SRE list. Note that we don't add any # additive noise here. - python steps/data/reverberate_data_dir.py \ + steps/data/reverberate_data_dir.py \ "${rvb_opts[@]}" \ --speech-rvb-probability 1 \ --pointsource-noise-addition-probability 0 \ @@ -155,11 +155,11 @@ if [ $stage -le 4 ]; then done # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/sre data/sre_noise + steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/sre data/sre_noise # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/sre data/sre_music + steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/sre data/sre_music # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/sre data/sre_babble + steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/sre data/sre_babble # Combine reverb, noise, music, and babble into one directory. utils/combine_data.sh data/sre_aug data/sre_reverb data/sre_noise data/sre_music data/sre_babble diff --git a/egs/sre16/v2/run.sh b/egs/sre16/v2/run.sh index f1d9eb72ddc..b2072dfd69d 100755 --- a/egs/sre16/v2/run.sh +++ b/egs/sre16/v2/run.sh @@ -120,7 +120,7 @@ if [ $stage -le 2 ]; then # Make a reverberated version of the SWBD+SRE list. Note that we don't add any # additive noise here. - python steps/data/reverberate_data_dir.py \ + steps/data/reverberate_data_dir.py \ "${rvb_opts[@]}" \ --speech-rvb-probability 1 \ --pointsource-noise-addition-probability 0 \ @@ -145,11 +145,11 @@ if [ $stage -le 2 ]; then done # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/swbd_sre data/swbd_sre_noise + steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/swbd_sre data/swbd_sre_noise # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/swbd_sre data/swbd_sre_music + steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/swbd_sre data/swbd_sre_music # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/swbd_sre data/swbd_sre_babble + steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/swbd_sre data/swbd_sre_babble # Combine reverb, noise, music, and babble into one directory. utils/combine_data.sh data/swbd_sre_aug data/swbd_sre_reverb data/swbd_sre_noise data/swbd_sre_music data/swbd_sre_babble diff --git a/egs/voxceleb/v2/run.sh b/egs/voxceleb/v2/run.sh index f8c50d7f9df..37bb60fe35c 100755 --- a/egs/voxceleb/v2/run.sh +++ b/egs/voxceleb/v2/run.sh @@ -66,7 +66,7 @@ if [ $stage -le 2 ]; then # Make a reverberated version of the VoxCeleb2 list. Note that we don't add any # additive noise here. - python steps/data/reverberate_data_dir.py \ + steps/data/reverberate_data_dir.py \ "${rvb_opts[@]}" \ --speech-rvb-probability 1 \ --pointsource-noise-addition-probability 0 \ @@ -91,11 +91,11 @@ if [ $stage -le 2 ]; then done # Augment with musan_noise - python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train data/train_noise + steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/train data/train_noise # Augment with musan_music - python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train data/train_music + steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/train data/train_music # Augment with musan_speech - python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train data/train_babble + steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/train data/train_babble # Combine reverb, noise, music, and babble into one directory. utils/combine_data.sh data/train_aug data/train_reverb data/train_noise data/train_music data/train_babble From 00cc52538f67804ecccf78826a3418c8ffb501b6 Mon Sep 17 00:00:00 2001 From: David Snyder Date: Fri, 11 Jan 2019 10:45:59 -0500 Subject: [PATCH 2/3] [egs] fixing more syntax errors introduced in PR #2925 --- egs/sitw/v1/local/make_musan.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/sitw/v1/local/make_musan.py b/egs/sitw/v1/local/make_musan.py index c4b5c9359b4..833da0619c9 100755 --- a/egs/sitw/v1/local/make_musan.py +++ b/egs/sitw/v1/local/make_musan.py @@ -49,7 +49,7 @@ def prepare_music(root_dir, use_vocals): else: print("Missing file {}".format(utt)) num_bad_files += 1 - print(("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) + print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_speech(root_dir): @@ -75,7 +75,7 @@ def prepare_speech(root_dir): else: print("Missing file {}".format(utt)) num_bad_files += 1 - print(("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) + print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def prepare_noise(root_dir): @@ -101,7 +101,7 @@ def prepare_noise(root_dir): else: print("Missing file {}".format(utt)) num_bad_files += 1 - print(("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) + print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) return utt2spk_str, utt2wav_str def main(): From 3536b5a92da17ba4ee85bdedeb82494c9d6ddac9 Mon Sep 17 00:00:00 2001 From: David Snyder Date: Tue, 15 Jan 2019 17:38:22 -0500 Subject: [PATCH 3/3] [src] modifying ivector-plda-scoring-dense so that if target-energy=1.0, it doesn't compute the conversation dependent PCA --- src/ivectorbin/ivector-plda-scoring-dense.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/ivectorbin/ivector-plda-scoring-dense.cc b/src/ivectorbin/ivector-plda-scoring-dense.cc index 73ca879e6bc..e96f7de99d4 100644 --- a/src/ivectorbin/ivector-plda-scoring-dense.cc +++ b/src/ivectorbin/ivector-plda-scoring-dense.cc @@ -27,7 +27,14 @@ namespace kaldi { bool EstPca(const Matrix &ivector_mat, BaseFloat target_energy, - Matrix *mat) { + const std::string &reco, Matrix *mat) { + + // If the target_energy is 1.0, it's equivalent to not applying the + // conversation-dependent PCA at all, so it's better to exit this + // function before doing any computation. + if (ApproxEqual(target_energy, 1.0, 0.001)) + return false; + int32 num_rows = ivector_mat.NumRows(), num_cols = ivector_mat.NumCols(); Vector sum; @@ -50,6 +57,8 @@ bool EstPca(const Matrix &ivector_mat, BaseFloat target_energy, else Matrix(sumsq).Svd(&s, &P, NULL); } catch (...) { + KALDI_WARN << "Unable to compute conversation dependent PCA for" + << " recording " << reco << "."; return false; } @@ -181,7 +190,7 @@ int main(int argc, char *argv[]) { for (size_t i = 0; i < ivectors.size(); i++) { ivector_mat.Row(i).CopyFromVec(ivectors[i]); } - if (EstPca(ivector_mat, target_energy, &pca_transform)) { + if (EstPca(ivector_mat, target_energy, reco, &pca_transform)) { // Apply the PCA transform to the raw i-vectors. ApplyPca(ivector_mat, pca_transform, &ivector_mat_pca); @@ -192,8 +201,7 @@ int main(int argc, char *argv[]) { TransformIvectors(ivector_mat_pca, plda_config, this_plda, &ivector_mat_plda); } else { - KALDI_WARN << "Unable to compute conversation dependent PCA for" - << " recording " << reco << "."; + // If EstPca returns false, we won't apply any PCA. TransformIvectors(ivector_mat, plda_config, this_plda, &ivector_mat_plda); }