From 34dccef27b6fa0022226e545505b3e91a3845c24 Mon Sep 17 00:00:00 2001 From: David Snyder Date: Sun, 1 Jul 2018 18:59:15 -0400 Subject: [PATCH 1/2] [scripts] fixing bug in steps/nnet3/train_raw_{dnn,rnn}.py --- egs/wsj/s5/steps/nnet3/train_raw_dnn.py | 2 +- egs/wsj/s5/steps/nnet3/train_raw_rnn.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py index 8aa8b6cd77a..34214169d5d 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_dnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_dnn.py @@ -305,7 +305,7 @@ def train(args, run_opts): if args.stage <= -1: logger.info("Preparing the initial network.") - common_train_lib.prepare_initial_network(args.dir, run_opts, args.input_model) + common_train_lib.prepare_initial_network(args.dir, run_opts, args.srand, args.input_model) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == diff --git a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py index b078f8e4504..e797c86b323 100755 --- a/egs/wsj/s5/steps/nnet3/train_raw_rnn.py +++ b/egs/wsj/s5/steps/nnet3/train_raw_rnn.py @@ -360,7 +360,7 @@ def train(args, run_opts): if args.stage <= -1: logger.info("Preparing the initial network.") - common_train_lib.prepare_initial_network(args.dir, run_opts, args.input_model) + common_train_lib.prepare_initial_network(args.dir, run_opts, args.srand, args.input_model) # set num_iters so that as close as possible, we process the data # $num_epochs times, i.e. $num_iters*$avg_num_jobs) == From 1fc1adb807e91ed2cd512b5b19ff8af12b36f8e1 Mon Sep 17 00:00:00 2001 From: David Snyder Date: Wed, 4 Jul 2018 18:01:39 -0400 Subject: [PATCH 2/2] [egs] updating the results of egs/sitw after removing the voxceleb2 test set from the training list because of an overlap with the evaluation dataset --- egs/sitw/v1/run.sh | 25 +++++++++++++++---------- egs/sitw/v2/run.sh | 30 ++++++++++++++++++------------ 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/egs/sitw/v1/run.sh b/egs/sitw/v1/run.sh index 79ad18c2023..68d08dfc170 100755 --- a/egs/sitw/v1/run.sh +++ b/egs/sitw/v1/run.sh @@ -39,13 +39,18 @@ if [ $stage -le 0 ]; then # set SITW. The script removes the overlapping speakers from VoxCeleb1. local/make_voxceleb1.pl $voxceleb1_root data - # Prepare the VoxCeleb2 dataset. + # Prepare the dev portion of the VoxCeleb2 dataset. local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train - local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test + + # The original version of this recipe included the test portion of VoxCeleb2 + # in the training list. Unfortunately, it turns out that there's an overlap + # with our evaluation set, Speakers in the Wild. Therefore, we've removed + # this dataset from the training list. + # local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1. - # This should give 7,351 speakers and 1,277,503 utterances. - utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1 + # This should leave 7,185 speakers and 1,236,567 utterances. + utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb1 # Prepare Speakers in the Wild. This is our evaluation dataset. local/make_sitw.sh $sitw_root data @@ -213,9 +218,9 @@ if [ $stage -le 8 ]; then "cat '$sitw_dev_trials_core' | cut -d\ --fields=1,2 |" exp/scores/sitw_dev_core_scores || exit 1; # SITW Dev Core: - # EER: 5.044% - # minDCF(p-target=0.01): 0.4154 - # minDCF(p-target=0.001): 0.5583 + # EER: 4.813% + # minDCF(p-target=0.01): 0.4250 + # minDCF(p-target=0.001): 0.5727 echo "SITW Dev Core:" eer=$(paste $sitw_dev_trials_core exp/scores/sitw_dev_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/sitw_dev_core_scores $sitw_dev_trials_core 2> /dev/null` @@ -236,9 +241,9 @@ if [ $stage -le 9 ]; then "cat '$sitw_eval_trials_core' | cut -d\ --fields=1,2 |" exp/scores/sitw_eval_core_scores || exit 1; # SITW Eval Core: - # EER: 5.303% - # minDCF(p-target=0.01): 0.4526 - # minDCF(p-target=0.001): 0.6347 + # EER: 5.659% + # minDCF(p-target=0.01): 0.4637 + # minDCF(p-target=0.001): 0.6290 echo -e "\nSITW Eval Core:"; eer=$(paste $sitw_eval_trials_core exp/scores/sitw_eval_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/sitw_eval_core_scores $sitw_eval_trials_core 2> /dev/null` diff --git a/egs/sitw/v2/run.sh b/egs/sitw/v2/run.sh index c5529242e94..499d436366a 100755 --- a/egs/sitw/v2/run.sh +++ b/egs/sitw/v2/run.sh @@ -39,13 +39,19 @@ if [ $stage -le 0 ]; then # set SITW. The script removes these overlapping speakers from VoxCeleb1. local/make_voxceleb1.pl $voxceleb1_root data - # Prepare the VoxCeleb2 dataset. + # Prepare the dev portion of the VoxCeleb2 dataset. local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train - local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test - # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1. - # This should give 7,351 speakers and 1,277,503 utterances. - utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1 + # The original version of this recipe included the test portion of VoxCeleb2 + # in the training list. Unfortunately, it turns out that there's an overlap + # with our evaluation set, Speakers in the Wild. Therefore, we've removed + # this dataset from the training list. + # local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test + + # We'll train on the dev portion of VoxCeleb2, plus VoxCeleb1 (minus the + # speakers that overlap with SITW). + # This should leave 7,185 speakers and 1,236,567 utterances. + utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb1 # Prepare Speakers in the Wild. This is our evaluation dataset. local/make_sitw.sh $sitw_root data @@ -169,7 +175,7 @@ if [ $stage -le 5 ]; then fi # Stages 6 through 8 are handled in run_xvector.sh -local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage 30 \ +local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \ --data data/train_combined_no_sil --nnet-dir $nnet_dir \ --egs-dir $nnet_dir/egs @@ -228,9 +234,9 @@ if [ $stage -le 11 ]; then "cat '$sitw_dev_trials_core' | cut -d\ --fields=1,2 |" $nnet_dir/scores/sitw_dev_core_scores || exit 1; # SITW Dev Core: - # EER: 3.08% - # minDCF(p-target=0.01): 0.3016 - # minDCF(p-target=0.001): 0.4993 + # EER: 3.003% + # minDCF(p-target=0.01): 0.3119 + # minDCF(p-target=0.001): 0.4955 echo "SITW Dev Core:" eer=$(paste $sitw_dev_trials_core $nnet_dir/scores/sitw_dev_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores/sitw_dev_core_scores $sitw_dev_trials_core 2> /dev/null` @@ -251,9 +257,9 @@ if [ $stage -le 12 ]; then "cat '$sitw_eval_trials_core' | cut -d\ --fields=1,2 |" $nnet_dir/scores/sitw_eval_core_scores || exit 1; # SITW Eval Core: - # EER: 3.335% - # minDCF(p-target=0.01): 0.3412 - # minDCF(p-target=0.001): 0.5106 + # EER: 3.499% + # minDCF(p-target=0.01): 0.3424 + # minDCF(p-target=0.001): 0.5164 echo -e "\nSITW Eval Core:"; eer=$(paste $sitw_eval_trials_core $nnet_dir/scores/sitw_eval_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores/sitw_eval_core_scores $sitw_eval_trials_core 2> /dev/null`