diff --git a/egs/sitw/v1/run.sh b/egs/sitw/v1/run.sh index 79ad18c2023..68d08dfc170 100755 --- a/egs/sitw/v1/run.sh +++ b/egs/sitw/v1/run.sh @@ -39,13 +39,18 @@ if [ $stage -le 0 ]; then # set SITW. The script removes the overlapping speakers from VoxCeleb1. local/make_voxceleb1.pl $voxceleb1_root data - # Prepare the VoxCeleb2 dataset. + # Prepare the dev portion of the VoxCeleb2 dataset. local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train - local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test + + # The original version of this recipe included the test portion of VoxCeleb2 + # in the training list. Unfortunately, it turns out that there's an overlap + # with our evaluation set, Speakers in the Wild. Therefore, we've removed + # this dataset from the training list. + # local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test - # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1. - # This should give 7,351 speakers and 1,277,503 utterances. - utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1 + # We'll train on the dev portion of VoxCeleb2, plus the training portion of VoxCeleb1. + # This should leave 7,185 speakers and 1,236,567 utterances. + utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb1 # Prepare Speakers in the Wild. This is our evaluation dataset. 
local/make_sitw.sh $sitw_root data @@ -213,9 +218,9 @@ if [ $stage -le 8 ]; then "cat '$sitw_dev_trials_core' | cut -d\ --fields=1,2 |" exp/scores/sitw_dev_core_scores || exit 1; # SITW Dev Core: - # EER: 5.044% - # minDCF(p-target=0.01): 0.4154 - # minDCF(p-target=0.001): 0.5583 + # EER: 4.813% + # minDCF(p-target=0.01): 0.4250 + # minDCF(p-target=0.001): 0.5727 echo "SITW Dev Core:" eer=$(paste $sitw_dev_trials_core exp/scores/sitw_dev_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/sitw_dev_core_scores $sitw_dev_trials_core 2> /dev/null` @@ -236,9 +241,9 @@ if [ $stage -le 9 ]; then "cat '$sitw_eval_trials_core' | cut -d\ --fields=1,2 |" exp/scores/sitw_eval_core_scores || exit 1; # SITW Eval Core: - # EER: 5.303% - # minDCF(p-target=0.01): 0.4526 - # minDCF(p-target=0.001): 0.6347 + # EER: 5.659% + # minDCF(p-target=0.01): 0.4637 + # minDCF(p-target=0.001): 0.6290 echo -e "\nSITW Eval Core:"; eer=$(paste $sitw_eval_trials_core exp/scores/sitw_eval_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/sitw_eval_core_scores $sitw_eval_trials_core 2> /dev/null` diff --git a/egs/sitw/v2/run.sh b/egs/sitw/v2/run.sh index c5529242e94..499d436366a 100755 --- a/egs/sitw/v2/run.sh +++ b/egs/sitw/v2/run.sh @@ -39,13 +39,19 @@ if [ $stage -le 0 ]; then # set SITW. The script removes these overlapping speakers from VoxCeleb1. local/make_voxceleb1.pl $voxceleb1_root data - # Prepare the VoxCeleb2 dataset. + # Prepare the dev portion of the VoxCeleb2 dataset. local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train - local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test - # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1. - # This should give 7,351 speakers and 1,277,503 utterances. 
- utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1 + # The original version of this recipe included the test portion of VoxCeleb2 + # in the training list. Unfortunately, it turns out that there's an overlap + # with our evaluation set, Speakers in the Wild. Therefore, we've removed + # this dataset from the training list. + # local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test + + # We'll train on the dev portion of VoxCeleb2, plus VoxCeleb1 (minus the + # speakers that overlap with SITW). + # This should leave 7,185 speakers and 1,236,567 utterances. + utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb1 # Prepare Speakers in the Wild. This is our evaluation dataset. local/make_sitw.sh $sitw_root data @@ -169,7 +175,7 @@ if [ $stage -le 5 ]; then fi # Stages 6 through 8 are handled in run_xvector.sh -local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage 30 \ +local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \ --data data/train_combined_no_sil --nnet-dir $nnet_dir \ --egs-dir $nnet_dir/egs @@ -228,9 +234,9 @@ if [ $stage -le 11 ]; then "cat '$sitw_dev_trials_core' | cut -d\ --fields=1,2 |" $nnet_dir/scores/sitw_dev_core_scores || exit 1; # SITW Dev Core: - # EER: 3.08% - # minDCF(p-target=0.01): 0.3016 - # minDCF(p-target=0.001): 0.4993 + # EER: 3.003% + # minDCF(p-target=0.01): 0.3119 + # minDCF(p-target=0.001): 0.4955 echo "SITW Dev Core:" eer=$(paste $sitw_dev_trials_core $nnet_dir/scores/sitw_dev_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores/sitw_dev_core_scores $sitw_dev_trials_core 2> /dev/null` @@ -251,9 +257,9 @@ if [ $stage -le 12 ]; then "cat '$sitw_eval_trials_core' | cut -d\ --fields=1,2 |" $nnet_dir/scores/sitw_eval_core_scores || exit 1; # SITW Eval Core: - # EER: 3.335% - # minDCF(p-target=0.01): 0.3412 - # minDCF(p-target=0.001): 0.5106 + # EER: 3.499% + # 
minDCF(p-target=0.01): 0.3424 + # minDCF(p-target=0.001): 0.5164 echo -e "\nSITW Eval Core:"; eer=$(paste $sitw_eval_trials_core $nnet_dir/scores/sitw_eval_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores/sitw_eval_core_scores $sitw_eval_trials_core 2> /dev/null`