diff --git a/egs/sitw/v1/run.sh b/egs/sitw/v1/run.sh
index 79ad18c2023..68d08dfc170 100755
--- a/egs/sitw/v1/run.sh
+++ b/egs/sitw/v1/run.sh
@@ -39,13 +39,18 @@ if [ $stage -le 0 ]; then
   # set SITW.  The script removes the overlapping speakers from VoxCeleb1.
   local/make_voxceleb1.pl $voxceleb1_root data
 
-  # Prepare the VoxCeleb2 dataset.
+  # Prepare the dev portion of the VoxCeleb2 dataset.
   local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train
-  local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test
+
+  # The original version of this recipe included the test portion of VoxCeleb2
+  # in the training list.  Unfortunately, it turns out that there's an overlap
+  # with our evaluation set, Speakers in the Wild.  Therefore, we've removed
+  # this dataset from the training list.
+  # local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test
 
-  # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1.
+  # We'll train on the VoxCeleb2 dev set, plus the training portion of VoxCeleb1.
-  # This should give 7,351 speakers and 1,277,503 utterances.
-  utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1
+  # This should leave 7,185 speakers and 1,236,567 utterances.
+  utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb1
 
   # Prepare Speakers in the Wild.  This is our evaluation dataset.
   local/make_sitw.sh $sitw_root data
@@ -213,9 +218,9 @@ if [ $stage -le 8 ]; then
     "cat '$sitw_dev_trials_core' | cut -d\  --fields=1,2 |" exp/scores/sitw_dev_core_scores || exit 1;
 
   # SITW Dev Core:
-  # EER: 5.044%
-  # minDCF(p-target=0.01): 0.4154
-  # minDCF(p-target=0.001): 0.5583
+  # EER: 4.813%
+  # minDCF(p-target=0.01): 0.4250
+  # minDCF(p-target=0.001): 0.5727
   echo "SITW Dev Core:"
   eer=$(paste $sitw_dev_trials_core exp/scores/sitw_dev_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
   mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/sitw_dev_core_scores $sitw_dev_trials_core 2> /dev/null`
@@ -236,9 +241,9 @@ if [ $stage -le 9 ]; then
     "cat '$sitw_eval_trials_core' | cut -d\  --fields=1,2 |" exp/scores/sitw_eval_core_scores || exit 1;
 
   # SITW Eval Core:
-  # EER: 5.303%
-  # minDCF(p-target=0.01): 0.4526
-  # minDCF(p-target=0.001): 0.6347
+  # EER: 5.659%
+  # minDCF(p-target=0.01): 0.4637
+  # minDCF(p-target=0.001): 0.6290
   echo -e "\nSITW Eval Core:";
   eer=$(paste $sitw_eval_trials_core exp/scores/sitw_eval_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
   mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/sitw_eval_core_scores $sitw_eval_trials_core 2> /dev/null`
diff --git a/egs/sitw/v2/run.sh b/egs/sitw/v2/run.sh
index c5529242e94..499d436366a 100755
--- a/egs/sitw/v2/run.sh
+++ b/egs/sitw/v2/run.sh
@@ -39,13 +39,19 @@ if [ $stage -le 0 ]; then
   # set SITW.  The script removes these overlapping speakers from VoxCeleb1.
   local/make_voxceleb1.pl $voxceleb1_root data
 
-  # Prepare the VoxCeleb2 dataset.
+  # Prepare the dev portion of the VoxCeleb2 dataset.
   local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train
-  local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test
 
-  # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1.
-  # This should give 7,351 speakers and 1,277,503 utterances.
-  utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1
+  # The original version of this recipe included the test portion of VoxCeleb2
+  # in the training list.  Unfortunately, it turns out that there's an overlap
+  # with our evaluation set, Speakers in the Wild.  Therefore, we've removed
+  # this dataset from the training list.
+  # local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test
+
+  # We'll train on the dev portion of VoxCeleb2, plus VoxCeleb1 (minus the
+  # speakers that overlap with SITW).
+  # This should leave 7,185 speakers and 1,236,567 utterances.
+  utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb1
 
   # Prepare Speakers in the Wild.  This is our evaluation dataset.
   local/make_sitw.sh $sitw_root data
@@ -169,7 +175,7 @@ if [ $stage -le 5 ]; then
 fi
 
 # Stages 6 through 8 are handled in run_xvector.sh
-local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage 30 \
+local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \
   --data data/train_combined_no_sil --nnet-dir $nnet_dir \
   --egs-dir $nnet_dir/egs
 
@@ -228,9 +234,9 @@ if [ $stage -le 11 ]; then
     "cat '$sitw_dev_trials_core' | cut -d\  --fields=1,2 |" $nnet_dir/scores/sitw_dev_core_scores || exit 1;
 
   # SITW Dev Core:
-  # EER: 3.08%
-  # minDCF(p-target=0.01): 0.3016
-  # minDCF(p-target=0.001): 0.4993
+  # EER: 3.003%
+  # minDCF(p-target=0.01): 0.3119
+  # minDCF(p-target=0.001): 0.4955
   echo "SITW Dev Core:"
   eer=$(paste $sitw_dev_trials_core $nnet_dir/scores/sitw_dev_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
   mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores/sitw_dev_core_scores $sitw_dev_trials_core 2> /dev/null`
@@ -251,9 +257,9 @@ if [ $stage -le 12 ]; then
     "cat '$sitw_eval_trials_core' | cut -d\  --fields=1,2 |" $nnet_dir/scores/sitw_eval_core_scores || exit 1;
 
   # SITW Eval Core:
-  # EER: 3.335%
-  # minDCF(p-target=0.01): 0.3412
-  # minDCF(p-target=0.001): 0.5106
+  # EER: 3.499%
+  # minDCF(p-target=0.01): 0.3424
+  # minDCF(p-target=0.001): 0.5164
   echo -e "\nSITW Eval Core:";
   eer=$(paste $sitw_eval_trials_core $nnet_dir/scores/sitw_eval_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
   mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores/sitw_eval_core_scores $sitw_eval_trials_core 2> /dev/null`