Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions egs/sitw/v1/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,18 @@ if [ $stage -le 0 ]; then
# set SITW. The script removes the overlapping speakers from VoxCeleb1.
local/make_voxceleb1.pl $voxceleb1_root data

# Prepare the VoxCeleb2 dataset.
# Prepare the dev portion of the VoxCeleb2 dataset.
local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train
local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test

# The original version of this recipe included the test portion of VoxCeleb2
# in the training list. Unfortunately, it turns out that there's an overlap
# with our evaluation set, Speakers in the Wild. Therefore, we've removed
# this dataset from the training list.
# local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test

# We'll train on the dev portion of VoxCeleb2, plus the training portion of VoxCeleb1.
# This should give 7,351 speakers and 1,277,503 utterances.
utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1
# This should leave 7,185 speakers and 1,236,567 utterances.
utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb1

# Prepare Speakers in the Wild. This is our evaluation dataset.
local/make_sitw.sh $sitw_root data
Expand Down Expand Up @@ -213,9 +218,9 @@ if [ $stage -le 8 ]; then
"cat '$sitw_dev_trials_core' | cut -d\ --fields=1,2 |" exp/scores/sitw_dev_core_scores || exit 1;

# SITW Dev Core:
# EER: 5.044%
# minDCF(p-target=0.01): 0.4154
# minDCF(p-target=0.001): 0.5583
# EER: 4.813%
# minDCF(p-target=0.01): 0.4250
# minDCF(p-target=0.001): 0.5727
echo "SITW Dev Core:"
eer=$(paste $sitw_dev_trials_core exp/scores/sitw_dev_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/sitw_dev_core_scores $sitw_dev_trials_core 2> /dev/null`
Expand All @@ -236,9 +241,9 @@ if [ $stage -le 9 ]; then
"cat '$sitw_eval_trials_core' | cut -d\ --fields=1,2 |" exp/scores/sitw_eval_core_scores || exit 1;

# SITW Eval Core:
# EER: 5.303%
# minDCF(p-target=0.01): 0.4526
# minDCF(p-target=0.001): 0.6347
# EER: 5.659%
# minDCF(p-target=0.01): 0.4637
# minDCF(p-target=0.001): 0.6290
echo -e "\nSITW Eval Core:";
eer=$(paste $sitw_eval_trials_core exp/scores/sitw_eval_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
mindcf1=`sid/compute_min_dcf.py --p-target 0.01 exp/scores/sitw_eval_core_scores $sitw_eval_trials_core 2> /dev/null`
Expand Down
30 changes: 18 additions & 12 deletions egs/sitw/v2/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,19 @@ if [ $stage -le 0 ]; then
# set SITW. The script removes these overlapping speakers from VoxCeleb1.
local/make_voxceleb1.pl $voxceleb1_root data

# Prepare the VoxCeleb2 dataset.
# Prepare the dev portion of the VoxCeleb2 dataset.
local/make_voxceleb2.pl $voxceleb2_root dev data/voxceleb2_train
local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test

# We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1.
# This should give 7,351 speakers and 1,277,503 utterances.
utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb2_test data/voxceleb1
# The original version of this recipe included the test portion of VoxCeleb2
# in the training list. Unfortunately, it turns out that there's an overlap
# with our evaluation set, Speakers in the Wild. Therefore, we've removed
# this dataset from the training list.
# local/make_voxceleb2.pl $voxceleb2_root test data/voxceleb2_test

# We'll train on the dev portion of VoxCeleb2, plus VoxCeleb1 (minus the
# speakers that overlap with SITW).
# This should leave 7,185 speakers and 1,236,567 utterances.
utils/combine_data.sh data/train data/voxceleb2_train data/voxceleb1

# Prepare Speakers in the Wild. This is our evaluation dataset.
local/make_sitw.sh $sitw_root data
Expand Down Expand Up @@ -169,7 +175,7 @@ if [ $stage -le 5 ]; then
fi

# Stages 6 through 8 are handled in run_xvector.sh
local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage 30 \
local/nnet3/xvector/run_xvector.sh --stage $stage --train-stage -1 \
--data data/train_combined_no_sil --nnet-dir $nnet_dir \
--egs-dir $nnet_dir/egs

Expand Down Expand Up @@ -228,9 +234,9 @@ if [ $stage -le 11 ]; then
"cat '$sitw_dev_trials_core' | cut -d\ --fields=1,2 |" $nnet_dir/scores/sitw_dev_core_scores || exit 1;

# SITW Dev Core:
# EER: 3.08%
# minDCF(p-target=0.01): 0.3016
# minDCF(p-target=0.001): 0.4993
# EER: 3.003%
# minDCF(p-target=0.01): 0.3119
# minDCF(p-target=0.001): 0.4955
echo "SITW Dev Core:"
eer=$(paste $sitw_dev_trials_core $nnet_dir/scores/sitw_dev_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores/sitw_dev_core_scores $sitw_dev_trials_core 2> /dev/null`
Expand All @@ -251,9 +257,9 @@ if [ $stage -le 12 ]; then
"cat '$sitw_eval_trials_core' | cut -d\ --fields=1,2 |" $nnet_dir/scores/sitw_eval_core_scores || exit 1;

# SITW Eval Core:
# EER: 3.335%
# minDCF(p-target=0.01): 0.3412
# minDCF(p-target=0.001): 0.5106
# EER: 3.499%
# minDCF(p-target=0.01): 0.3424
# minDCF(p-target=0.001): 0.5164
echo -e "\nSITW Eval Core:";
eer=$(paste $sitw_eval_trials_core $nnet_dir/scores/sitw_eval_core_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores/sitw_eval_core_scores $sitw_eval_trials_core 2> /dev/null`
Expand Down