diff --git a/egs/voxceleb/v1/local/make_voxceleb1.pl b/egs/voxceleb/v1/local/make_voxceleb1.pl index 916e11020d2..2268c20ab52 100755 --- a/egs/voxceleb/v1/local/make_voxceleb1.pl +++ b/egs/voxceleb/v1/local/make_voxceleb1.pl @@ -15,10 +15,6 @@ my $out_test_dir = "$out_dir/voxceleb1_test"; my $out_train_dir = "$out_dir/voxceleb1_train"; -if (! -e "$data_base/voxceleb1_test.txt") { - system("wget -O $data_base/voxceleb1_test.txt http://www.openslr.org/resources/49/voxceleb1_test.txt"); -} - if (system("mkdir -p $out_test_dir") != 0) { die "Error making directory $out_test_dir"; } @@ -31,20 +27,35 @@ my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh); closedir $dh; +if (! -e "$data_base/voxceleb1_test.txt") { + system("wget -O $data_base/voxceleb1_test.txt http://www.openslr.org/resources/49/voxceleb1_test.txt"); +} + +if (! -e "$data_base/vox1_meta.csv") { + system("wget -O $data_base/vox1_meta.csv http://www.openslr.org/resources/49/vox1_meta.csv"); +} + open(TRIAL_IN, "<", "$data_base/voxceleb1_test.txt") or die "Could not open the verification trials file $data_base/voxceleb1_test.txt"; +open(META_IN, "<", "$data_base/vox1_meta.csv") or die "Could not open the meta data file $data_base/vox1_meta.csv"; open(SPKR_TEST, ">", "$out_test_dir/utt2spk") or die "Could not open the output file $out_test_dir/utt2spk"; open(WAV_TEST, ">", "$out_test_dir/wav.scp") or die "Could not open the output file $out_test_dir/wav.scp"; open(SPKR_TRAIN, ">", "$out_train_dir/utt2spk") or die "Could not open the output file $out_train_dir/utt2spk"; open(WAV_TRAIN, ">", "$out_train_dir/wav.scp") or die "Could not open the output file $out_train_dir/wav.scp"; open(TRIAL_OUT, ">", "$out_test_dir/trials") or die "Could not open the output file $out_test_dir/trials"; +my %id2spkr = (); +while () { + chomp; + my ($vox_id, $spkr_id, $gender, $nation, $set) = split; + $id2spkr{$vox_id} = $spkr_id; +} + my $test_spkrs = (); while () { chomp; - my ($tar_or_none, $path1, $path2) = split; + my ($tar_or_non, $path1, $path2) = split; # Create entry for left-hand side of trial - my $wav = "$data_base/voxceleb1_wav/$path1"; my ($spkr_id, $filename) = split('/', $path1); my $rec_id = substr($filename, 0, 11); my $segment = substr($filename, 12, 7); @@ -52,7 +63,6 @@ $test_spkrs{$spkr_id} = (); # Create entry for right-hand side of trial - my $wav = "$data_base/voxceleb1_wav/$path2"; my ($spkr_id, $filename) = split('/', $path2); my $rec_id = substr($filename, 0, 11); my $segment = substr($filename, 12, 7); @@ -60,7 +70,7 @@ $test_spkrs{$spkr_id} = (); my $target = "nontarget"; - if ($tar_or_none eq "1") { + if ($tar_or_non eq "1") { $target = "target"; } print TRIAL_OUT "$utt_id1 $utt_id2 $target\n"; @@ -68,6 +78,12 @@ foreach (@spkr_dirs) { my $spkr_id = $_; + my $new_spkr_id = $spkr_id; + # If we're using a newer version of VoxCeleb1, we need to "deanonymize" + # the speaker labels. + if (exists $id2spkr{$spkr_id}) { + $new_spkr_id = $id2spkr{$spkr_id}; + } opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!"; my @files = map{s/\.[^.]+$//;$_}grep {/\.wav$/} readdir($dh); closedir $dh; @@ -75,14 +91,14 @@ my $filename = $_; my $rec_id = substr($filename, 0, 11); my $segment = substr($filename, 12, 7); - my $utt_id = "$spkr_id-$rec_id-$segment"; my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav"; - if (exists $test_spkrs{$spkr_id}) { + my $utt_id = "$new_spkr_id-$rec_id-$segment"; + if (exists $test_spkrs{$new_spkr_id}) { print WAV_TEST "$utt_id", " $wav", "\n"; - print SPKR_TEST "$utt_id", " $spkr_id", "\n"; + print SPKR_TEST "$utt_id", " $new_spkr_id", "\n"; } else { print WAV_TRAIN "$utt_id", " $wav", "\n"; - print SPKR_TRAIN "$utt_id", " $spkr_id", "\n"; + print SPKR_TRAIN "$utt_id", " $new_spkr_id", "\n"; } } } @@ -93,6 +109,7 @@ close(WAV_TRAIN) or die; close(TRIAL_OUT) or die; close(TRIAL_IN) or die; +close(META_IN) or die; if (system( "utils/utt2spk_to_spk2utt.pl $out_test_dir/utt2spk >$out_test_dir/spk2utt") != 0) {