From feb4ebb63656e62530647b69ca025bf398aec455 Mon Sep 17 00:00:00 2001 From: phanisankar-nidadavolu Date: Mon, 13 May 2019 19:20:54 -0400 Subject: [PATCH] updated local/musan.sh to steps/data/make_musan.sh in speaker id scripts --- egs/bn_music_speech/v1/local/make_musan.py | 119 ----------------- egs/bn_music_speech/v1/local/make_musan.sh | 37 ------ egs/bn_music_speech/v1/run.sh | 2 +- .../v1/local/make_musan.py | 119 ----------------- .../v1/local/make_musan.sh | 37 ------ egs/callhome_diarization/v2/run.sh | 2 +- egs/dihard_2018/v2/local/make_musan.py | 123 ------------------ egs/dihard_2018/v2/local/make_musan.sh | 39 ------ egs/dihard_2018/v2/run.sh | 14 +- egs/sitw/v1/local/make_musan.py | 123 ------------------ egs/sitw/v1/local/make_musan.sh | 39 ------ egs/sitw/v1/run.sh | 2 +- egs/sitw/v2/run.sh | 2 +- egs/sre16/v1/local/make_musan.py | 119 ----------------- egs/sre16/v1/local/make_musan.sh | 37 ------ egs/sre16/v1/run.sh | 2 +- egs/sre16/v2/run.sh | 3 +- egs/voxceleb/v1/local/make_musan.py | 123 ------------------ egs/voxceleb/v1/local/make_musan.sh | 39 ------ egs/voxceleb/v2/run.sh | 4 +- egs/wsj/s5/steps/data/make_musan.py | 26 ++-- 21 files changed, 29 insertions(+), 982 deletions(-) delete mode 100755 egs/bn_music_speech/v1/local/make_musan.py delete mode 100755 egs/bn_music_speech/v1/local/make_musan.sh delete mode 100755 egs/callhome_diarization/v1/local/make_musan.py delete mode 100755 egs/callhome_diarization/v1/local/make_musan.sh delete mode 100755 egs/dihard_2018/v2/local/make_musan.py delete mode 100755 egs/dihard_2018/v2/local/make_musan.sh delete mode 100755 egs/sitw/v1/local/make_musan.py delete mode 100755 egs/sitw/v1/local/make_musan.sh delete mode 100755 egs/sre16/v1/local/make_musan.py delete mode 100755 egs/sre16/v1/local/make_musan.sh delete mode 100755 egs/voxceleb/v1/local/make_musan.py delete mode 100755 egs/voxceleb/v1/local/make_musan.sh diff --git a/egs/bn_music_speech/v1/local/make_musan.py b/egs/bn_music_speech/v1/local/make_musan.py deleted file mode 100755 index eb739b68180..00000000000 --- a/egs/bn_music_speech/v1/local/make_musan.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2015 David Snyder -# Apache 2.0. -# -# This file is meant to be invoked by make_musan.sh. - -import os, sys - -def process_music_annotations(path): - utt2spk = {} - utt2vocals = {} - lines = open(path, 'r').readlines() - for line in lines: - utt, genres, vocals, musician = line.rstrip().split()[:4] - # For this application, the musican ID isn't important - utt2spk[utt] = utt - utt2vocals[utt] = vocals == "Y" - return utt2spk, utt2vocals - -def prepare_music(root_dir, use_vocals): - utt2vocals = {} - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - music_dir = os.path.join(root_dir, "music") - for root, dirs, files in os.walk(music_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - elif str(file) == "ANNOTATIONS": - utt2spk_part, utt2vocals_part = process_music_annotations(file_path) - utt2spk.update(utt2spk_part) - utt2vocals.update(utt2vocals_part) - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2vocals: - if utt in utt2wav: - if use_vocals or not utt2vocals[utt]: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_speech(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - speech_dir = os.path.join(root_dir, "speech") - for root, dirs, files in os.walk(speech_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_noise(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - noise_dir = os.path.join(root_dir, "noise") - for root, dirs, files in os.walk(noise_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def main(): - in_dir = sys.argv[1] - out_dir = sys.argv[2] - use_vocals = sys.argv[3] == "Y" - utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals) - utt2spk_speech, utt2wav_speech = prepare_speech(in_dir) - utt2spk_noise, utt2wav_noise = prepare_noise(in_dir) - utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise - utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise - wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w') - wav_fi.write(utt2wav) - utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w') - utt2spk_fi.write(utt2spk) - - -if __name__=="__main__": - main() diff --git a/egs/bn_music_speech/v1/local/make_musan.sh b/egs/bn_music_speech/v1/local/make_musan.sh deleted file mode 100755 index 694940ad70f..00000000000 --- a/egs/bn_music_speech/v1/local/make_musan.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Copyright 2015 David Snyder -# Apache 2.0. -# -# This script, called by ../run.sh, creates the MUSAN -# data directory. The required dataset is freely available at -# http://www.openslr.org/17/ - -set -e -in_dir=$1 -data_dir=$2 -use_vocals='Y' - -mkdir -p local/musan.tmp - -echo "Preparing ${data_dir}/musan..." -mkdir -p ${data_dir}/musan -local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals} - -utils/fix_data_dir.sh ${data_dir}/musan - -grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music -grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech -grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \ - ${data_dir}/musan ${data_dir}/musan_music -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \ - ${data_dir}/musan ${data_dir}/musan_speech -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \ - ${data_dir}/musan ${data_dir}/musan_noise - -utils/fix_data_dir.sh ${data_dir}/musan_music -utils/fix_data_dir.sh ${data_dir}/musan_speech -utils/fix_data_dir.sh ${data_dir}/musan_noise - -rm -rf local/musan.tmp - diff --git a/egs/bn_music_speech/v1/run.sh b/egs/bn_music_speech/v1/run.sh index 6cc0531e9d7..08d5c022a9d 100755 --- a/egs/bn_music_speech/v1/run.sh +++ b/egs/bn_music_speech/v1/run.sh @@ -20,7 +20,7 @@ vaddir=`pwd`/mfcc local/make_bn.sh /export/corpora5/LDC/LDC97S44 \ /export/corpora/LDC/LDC97T22 data -local/make_musan.sh /export/corpora/JHU/musan data +steps/data/make_musan.sh --sampling-rate 16000 /export/corpora/JHU/musan data steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 30 --cmd "$train_cmd" \ data/musan_speech exp/make_mfcc $mfccdir diff --git a/egs/callhome_diarization/v1/local/make_musan.py b/egs/callhome_diarization/v1/local/make_musan.py deleted file mode 100755 index 7c50adf7c83..00000000000 --- a/egs/callhome_diarization/v1/local/make_musan.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2015 David Snyder -# Apache 2.0. -# -# This file is meant to be invoked by make_musan.sh. - -import os, sys - -def process_music_annotations(path): - utt2spk = {} - utt2vocals = {} - lines = open(path, 'r').readlines() - for line in lines: - utt, genres, vocals, musician = line.rstrip().split()[:4] - # For this application, the musican ID isn't important - utt2spk[utt] = utt - utt2vocals[utt] = vocals == "Y" - return utt2spk, utt2vocals - -def prepare_music(root_dir, use_vocals): - utt2vocals = {} - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - music_dir = os.path.join(root_dir, "music") - for root, dirs, files in os.walk(music_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - elif str(file) == "ANNOTATIONS": - utt2spk_part, utt2vocals_part = process_music_annotations(file_path) - utt2spk.update(utt2spk_part) - utt2vocals.update(utt2vocals_part) - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2vocals: - if utt in utt2wav: - if use_vocals or not utt2vocals[utt]: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" - num_good_files += 1 - else: - print("Missing file: {}".format(utt)) - num_bad_files += 1 - print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_speech(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - speech_dir = os.path.join(root_dir, "speech") - for root, dirs, files in os.walk(speech_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" - num_good_files += 1 - else: - print("Missing file: {}".format(utt)) - num_bad_files += 1 - print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_noise(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - noise_dir = os.path.join(root_dir, "noise") - for root, dirs, files in os.walk(noise_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" - num_good_files += 1 - else: - print("Missing file: {}".format(utt)) - num_bad_files += 1 - print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def main(): - in_dir = sys.argv[1] - out_dir = sys.argv[2] - use_vocals = sys.argv[3] == "Y" - utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals) - utt2spk_speech, utt2wav_speech = prepare_speech(in_dir) - utt2spk_noise, utt2wav_noise = prepare_noise(in_dir) - utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise - utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise - wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w') - wav_fi.write(utt2wav) - utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w') - utt2spk_fi.write(utt2spk) - - -if __name__=="__main__": - main() diff --git a/egs/callhome_diarization/v1/local/make_musan.sh b/egs/callhome_diarization/v1/local/make_musan.sh deleted file mode 100755 index 694940ad70f..00000000000 --- a/egs/callhome_diarization/v1/local/make_musan.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Copyright 2015 David Snyder -# Apache 2.0. -# -# This script, called by ../run.sh, creates the MUSAN -# data directory. The required dataset is freely available at -# http://www.openslr.org/17/ - -set -e -in_dir=$1 -data_dir=$2 -use_vocals='Y' - -mkdir -p local/musan.tmp - -echo "Preparing ${data_dir}/musan..." -mkdir -p ${data_dir}/musan -local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals} - -utils/fix_data_dir.sh ${data_dir}/musan - -grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music -grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech -grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \ - ${data_dir}/musan ${data_dir}/musan_music -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \ - ${data_dir}/musan ${data_dir}/musan_speech -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \ - ${data_dir}/musan ${data_dir}/musan_noise - -utils/fix_data_dir.sh ${data_dir}/musan_music -utils/fix_data_dir.sh ${data_dir}/musan_speech -utils/fix_data_dir.sh ${data_dir}/musan_noise - -rm -rf local/musan.tmp - diff --git a/egs/callhome_diarization/v2/run.sh b/egs/callhome_diarization/v2/run.sh index b79717e2348..e85baa50691 100755 --- a/egs/callhome_diarization/v2/run.sh +++ b/egs/callhome_diarization/v2/run.sh @@ -130,7 +130,7 @@ if [ $stage -le 2 ]; then # Prepare the MUSAN corpus, which consists of music, speech, and noise # suitable for augmentation. - local/make_musan.sh /export/corpora/JHU/musan data + steps/data/make_musan.sh --sampling-rate 8000 /export/corpora/JHU/musan data # Get the duration of the MUSAN recordings. This will be used by the # script augment_data_dir.py. diff --git a/egs/dihard_2018/v2/local/make_musan.py b/egs/dihard_2018/v2/local/make_musan.py deleted file mode 100755 index c4b5c9359b4..00000000000 --- a/egs/dihard_2018/v2/local/make_musan.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2015 David Snyder -# 2018 Ewald Enzinger -# Apache 2.0. -# -# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8). -# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz. -# -# This file is meant to be invoked by make_musan.sh. - -import os, sys - -def process_music_annotations(path): - utt2spk = {} - utt2vocals = {} - lines = open(path, 'r').readlines() - for line in lines: - utt, genres, vocals, musician = line.rstrip().split()[:4] - # For this application, the musican ID isn't important - utt2spk[utt] = utt - utt2vocals[utt] = vocals == "Y" - return utt2spk, utt2vocals - -def prepare_music(root_dir, use_vocals): - utt2vocals = {} - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - music_dir = os.path.join(root_dir, "music") - for root, dirs, files in os.walk(music_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - elif str(file) == "ANNOTATIONS": - utt2spk_part, utt2vocals_part = process_music_annotations(file_path) - utt2spk.update(utt2spk_part) - utt2vocals.update(utt2vocals_part) - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2vocals: - if utt in utt2wav: - if use_vocals or not utt2vocals[utt]: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print(("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_speech(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - speech_dir = os.path.join(root_dir, "speech") - for root, dirs, files in os.walk(speech_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print(("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_noise(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - noise_dir = os.path.join(root_dir, "noise") - for root, dirs, files in os.walk(noise_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print(("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def main(): - in_dir = sys.argv[1] - out_dir = sys.argv[2] - use_vocals = sys.argv[3] == "Y" - utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals) - utt2spk_speech, utt2wav_speech = prepare_speech(in_dir) - utt2spk_noise, utt2wav_noise = prepare_noise(in_dir) - utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise - utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise - wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w') - wav_fi.write(utt2wav) - utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w') - utt2spk_fi.write(utt2spk) - - -if __name__=="__main__": - main() diff --git a/egs/dihard_2018/v2/local/make_musan.sh b/egs/dihard_2018/v2/local/make_musan.sh deleted file mode 100755 index 1565ef0d85c..00000000000 --- a/egs/dihard_2018/v2/local/make_musan.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2015 David Snyder -# Apache 2.0. -# -# Copy of egs/sre16/v1/local/make_musan.sh (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8). -# -# This script, called by ../run.sh, creates the MUSAN -# data directory. The required dataset is freely available at -# http://www.openslr.org/17/ - -set -e -in_dir=$1 -data_dir=$2 -use_vocals='Y' - -mkdir -p local/musan.tmp - -echo "Preparing ${data_dir}/musan..." -mkdir -p ${data_dir}/musan -local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals} - -utils/fix_data_dir.sh ${data_dir}/musan - -grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music -grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech -grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \ - ${data_dir}/musan ${data_dir}/musan_music -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \ - ${data_dir}/musan ${data_dir}/musan_speech -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \ - ${data_dir}/musan ${data_dir}/musan_noise - -utils/fix_data_dir.sh ${data_dir}/musan_music -utils/fix_data_dir.sh ${data_dir}/musan_speech -utils/fix_data_dir.sh ${data_dir}/musan_noise - -rm -rf local/musan.tmp - diff --git a/egs/dihard_2018/v2/run.sh b/egs/dihard_2018/v2/run.sh index d330322a5e8..b0fbdcc968c 100755 --- a/egs/dihard_2018/v2/run.sh +++ b/egs/dihard_2018/v2/run.sh @@ -118,7 +118,7 @@ if [ $stage -le 2 ]; then # Prepare the MUSAN corpus, which consists of music, speech, and noise # suitable for augmentation. - local/make_musan.sh $musan_root data + steps/data/make_musan.sh --sampling-rate 16000 $musan_root data # Get the duration of the MUSAN recordings. This will be used by the # script augment_data_dir.py. @@ -251,7 +251,7 @@ if [ $stage -le 12 ]; then # The threshold is in terms of the log likelihood ratio provided by the # PLDA scores. In a perfectly calibrated system, the threshold is 0. - # In the following loop, we evaluate DER performance on DIHARD 2018 development + # In the following loop, we evaluate DER performance on DIHARD 2018 development # set using some reasonable thresholds for a well-calibrated system. for threshold in -0.5 -0.4 -0.3 -0.2 -0.1 -0.05 0 0.05 0.1 0.2 0.3 0.4 0.5; do diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \ @@ -276,16 +276,16 @@ if [ $stage -le 12 ]; then --threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \ $nnet_dir/xvectors_dihard_2018_dev/plda_scores $nnet_dir/xvectors_dihard_2018_dev/plda_scores - # Cluster DIHARD 2018 evaluation set using the best threshold found for the DIHARD - # 2018 development set. The DIHARD 2018 development set is used as the validation - # set to tune the parameters. + # Cluster DIHARD 2018 evaluation set using the best threshold found for the DIHARD + # 2018 development set. The DIHARD 2018 development set is used as the validation + # set to tune the parameters. diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \ --threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \ $nnet_dir/xvectors_dihard_2018_eval/plda_scores $nnet_dir/xvectors_dihard_2018_eval/plda_scores mkdir -p $nnet_dir/results - # Compute the DER on the DIHARD 2018 evaluation set. We use the official metrics of - # the DIHARD challenge. The DER is calculated with no unscored collars and including + # Compute the DER on the DIHARD 2018 evaluation set. We use the official metrics of + # the DIHARD challenge. The DER is calculated with no unscored collars and including # overlapping speech. md-eval.pl -r data/dihard_2018_eval/rttm \ -s $nnet_dir/xvectors_dihard_2018_eval/plda_scores/rttm 2> $nnet_dir/results/threshold.log \ diff --git a/egs/sitw/v1/local/make_musan.py b/egs/sitw/v1/local/make_musan.py deleted file mode 100755 index 833da0619c9..00000000000 --- a/egs/sitw/v1/local/make_musan.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2015 David Snyder -# 2018 Ewald Enzinger -# Apache 2.0. -# -# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8). -# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz. -# -# This file is meant to be invoked by make_musan.sh. - -import os, sys - -def process_music_annotations(path): - utt2spk = {} - utt2vocals = {} - lines = open(path, 'r').readlines() - for line in lines: - utt, genres, vocals, musician = line.rstrip().split()[:4] - # For this application, the musican ID isn't important - utt2spk[utt] = utt - utt2vocals[utt] = vocals == "Y" - return utt2spk, utt2vocals - -def prepare_music(root_dir, use_vocals): - utt2vocals = {} - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - music_dir = os.path.join(root_dir, "music") - for root, dirs, files in os.walk(music_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - elif str(file) == "ANNOTATIONS": - utt2spk_part, utt2vocals_part = process_music_annotations(file_path) - utt2spk.update(utt2spk_part) - utt2vocals.update(utt2vocals_part) - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2vocals: - if utt in utt2wav: - if use_vocals or not utt2vocals[utt]: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_speech(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - speech_dir = os.path.join(root_dir, "speech") - for root, dirs, files in os.walk(speech_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_noise(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - noise_dir = os.path.join(root_dir, "noise") - for root, dirs, files in os.walk(noise_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def main(): - in_dir = sys.argv[1] - out_dir = sys.argv[2] - use_vocals = sys.argv[3] == "Y" - utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals) - utt2spk_speech, utt2wav_speech = prepare_speech(in_dir) - utt2spk_noise, utt2wav_noise = prepare_noise(in_dir) - utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise - utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise - wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w') - wav_fi.write(utt2wav) - utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w') - utt2spk_fi.write(utt2spk) - - -if __name__=="__main__": - main() diff --git a/egs/sitw/v1/local/make_musan.sh b/egs/sitw/v1/local/make_musan.sh deleted file mode 100755 index 1565ef0d85c..00000000000 --- a/egs/sitw/v1/local/make_musan.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2015 David Snyder -# Apache 2.0. -# -# Copy of egs/sre16/v1/local/make_musan.sh (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8). -# -# This script, called by ../run.sh, creates the MUSAN -# data directory. The required dataset is freely available at -# http://www.openslr.org/17/ - -set -e -in_dir=$1 -data_dir=$2 -use_vocals='Y' - -mkdir -p local/musan.tmp - -echo "Preparing ${data_dir}/musan..." -mkdir -p ${data_dir}/musan -local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals} - -utils/fix_data_dir.sh ${data_dir}/musan - -grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music -grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech -grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \ - ${data_dir}/musan ${data_dir}/musan_music -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \ - ${data_dir}/musan ${data_dir}/musan_speech -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \ - ${data_dir}/musan ${data_dir}/musan_noise - -utils/fix_data_dir.sh ${data_dir}/musan_music -utils/fix_data_dir.sh ${data_dir}/musan_speech -utils/fix_data_dir.sh ${data_dir}/musan_noise - -rm -rf local/musan.tmp - diff --git a/egs/sitw/v1/run.sh b/egs/sitw/v1/run.sh index e016f8a4752..797451df263 100755 --- a/egs/sitw/v1/run.sh +++ b/egs/sitw/v1/run.sh @@ -137,7 +137,7 @@ if [ $stage -le 4 ]; then # Prepare the MUSAN corpus, which consists of music, speech, and noise # suitable for augmentation. - local/make_musan.sh $musan_root data + steps/data/make_musan.sh --sampling-rate 16000 $musan_root data # Get the duration of the MUSAN recordings. This will be used by the # script augment_data_dir.py. diff --git a/egs/sitw/v2/run.sh b/egs/sitw/v2/run.sh index 8aeecc18b3f..aad58e4a853 100755 --- a/egs/sitw/v2/run.sh +++ b/egs/sitw/v2/run.sh @@ -103,7 +103,7 @@ if [ $stage -le 2 ]; then # Prepare the MUSAN corpus, which consists of music, speech, and noise # suitable for augmentation. - local/make_musan.sh $musan_root data + steps/data/make_musan.sh --sampling-rate 16000 $musan_root data # Get the duration of the MUSAN recordings. This will be used by the # script augment_data_dir.py. diff --git a/egs/sre16/v1/local/make_musan.py b/egs/sre16/v1/local/make_musan.py deleted file mode 100755 index 7735bd28818..00000000000 --- a/egs/sre16/v1/local/make_musan.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2015 David Snyder -# Apache 2.0. -# -# This file is meant to be invoked by make_musan.sh. - -import os, sys - -def process_music_annotations(path): - utt2spk = {} - utt2vocals = {} - lines = open(path, 'r').readlines() - for line in lines: - utt, genres, vocals, musician = line.rstrip().split()[:4] - # For this application, the musican ID isn't important - utt2spk[utt] = utt - utt2vocals[utt] = vocals == "Y" - return utt2spk, utt2vocals - -def prepare_music(root_dir, use_vocals): - utt2vocals = {} - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - music_dir = os.path.join(root_dir, "music") - for root, dirs, files in os.walk(music_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - elif str(file) == "ANNOTATIONS": - utt2spk_part, utt2vocals_part = process_music_annotations(file_path) - utt2spk.update(utt2spk_part) - utt2vocals.update(utt2vocals_part) - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2vocals: - if utt in utt2wav: - if use_vocals or not utt2vocals[utt]: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In music directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_speech(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - speech_dir = os.path.join(root_dir, "speech") - for root, dirs, files in os.walk(speech_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In speech directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_noise(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - noise_dir = os.path.join(root_dir, "noise") - for root, dirs, files in os.walk(noise_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In noise directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def main(): - in_dir = sys.argv[1] - out_dir = sys.argv[2] - use_vocals = sys.argv[3] == "Y" - utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals) - utt2spk_speech, utt2wav_speech = prepare_speech(in_dir) - utt2spk_noise, utt2wav_noise = prepare_noise(in_dir) - utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise - utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise - wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w') - wav_fi.write(utt2wav) - utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w') - utt2spk_fi.write(utt2spk) - - -if __name__=="__main__": - main() diff --git a/egs/sre16/v1/local/make_musan.sh b/egs/sre16/v1/local/make_musan.sh deleted file mode 100755 index 694940ad70f..00000000000 --- a/egs/sre16/v1/local/make_musan.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Copyright 2015 David Snyder -# Apache 2.0. -# -# This script, called by ../run.sh, creates the MUSAN -# data directory. The required dataset is freely available at -# http://www.openslr.org/17/ - -set -e -in_dir=$1 -data_dir=$2 -use_vocals='Y' - -mkdir -p local/musan.tmp - -echo "Preparing ${data_dir}/musan..." -mkdir -p ${data_dir}/musan -local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals} - -utils/fix_data_dir.sh ${data_dir}/musan - -grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music -grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech -grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \ - ${data_dir}/musan ${data_dir}/musan_music -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \ - ${data_dir}/musan ${data_dir}/musan_speech -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \ - ${data_dir}/musan ${data_dir}/musan_noise - -utils/fix_data_dir.sh ${data_dir}/musan_music -utils/fix_data_dir.sh ${data_dir}/musan_speech -utils/fix_data_dir.sh ${data_dir}/musan_noise - -rm -rf local/musan.tmp - diff --git a/egs/sre16/v1/run.sh b/egs/sre16/v1/run.sh index 28481e27c3a..2315d7ac78a 100755 --- a/egs/sre16/v1/run.sh +++ b/egs/sre16/v1/run.sh @@ -145,7 +145,7 @@ if [ $stage -le 4 ]; then # Prepare the MUSAN corpus, which consists of music, speech, and noise # suitable for augmentation. - local/make_musan.sh /export/corpora/JHU/musan data + steps/data/make_musan.sh --sampling-rate 8000 /export/corpora/JHU/musan data # Get the duration of the MUSAN recordings. This will be used by the # script augment_data_dir.py. diff --git a/egs/sre16/v2/run.sh b/egs/sre16/v2/run.sh index b2072dfd69d..7780c30560b 100755 --- a/egs/sre16/v2/run.sh +++ b/egs/sre16/v2/run.sh @@ -135,7 +135,7 @@ if [ $stage -le 2 ]; then # Prepare the MUSAN corpus, which consists of music, speech, and noise # suitable for augmentation. - local/make_musan.sh /export/corpora/JHU/musan data + steps/data/make_musan.sh --sampling-rate 8000 /export/corpora/JHU/musan data # Get the duration of the MUSAN recordings. This will be used by the # script augment_data_dir.py. @@ -174,6 +174,7 @@ if [ $stage -le 2 ]; then utils/copy_data_dir.sh data/swbd_sre_combined data/sre_combined utils/filter_scp.pl data/sre/spk2utt data/swbd_sre_combined/spk2utt | utils/spk2utt_to_utt2spk.pl > data/sre_combined/utt2spk utils/fix_data_dir.sh data/sre_combined + fi # Now we prepare the features to generate examples for xvector training. diff --git a/egs/voxceleb/v1/local/make_musan.py b/egs/voxceleb/v1/local/make_musan.py deleted file mode 100755 index 565bfce0cc9..00000000000 --- a/egs/voxceleb/v1/local/make_musan.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2015 David Snyder -# 2018 Ewald Enzinger -# Apache 2.0. -# -# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8). -# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz. -# -# This file is meant to be invoked by make_musan.sh. - -import os, sys - -def process_music_annotations(path): - utt2spk = {} - utt2vocals = {} - lines = open(path, 'r').readlines() - for line in lines: - utt, genres, vocals, musician = line.rstrip().split()[:4] - # For this application, the musican ID isn't important - utt2spk[utt] = utt - utt2vocals[utt] = vocals == "Y" - return utt2spk, utt2vocals - -def prepare_music(root_dir, use_vocals): - utt2vocals = {} - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - music_dir = os.path.join(root_dir, "music") - for root, dirs, files in os.walk(music_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - elif str(file) == "ANNOTATIONS": - utt2spk_part, utt2vocals_part = process_music_annotations(file_path) - utt2spk.update(utt2spk_part) - utt2vocals.update(utt2vocals_part) - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2vocals: - if utt in utt2wav: - if use_vocals or not utt2vocals[utt]: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In music directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_speech(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - speech_dir = os.path.join(root_dir, "speech") - for root, dirs, files in os.walk(speech_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In speech directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def prepare_noise(root_dir): - utt2spk = {} - utt2wav = {} - num_good_files = 0 - num_bad_files = 0 - noise_dir = os.path.join(root_dir, "noise") - for root, dirs, files in os.walk(noise_dir): - for file in files: - file_path = os.path.join(root, file) - if file.endswith(".wav"): - utt = str(file).replace(".wav", "") - utt2wav[utt] = file_path - utt2spk[utt] = utt - utt2spk_str = "" - utt2wav_str = "" - for utt in utt2spk: - if utt in utt2wav: - utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" - num_good_files += 1 - else: - print("Missing file {}".format(utt)) - num_bad_files += 1 - print("In noise directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files)) - return utt2spk_str, utt2wav_str - -def main(): - in_dir = sys.argv[1] - out_dir = sys.argv[2] - use_vocals = sys.argv[3] == "Y" - utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals) - utt2spk_speech, utt2wav_speech = prepare_speech(in_dir) - utt2spk_noise, utt2wav_noise = prepare_noise(in_dir) - utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise - utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise - wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w') - wav_fi.write(utt2wav) - utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w') - utt2spk_fi.write(utt2spk) - - -if __name__=="__main__": - main() diff --git a/egs/voxceleb/v1/local/make_musan.sh b/egs/voxceleb/v1/local/make_musan.sh deleted file mode 100755 index 1565ef0d85c..00000000000 --- a/egs/voxceleb/v1/local/make_musan.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright 2015 David Snyder -# Apache 2.0. -# -# Copy of egs/sre16/v1/local/make_musan.sh (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8). -# -# This script, called by ../run.sh, creates the MUSAN -# data directory. The required dataset is freely available at -# http://www.openslr.org/17/ - -set -e -in_dir=$1 -data_dir=$2 -use_vocals='Y' - -mkdir -p local/musan.tmp - -echo "Preparing ${data_dir}/musan..." -mkdir -p ${data_dir}/musan -local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals} - -utils/fix_data_dir.sh ${data_dir}/musan - -grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music -grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech -grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \ - ${data_dir}/musan ${data_dir}/musan_music -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \ - ${data_dir}/musan ${data_dir}/musan_speech -utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \ - ${data_dir}/musan ${data_dir}/musan_noise - -utils/fix_data_dir.sh ${data_dir}/musan_music -utils/fix_data_dir.sh ${data_dir}/musan_speech -utils/fix_data_dir.sh ${data_dir}/musan_noise - -rm -rf local/musan.tmp - diff --git a/egs/voxceleb/v2/run.sh b/egs/voxceleb/v2/run.sh index 44340873a80..7c70e4a42c1 100755 --- a/egs/voxceleb/v2/run.sh +++ b/egs/voxceleb/v2/run.sh @@ -30,7 +30,7 @@ if [ $stage -le 0 ]; then # This script creates data/voxceleb1_test and data/voxceleb1_train for latest version of VoxCeleb1. # Our evaluation set is the test portion of VoxCeleb1. local/make_voxceleb1_v2.pl $voxceleb1_root dev data/voxceleb1_train - local/make_voxceleb1_v2.pl $voxceleb1_root test data/voxceleb1_test + local/make_voxceleb1_v2.pl $voxceleb1_root test data/voxceleb1_test # if you downloaded the dataset soon after it was released, you will want to use the make_voxceleb1.pl script instead. # local/make_voxceleb1.pl $voxceleb1_root data # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1. @@ -84,7 +84,7 @@ if [ $stage -le 2 ]; then # Prepare the MUSAN corpus, which consists of music, speech, and noise # suitable for augmentation. - local/make_musan.sh $musan_root data + steps/data/make_musan.sh --sampling-rate 16000 $musan_root data # Get the duration of the MUSAN recordings. This will be used by the # script augment_data_dir.py. diff --git a/egs/wsj/s5/steps/data/make_musan.py b/egs/wsj/s5/steps/data/make_musan.py index 2a7bed453cb..80b9d7cf6d4 100755 --- a/egs/wsj/s5/steps/data/make_musan.py +++ b/egs/wsj/s5/steps/data/make_musan.py @@ -18,10 +18,10 @@ def get_args(): action=common_lib.StrToBoolAction, choices=["true", "false"], help='use vocals from the music corpus') - parser.add_argument('--sampling-rate', type=int, default=None, + parser.add_argument('--sampling-rate', type=int, default=16000, help="Sampling rate of the source data. If a positive integer is specified with this option, " "the MUSAN corpus will be resampled to the rate of the source data." - "Original MUSAN corpus is sampled at 16KHz") + "Original MUSAN corpus is sampled at 16KHz. Defaults to 16000 Hz") parser.add_argument("in_dir", help="Input data directory") parser.add_argument("out_dir", help="Output data directory") @@ -75,11 +75,11 @@ def prepare_music(root_dir, use_vocals, sampling_rate): if utt in utt2wav: if use_vocals or not utt2vocals[utt]: utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - if sampling_rate is not None or sampling_rate != 16000: + if sampling_rate == 16000: + utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" + else: utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r" \ " {fs} -t wav - |\n".format(fs=sampling_rate) - else: - utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" num_good_files += 1 else: print("Missing file {}".format(utt)) @@ -108,11 +108,11 @@ def prepare_speech(root_dir, sampling_rate): for utt in utt2spk: if utt in utt2wav: utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - if sampling_rate is not None or sampling_rate != 16000: - utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r" \ - " {fs} -t wav - |\n".format(fs=sampling_rate) - else: + if sampling_rate == 16000: utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" + else: + utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r" \ + " {fs} -t wav - |\n".format(fs=sampling_rate) num_good_files += 1 else: print("Missing file {}".format(utt)) @@ -141,11 +141,11 @@ def prepare_noise(root_dir, sampling_rate): for utt in utt2spk: if utt in utt2wav: utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n" - if sampling_rate is not None or sampling_rate != 16000: - utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r " \ - "{fs} -t wav - |\n".format(fs=sampling_rate) - else: + if sampling_rate == 16000: utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n" + else: + utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r" \ + " {fs} -t wav - |\n".format(fs=sampling_rate) num_good_files += 1 else: print("Missing file {}".format(utt))