From feb4ebb63656e62530647b69ca025bf398aec455 Mon Sep 17 00:00:00 2001
From: phanisankar-nidadavolu <phanisankar.nsv@gmail.com>
Date: Mon, 13 May 2019 19:20:54 -0400
Subject: [PATCH] updated local/make_musan.sh to steps/data/make_musan.sh in
 speaker id scripts

---
 egs/bn_music_speech/v1/local/make_musan.py    | 119 -----------------
 egs/bn_music_speech/v1/local/make_musan.sh    |  37 ------
 egs/bn_music_speech/v1/run.sh                 |   2 +-
 .../v1/local/make_musan.py                    | 119 -----------------
 .../v1/local/make_musan.sh                    |  37 ------
 egs/callhome_diarization/v2/run.sh            |   2 +-
 egs/dihard_2018/v2/local/make_musan.py        | 123 ------------------
 egs/dihard_2018/v2/local/make_musan.sh        |  39 ------
 egs/dihard_2018/v2/run.sh                     |  14 +-
 egs/sitw/v1/local/make_musan.py               | 123 ------------------
 egs/sitw/v1/local/make_musan.sh               |  39 ------
 egs/sitw/v1/run.sh                            |   2 +-
 egs/sitw/v2/run.sh                            |   2 +-
 egs/sre16/v1/local/make_musan.py              | 119 -----------------
 egs/sre16/v1/local/make_musan.sh              |  37 ------
 egs/sre16/v1/run.sh                           |   2 +-
 egs/sre16/v2/run.sh                           |   3 +-
 egs/voxceleb/v1/local/make_musan.py           | 123 ------------------
 egs/voxceleb/v1/local/make_musan.sh           |  39 ------
 egs/voxceleb/v2/run.sh                        |   4 +-
 egs/wsj/s5/steps/data/make_musan.py           |  26 ++--
 21 files changed, 29 insertions(+), 982 deletions(-)
 delete mode 100755 egs/bn_music_speech/v1/local/make_musan.py
 delete mode 100755 egs/bn_music_speech/v1/local/make_musan.sh
 delete mode 100755 egs/callhome_diarization/v1/local/make_musan.py
 delete mode 100755 egs/callhome_diarization/v1/local/make_musan.sh
 delete mode 100755 egs/dihard_2018/v2/local/make_musan.py
 delete mode 100755 egs/dihard_2018/v2/local/make_musan.sh
 delete mode 100755 egs/sitw/v1/local/make_musan.py
 delete mode 100755 egs/sitw/v1/local/make_musan.sh
 delete mode 100755 egs/sre16/v1/local/make_musan.py
 delete mode 100755 egs/sre16/v1/local/make_musan.sh
 delete mode 100755 egs/voxceleb/v1/local/make_musan.py
 delete mode 100755 egs/voxceleb/v1/local/make_musan.sh

diff --git a/egs/bn_music_speech/v1/local/make_musan.py b/egs/bn_music_speech/v1/local/make_musan.py
deleted file mode 100755
index eb739b68180..00000000000
--- a/egs/bn_music_speech/v1/local/make_musan.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# This file is meant to be invoked by make_musan.sh.
-
-import os, sys
-
-def process_music_annotations(path):
-  utt2spk = {}
-  utt2vocals = {}
-  lines = open(path, 'r').readlines()
-  for line in lines:
-    utt, genres, vocals, musician = line.rstrip().split()[:4]
-    # For this application, the musican ID isn't important
-    utt2spk[utt] = utt
-    utt2vocals[utt] = vocals == "Y"
-  return utt2spk, utt2vocals
-
-def prepare_music(root_dir, use_vocals):
-  utt2vocals = {}
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  music_dir = os.path.join(root_dir, "music")
-  for root, dirs, files in os.walk(music_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-      elif str(file) == "ANNOTATIONS":
-        utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
-        utt2spk.update(utt2spk_part)
-        utt2vocals.update(utt2vocals_part)
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2vocals:
-    if utt in utt2wav:
-      if use_vocals or not utt2vocals[utt]:
-        utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-        utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_speech(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  speech_dir = os.path.join(root_dir, "speech")
-  for root, dirs, files in os.walk(speech_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_noise(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  noise_dir = os.path.join(root_dir, "noise")
-  for root, dirs, files in os.walk(noise_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def main():
-  in_dir = sys.argv[1]
-  out_dir = sys.argv[2]
-  use_vocals = sys.argv[3] == "Y"
-  utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
-  utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
-  utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
-  utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
-  utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
-  wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w')
-  wav_fi.write(utt2wav)
-  utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w')
-  utt2spk_fi.write(utt2spk)
-
-
-if __name__=="__main__":
-  main()
diff --git a/egs/bn_music_speech/v1/local/make_musan.sh b/egs/bn_music_speech/v1/local/make_musan.sh
deleted file mode 100755
index 694940ad70f..00000000000
--- a/egs/bn_music_speech/v1/local/make_musan.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# This script, called by ../run.sh, creates the MUSAN
-# data directory. The required dataset is freely available at
-#   http://www.openslr.org/17/
-
-set -e
-in_dir=$1
-data_dir=$2
-use_vocals='Y'
-
-mkdir -p local/musan.tmp
-
-echo "Preparing ${data_dir}/musan..."
-mkdir -p ${data_dir}/musan
-local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals}
-
-utils/fix_data_dir.sh ${data_dir}/musan
-
-grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music
-grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech
-grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \
-  ${data_dir}/musan ${data_dir}/musan_music
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \
-  ${data_dir}/musan ${data_dir}/musan_speech
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \
-  ${data_dir}/musan ${data_dir}/musan_noise
-
-utils/fix_data_dir.sh ${data_dir}/musan_music
-utils/fix_data_dir.sh ${data_dir}/musan_speech
-utils/fix_data_dir.sh ${data_dir}/musan_noise
-
-rm -rf local/musan.tmp
-
diff --git a/egs/bn_music_speech/v1/run.sh b/egs/bn_music_speech/v1/run.sh
index 6cc0531e9d7..08d5c022a9d 100755
--- a/egs/bn_music_speech/v1/run.sh
+++ b/egs/bn_music_speech/v1/run.sh
@@ -20,7 +20,7 @@ vaddir=`pwd`/mfcc
 local/make_bn.sh /export/corpora5/LDC/LDC97S44 \
                  /export/corpora/LDC/LDC97T22 data
 
-local/make_musan.sh /export/corpora/JHU/musan data
+steps/data/make_musan.sh --sampling-rate 16000 /export/corpora/JHU/musan data
 
 steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 30 --cmd "$train_cmd" \
     data/musan_speech exp/make_mfcc $mfccdir
diff --git a/egs/callhome_diarization/v1/local/make_musan.py b/egs/callhome_diarization/v1/local/make_musan.py
deleted file mode 100755
index 7c50adf7c83..00000000000
--- a/egs/callhome_diarization/v1/local/make_musan.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# This file is meant to be invoked by make_musan.sh.
-
-import os, sys
-
-def process_music_annotations(path):
-  utt2spk = {}
-  utt2vocals = {}
-  lines = open(path, 'r').readlines()
-  for line in lines:
-    utt, genres, vocals, musician = line.rstrip().split()[:4]
-    # For this application, the musican ID isn't important
-    utt2spk[utt] = utt
-    utt2vocals[utt] = vocals == "Y"
-  return utt2spk, utt2vocals
-
-def prepare_music(root_dir, use_vocals):
-  utt2vocals = {}
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  music_dir = os.path.join(root_dir, "music")
-  for root, dirs, files in os.walk(music_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-      elif str(file) == "ANNOTATIONS":
-        utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
-        utt2spk.update(utt2spk_part)
-        utt2vocals.update(utt2vocals_part)
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2vocals:
-    if utt in utt2wav:
-      if use_vocals or not utt2vocals[utt]:
-        utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-        utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n"
-      num_good_files += 1
-    else:
-      print("Missing file: {}".format(utt))
-      num_bad_files += 1
-  print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_speech(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  speech_dir = os.path.join(root_dir, "speech")
-  for root, dirs, files in os.walk(speech_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n"
-      num_good_files += 1
-    else:
-      print("Missing file: {}".format(utt))
-      num_bad_files += 1
-  print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_noise(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  noise_dir = os.path.join(root_dir, "noise")
-  for root, dirs, files in os.walk(noise_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n"
-      num_good_files += 1
-    else:
-      print("Missing file: {}".format(utt))
-      num_bad_files += 1
-  print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def main():
-  in_dir = sys.argv[1]
-  out_dir = sys.argv[2]
-  use_vocals = sys.argv[3] == "Y"
-  utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
-  utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
-  utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
-  utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
-  utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
-  wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w')
-  wav_fi.write(utt2wav)
-  utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w')
-  utt2spk_fi.write(utt2spk)
-
-
-if __name__=="__main__":
-  main()
diff --git a/egs/callhome_diarization/v1/local/make_musan.sh b/egs/callhome_diarization/v1/local/make_musan.sh
deleted file mode 100755
index 694940ad70f..00000000000
--- a/egs/callhome_diarization/v1/local/make_musan.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# This script, called by ../run.sh, creates the MUSAN
-# data directory. The required dataset is freely available at
-#   http://www.openslr.org/17/
-
-set -e
-in_dir=$1
-data_dir=$2
-use_vocals='Y'
-
-mkdir -p local/musan.tmp
-
-echo "Preparing ${data_dir}/musan..."
-mkdir -p ${data_dir}/musan
-local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals}
-
-utils/fix_data_dir.sh ${data_dir}/musan
-
-grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music
-grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech
-grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \
-  ${data_dir}/musan ${data_dir}/musan_music
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \
-  ${data_dir}/musan ${data_dir}/musan_speech
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \
-  ${data_dir}/musan ${data_dir}/musan_noise
-
-utils/fix_data_dir.sh ${data_dir}/musan_music
-utils/fix_data_dir.sh ${data_dir}/musan_speech
-utils/fix_data_dir.sh ${data_dir}/musan_noise
-
-rm -rf local/musan.tmp
-
diff --git a/egs/callhome_diarization/v2/run.sh b/egs/callhome_diarization/v2/run.sh
index b79717e2348..e85baa50691 100755
--- a/egs/callhome_diarization/v2/run.sh
+++ b/egs/callhome_diarization/v2/run.sh
@@ -130,7 +130,7 @@ if [ $stage -le 2 ]; then
 
   # Prepare the MUSAN corpus, which consists of music, speech, and noise
   # suitable for augmentation.
-  local/make_musan.sh /export/corpora/JHU/musan data
+  steps/data/make_musan.sh --sampling-rate 8000 /export/corpora/JHU/musan data
 
   # Get the duration of the MUSAN recordings.  This will be used by the
   # script augment_data_dir.py.
diff --git a/egs/dihard_2018/v2/local/make_musan.py b/egs/dihard_2018/v2/local/make_musan.py
deleted file mode 100755
index c4b5c9359b4..00000000000
--- a/egs/dihard_2018/v2/local/make_musan.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2015   David Snyder
-#           2018   Ewald Enzinger
-# Apache 2.0.
-#
-# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
-# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz.
-#
-# This file is meant to be invoked by make_musan.sh.
-
-import os, sys
-
-def process_music_annotations(path):
-  utt2spk = {}
-  utt2vocals = {}
-  lines = open(path, 'r').readlines()
-  for line in lines:
-    utt, genres, vocals, musician = line.rstrip().split()[:4]
-    # For this application, the musican ID isn't important
-    utt2spk[utt] = utt
-    utt2vocals[utt] = vocals == "Y"
-  return utt2spk, utt2vocals
-
-def prepare_music(root_dir, use_vocals):
-  utt2vocals = {}
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  music_dir = os.path.join(root_dir, "music")
-  for root, dirs, files in os.walk(music_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-      elif str(file) == "ANNOTATIONS":
-        utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
-        utt2spk.update(utt2spk_part)
-        utt2vocals.update(utt2vocals_part)
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2vocals:
-    if utt in utt2wav:
-      if use_vocals or not utt2vocals[utt]:
-        utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-        utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print(("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_speech(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  speech_dir = os.path.join(root_dir, "speech")
-  for root, dirs, files in os.walk(speech_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print(("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_noise(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  noise_dir = os.path.join(root_dir, "noise")
-  for root, dirs, files in os.walk(noise_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print(("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def main():
-  in_dir = sys.argv[1]
-  out_dir = sys.argv[2]
-  use_vocals = sys.argv[3] == "Y"
-  utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
-  utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
-  utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
-  utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
-  utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
-  wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w')
-  wav_fi.write(utt2wav)
-  utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w')
-  utt2spk_fi.write(utt2spk)
-
-
-if __name__=="__main__":
-  main()
diff --git a/egs/dihard_2018/v2/local/make_musan.sh b/egs/dihard_2018/v2/local/make_musan.sh
deleted file mode 100755
index 1565ef0d85c..00000000000
--- a/egs/dihard_2018/v2/local/make_musan.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# Copy of egs/sre16/v1/local/make_musan.sh (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
-#
-# This script, called by ../run.sh, creates the MUSAN
-# data directory. The required dataset is freely available at
-#   http://www.openslr.org/17/
-
-set -e
-in_dir=$1
-data_dir=$2
-use_vocals='Y'
-
-mkdir -p local/musan.tmp
-
-echo "Preparing ${data_dir}/musan..."
-mkdir -p ${data_dir}/musan
-local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals}
-
-utils/fix_data_dir.sh ${data_dir}/musan
-
-grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music
-grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech
-grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \
-  ${data_dir}/musan ${data_dir}/musan_music
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \
-  ${data_dir}/musan ${data_dir}/musan_speech
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \
-  ${data_dir}/musan ${data_dir}/musan_noise
-
-utils/fix_data_dir.sh ${data_dir}/musan_music
-utils/fix_data_dir.sh ${data_dir}/musan_speech
-utils/fix_data_dir.sh ${data_dir}/musan_noise
-
-rm -rf local/musan.tmp
-
diff --git a/egs/dihard_2018/v2/run.sh b/egs/dihard_2018/v2/run.sh
index d330322a5e8..b0fbdcc968c 100755
--- a/egs/dihard_2018/v2/run.sh
+++ b/egs/dihard_2018/v2/run.sh
@@ -118,7 +118,7 @@ if [ $stage -le 2 ]; then
 
   # Prepare the MUSAN corpus, which consists of music, speech, and noise
   # suitable for augmentation.
-  local/make_musan.sh $musan_root data
+  steps/data/make_musan.sh --sampling-rate 16000 $musan_root data
 
   # Get the duration of the MUSAN recordings.  This will be used by the
   # script augment_data_dir.py.
@@ -251,7 +251,7 @@ if [ $stage -le 12 ]; then
 
   # The threshold is in terms of the log likelihood ratio provided by the
   # PLDA scores.  In a perfectly calibrated system, the threshold is 0.
-  # In the following loop, we evaluate DER performance on DIHARD 2018 development 
+  # In the following loop, we evaluate DER performance on DIHARD 2018 development
   # set using some reasonable thresholds for a well-calibrated system.
   for threshold in -0.5 -0.4 -0.3 -0.2 -0.1 -0.05 0 0.05 0.1 0.2 0.3 0.4 0.5; do
     diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
@@ -276,16 +276,16 @@ if [ $stage -le 12 ]; then
     --threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
     $nnet_dir/xvectors_dihard_2018_dev/plda_scores $nnet_dir/xvectors_dihard_2018_dev/plda_scores
 
-  # Cluster DIHARD 2018 evaluation set using the best threshold found for the DIHARD 
-  # 2018 development set. The DIHARD 2018 development set is used as the validation 
-  # set to tune the parameters. 
+  # Cluster DIHARD 2018 evaluation set using the best threshold found for the DIHARD
+  # 2018 development set. The DIHARD 2018 development set is used as the validation
+  # set to tune the parameters.
   diarization/cluster.sh --cmd "$train_cmd --mem 4G" --nj 20 \
     --threshold $(cat $nnet_dir/tuning/dihard_2018_dev_best) --rttm-channel 1 \
     $nnet_dir/xvectors_dihard_2018_eval/plda_scores $nnet_dir/xvectors_dihard_2018_eval/plda_scores
 
   mkdir -p $nnet_dir/results
-  # Compute the DER on the DIHARD 2018 evaluation set. We use the official metrics of   
-  # the DIHARD challenge. The DER is calculated with no unscored collars and including  
+  # Compute the DER on the DIHARD 2018 evaluation set. We use the official metrics of
+  # the DIHARD challenge. The DER is calculated with no unscored collars and including
   # overlapping speech.
   md-eval.pl -r data/dihard_2018_eval/rttm \
     -s $nnet_dir/xvectors_dihard_2018_eval/plda_scores/rttm 2> $nnet_dir/results/threshold.log \
diff --git a/egs/sitw/v1/local/make_musan.py b/egs/sitw/v1/local/make_musan.py
deleted file mode 100755
index 833da0619c9..00000000000
--- a/egs/sitw/v1/local/make_musan.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2015   David Snyder
-#           2018   Ewald Enzinger
-# Apache 2.0.
-#
-# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
-# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz.
-#
-# This file is meant to be invoked by make_musan.sh.
-
-import os, sys
-
-def process_music_annotations(path):
-  utt2spk = {}
-  utt2vocals = {}
-  lines = open(path, 'r').readlines()
-  for line in lines:
-    utt, genres, vocals, musician = line.rstrip().split()[:4]
-    # For this application, the musican ID isn't important
-    utt2spk[utt] = utt
-    utt2vocals[utt] = vocals == "Y"
-  return utt2spk, utt2vocals
-
-def prepare_music(root_dir, use_vocals):
-  utt2vocals = {}
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  music_dir = os.path.join(root_dir, "music")
-  for root, dirs, files in os.walk(music_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-      elif str(file) == "ANNOTATIONS":
-        utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
-        utt2spk.update(utt2spk_part)
-        utt2vocals.update(utt2vocals_part)
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2vocals:
-    if utt in utt2wav:
-      if use_vocals or not utt2vocals[utt]:
-        utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-        utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_speech(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  speech_dir = os.path.join(root_dir, "speech")
-  for root, dirs, files in os.walk(speech_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_noise(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  noise_dir = os.path.join(root_dir, "noise")
-  for root, dirs, files in os.walk(noise_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def main():
-  in_dir = sys.argv[1]
-  out_dir = sys.argv[2]
-  use_vocals = sys.argv[3] == "Y"
-  utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
-  utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
-  utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
-  utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
-  utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
-  wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w')
-  wav_fi.write(utt2wav)
-  utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w')
-  utt2spk_fi.write(utt2spk)
-
-
-if __name__=="__main__":
-  main()
diff --git a/egs/sitw/v1/local/make_musan.sh b/egs/sitw/v1/local/make_musan.sh
deleted file mode 100755
index 1565ef0d85c..00000000000
--- a/egs/sitw/v1/local/make_musan.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# Copy of egs/sre16/v1/local/make_musan.sh (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
-#
-# This script, called by ../run.sh, creates the MUSAN
-# data directory. The required dataset is freely available at
-#   http://www.openslr.org/17/
-
-set -e
-in_dir=$1
-data_dir=$2
-use_vocals='Y'
-
-mkdir -p local/musan.tmp
-
-echo "Preparing ${data_dir}/musan..."
-mkdir -p ${data_dir}/musan
-local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals}
-
-utils/fix_data_dir.sh ${data_dir}/musan
-
-grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music
-grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech
-grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \
-  ${data_dir}/musan ${data_dir}/musan_music
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \
-  ${data_dir}/musan ${data_dir}/musan_speech
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \
-  ${data_dir}/musan ${data_dir}/musan_noise
-
-utils/fix_data_dir.sh ${data_dir}/musan_music
-utils/fix_data_dir.sh ${data_dir}/musan_speech
-utils/fix_data_dir.sh ${data_dir}/musan_noise
-
-rm -rf local/musan.tmp
-
diff --git a/egs/sitw/v1/run.sh b/egs/sitw/v1/run.sh
index e016f8a4752..797451df263 100755
--- a/egs/sitw/v1/run.sh
+++ b/egs/sitw/v1/run.sh
@@ -137,7 +137,7 @@ if [ $stage -le 4 ]; then
 
   # Prepare the MUSAN corpus, which consists of music, speech, and noise
   # suitable for augmentation.
-  local/make_musan.sh $musan_root data
+  steps/data/make_musan.sh --sampling-rate 16000 $musan_root data
 
   # Get the duration of the MUSAN recordings.  This will be used by the
   # script augment_data_dir.py.
diff --git a/egs/sitw/v2/run.sh b/egs/sitw/v2/run.sh
index 8aeecc18b3f..aad58e4a853 100755
--- a/egs/sitw/v2/run.sh
+++ b/egs/sitw/v2/run.sh
@@ -103,7 +103,7 @@ if [ $stage -le 2 ]; then
 
   # Prepare the MUSAN corpus, which consists of music, speech, and noise
   # suitable for augmentation.
-  local/make_musan.sh $musan_root data
+  steps/data/make_musan.sh --sampling-rate 16000 $musan_root data
 
   # Get the duration of the MUSAN recordings.  This will be used by the
   # script augment_data_dir.py.
diff --git a/egs/sre16/v1/local/make_musan.py b/egs/sre16/v1/local/make_musan.py
deleted file mode 100755
index 7735bd28818..00000000000
--- a/egs/sre16/v1/local/make_musan.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# This file is meant to be invoked by make_musan.sh.
-
-import os, sys
-
-def process_music_annotations(path):
-  utt2spk = {}
-  utt2vocals = {}
-  lines = open(path, 'r').readlines()
-  for line in lines:
-    utt, genres, vocals, musician = line.rstrip().split()[:4]
-    # For this application, the musican ID isn't important
-    utt2spk[utt] = utt
-    utt2vocals[utt] = vocals == "Y"
-  return utt2spk, utt2vocals
-
-def prepare_music(root_dir, use_vocals):
-  utt2vocals = {}
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  music_dir = os.path.join(root_dir, "music")
-  for root, dirs, files in os.walk(music_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-      elif str(file) == "ANNOTATIONS":
-        utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
-        utt2spk.update(utt2spk_part)
-        utt2vocals.update(utt2vocals_part)
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2vocals:
-    if utt in utt2wav:
-      if use_vocals or not utt2vocals[utt]:
-        utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-        utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In music directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_speech(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  speech_dir = os.path.join(root_dir, "speech")
-  for root, dirs, files in os.walk(speech_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In speech directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_noise(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  noise_dir = os.path.join(root_dir, "noise")
-  for root, dirs, files in os.walk(noise_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r 8k -t wav - |\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In noise directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def main():
-  in_dir = sys.argv[1]
-  out_dir = sys.argv[2]
-  use_vocals = sys.argv[3] == "Y"
-  utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
-  utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
-  utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
-  utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
-  utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
-  wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w')
-  wav_fi.write(utt2wav)
-  utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w')
-  utt2spk_fi.write(utt2spk)
-
-
-if __name__=="__main__":
-  main()
diff --git a/egs/sre16/v1/local/make_musan.sh b/egs/sre16/v1/local/make_musan.sh
deleted file mode 100755
index 694940ad70f..00000000000
--- a/egs/sre16/v1/local/make_musan.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# This script, called by ../run.sh, creates the MUSAN
-# data directory. The required dataset is freely available at
-#   http://www.openslr.org/17/
-
-set -e
-in_dir=$1
-data_dir=$2
-use_vocals='Y'
-
-mkdir -p local/musan.tmp
-
-echo "Preparing ${data_dir}/musan..."
-mkdir -p ${data_dir}/musan
-local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals}
-
-utils/fix_data_dir.sh ${data_dir}/musan
-
-grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music
-grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech
-grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \
-  ${data_dir}/musan ${data_dir}/musan_music
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \
-  ${data_dir}/musan ${data_dir}/musan_speech
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \
-  ${data_dir}/musan ${data_dir}/musan_noise
-
-utils/fix_data_dir.sh ${data_dir}/musan_music
-utils/fix_data_dir.sh ${data_dir}/musan_speech
-utils/fix_data_dir.sh ${data_dir}/musan_noise
-
-rm -rf local/musan.tmp
-
diff --git a/egs/sre16/v1/run.sh b/egs/sre16/v1/run.sh
index 28481e27c3a..2315d7ac78a 100755
--- a/egs/sre16/v1/run.sh
+++ b/egs/sre16/v1/run.sh
@@ -145,7 +145,7 @@ if [ $stage -le 4 ]; then
 
   # Prepare the MUSAN corpus, which consists of music, speech, and noise
   # suitable for augmentation.
-  local/make_musan.sh /export/corpora/JHU/musan data
+  steps/data/make_musan.sh --sampling-rate 8000 /export/corpora/JHU/musan data
 
   # Get the duration of the MUSAN recordings.  This will be used by the
   # script augment_data_dir.py.
diff --git a/egs/sre16/v2/run.sh b/egs/sre16/v2/run.sh
index b2072dfd69d..7780c30560b 100755
--- a/egs/sre16/v2/run.sh
+++ b/egs/sre16/v2/run.sh
@@ -135,7 +135,7 @@ if [ $stage -le 2 ]; then
 
   # Prepare the MUSAN corpus, which consists of music, speech, and noise
   # suitable for augmentation.
-  local/make_musan.sh /export/corpora/JHU/musan data
+  steps/data/make_musan.sh --sampling-rate 8000 /export/corpora/JHU/musan data
 
   # Get the duration of the MUSAN recordings.  This will be used by the
   # script augment_data_dir.py.
@@ -174,6 +174,7 @@ if [ $stage -le 2 ]; then
   utils/copy_data_dir.sh data/swbd_sre_combined data/sre_combined
   utils/filter_scp.pl data/sre/spk2utt data/swbd_sre_combined/spk2utt | utils/spk2utt_to_utt2spk.pl > data/sre_combined/utt2spk
   utils/fix_data_dir.sh data/sre_combined
+
 fi
 
 # Now we prepare the features to generate examples for xvector training.
diff --git a/egs/voxceleb/v1/local/make_musan.py b/egs/voxceleb/v1/local/make_musan.py
deleted file mode 100755
index 565bfce0cc9..00000000000
--- a/egs/voxceleb/v1/local/make_musan.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2015   David Snyder
-#           2018   Ewald Enzinger
-# Apache 2.0.
-#
-# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
-# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz.
-#
-# This file is meant to be invoked by make_musan.sh.
-
-import os, sys
-
-def process_music_annotations(path):
-  utt2spk = {}
-  utt2vocals = {}
-  lines = open(path, 'r').readlines()
-  for line in lines:
-    utt, genres, vocals, musician = line.rstrip().split()[:4]
-    # For this application, the musican ID isn't important
-    utt2spk[utt] = utt
-    utt2vocals[utt] = vocals == "Y"
-  return utt2spk, utt2vocals
-
-def prepare_music(root_dir, use_vocals):
-  utt2vocals = {}
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  music_dir = os.path.join(root_dir, "music")
-  for root, dirs, files in os.walk(music_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-      elif str(file) == "ANNOTATIONS":
-        utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
-        utt2spk.update(utt2spk_part)
-        utt2vocals.update(utt2vocals_part)
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2vocals:
-    if utt in utt2wav:
-      if use_vocals or not utt2vocals[utt]:
-        utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-        utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In music directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_speech(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  speech_dir = os.path.join(root_dir, "speech")
-  for root, dirs, files in os.walk(speech_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In speech directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def prepare_noise(root_dir):
-  utt2spk = {}
-  utt2wav = {}
-  num_good_files = 0
-  num_bad_files = 0
-  noise_dir = os.path.join(root_dir, "noise")
-  for root, dirs, files in os.walk(noise_dir):
-    for file in files:
-      file_path = os.path.join(root, file)
-      if file.endswith(".wav"):
-        utt = str(file).replace(".wav", "")
-        utt2wav[utt] = file_path
-        utt2spk[utt] = utt
-  utt2spk_str = ""
-  utt2wav_str = ""
-  for utt in utt2spk:
-    if utt in utt2wav:
-      utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-      utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
-      num_good_files += 1
-    else:
-      print("Missing file {}".format(utt))
-      num_bad_files += 1
-  print("In noise directory, processed {} files; {} had missing wav data".format(num_good_files, num_bad_files))
-  return utt2spk_str, utt2wav_str
-
-def main():
-  in_dir = sys.argv[1]
-  out_dir = sys.argv[2]
-  use_vocals = sys.argv[3] == "Y"
-  utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
-  utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
-  utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
-  utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
-  utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
-  wav_fi = open(os.path.join(out_dir, "wav.scp"), 'w')
-  wav_fi.write(utt2wav)
-  utt2spk_fi = open(os.path.join(out_dir, "utt2spk"), 'w')
-  utt2spk_fi.write(utt2spk)
-
-
-if __name__=="__main__":
-  main()
diff --git a/egs/voxceleb/v1/local/make_musan.sh b/egs/voxceleb/v1/local/make_musan.sh
deleted file mode 100755
index 1565ef0d85c..00000000000
--- a/egs/voxceleb/v1/local/make_musan.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# Copyright 2015   David Snyder
-# Apache 2.0.
-#
-# Copy of egs/sre16/v1/local/make_musan.sh (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
-#
-# This script, called by ../run.sh, creates the MUSAN
-# data directory. The required dataset is freely available at
-#   http://www.openslr.org/17/
-
-set -e
-in_dir=$1
-data_dir=$2
-use_vocals='Y'
-
-mkdir -p local/musan.tmp
-
-echo "Preparing ${data_dir}/musan..."
-mkdir -p ${data_dir}/musan
-local/make_musan.py ${in_dir} ${data_dir}/musan ${use_vocals}
-
-utils/fix_data_dir.sh ${data_dir}/musan
-
-grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music
-grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech
-grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \
-  ${data_dir}/musan ${data_dir}/musan_music
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \
-  ${data_dir}/musan ${data_dir}/musan_speech
-utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \
-  ${data_dir}/musan ${data_dir}/musan_noise
-
-utils/fix_data_dir.sh ${data_dir}/musan_music
-utils/fix_data_dir.sh ${data_dir}/musan_speech
-utils/fix_data_dir.sh ${data_dir}/musan_noise
-
-rm -rf local/musan.tmp
-
diff --git a/egs/voxceleb/v2/run.sh b/egs/voxceleb/v2/run.sh
index 44340873a80..7c70e4a42c1 100755
--- a/egs/voxceleb/v2/run.sh
+++ b/egs/voxceleb/v2/run.sh
@@ -30,7 +30,7 @@ if [ $stage -le 0 ]; then
   # This script creates data/voxceleb1_test and data/voxceleb1_train for latest version of VoxCeleb1.
   # Our evaluation set is the test portion of VoxCeleb1.
   local/make_voxceleb1_v2.pl $voxceleb1_root dev data/voxceleb1_train
-  local/make_voxceleb1_v2.pl $voxceleb1_root test data/voxceleb1_test 
+  local/make_voxceleb1_v2.pl $voxceleb1_root test data/voxceleb1_test
   # if you downloaded the dataset soon after it was released, you will want to use the make_voxceleb1.pl script instead.
   # local/make_voxceleb1.pl $voxceleb1_root data
   # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1.
@@ -84,7 +84,7 @@ if [ $stage -le 2 ]; then
 
   # Prepare the MUSAN corpus, which consists of music, speech, and noise
   # suitable for augmentation.
-  local/make_musan.sh $musan_root data
+  steps/data/make_musan.sh --sampling-rate 16000 $musan_root data
 
   # Get the duration of the MUSAN recordings.  This will be used by the
   # script augment_data_dir.py.
diff --git a/egs/wsj/s5/steps/data/make_musan.py b/egs/wsj/s5/steps/data/make_musan.py
index 2a7bed453cb..80b9d7cf6d4 100755
--- a/egs/wsj/s5/steps/data/make_musan.py
+++ b/egs/wsj/s5/steps/data/make_musan.py
@@ -18,10 +18,10 @@ def get_args():
                         action=common_lib.StrToBoolAction,
                         choices=["true", "false"],
                         help='use vocals from the music corpus')
-    parser.add_argument('--sampling-rate', type=int, default=None,
+    parser.add_argument('--sampling-rate', type=int, default=16000,
                         help="Sampling rate of the source data. If a positive integer is specified with this option, "
                         "the MUSAN corpus will be resampled to the rate of the source data."
-                        "Original MUSAN corpus is sampled at 16KHz")
+                        " Original MUSAN corpus is sampled at 16 kHz. Defaults to 16000 Hz.")
     parser.add_argument("in_dir", help="Input data directory")
     parser.add_argument("out_dir", help="Output data directory")
 
@@ -75,11 +75,11 @@ def prepare_music(root_dir, use_vocals, sampling_rate):
         if utt in utt2wav:
             if use_vocals or not utt2vocals[utt]:
                 utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-                if sampling_rate is not None or sampling_rate != 16000:
+                if sampling_rate == 16000:
+                    utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
+                else:
                     utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r" \
                                     " {fs} -t wav - |\n".format(fs=sampling_rate)
-                else:
-                    utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
             num_good_files += 1
         else:
             print("Missing file {}".format(utt))
@@ -108,11 +108,11 @@ def prepare_speech(root_dir, sampling_rate):
     for utt in utt2spk:
         if utt in utt2wav:
             utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-            if sampling_rate is not None or sampling_rate != 16000:
-                utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r" \
-                        " {fs} -t wav - |\n".format(fs=sampling_rate)
-            else:
+            if sampling_rate == 16000:
                 utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
+            else:
+                utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r" \
+                                    " {fs} -t wav - |\n".format(fs=sampling_rate)
             num_good_files += 1
         else:
             print("Missing file {}".format(utt))
@@ -141,11 +141,11 @@ def prepare_noise(root_dir, sampling_rate):
     for utt in utt2spk:
         if utt in utt2wav:
             utt2spk_str = utt2spk_str + utt + " " + utt2spk[utt] + "\n"
-            if sampling_rate is not None or sampling_rate != 16000:
-                utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r " \
-                                    "{fs} -t wav - |\n".format(fs=sampling_rate)
-            else:
+            if sampling_rate == 16000:
                 utt2wav_str = utt2wav_str + utt + " " + utt2wav[utt] + "\n"
+            else:
+                utt2wav_str = utt2wav_str + utt + " sox -t wav " + utt2wav[utt] + " -r" \
+                                    " {fs} -t wav - |\n".format(fs=sampling_rate)
             num_good_files += 1
         else:
             print("Missing file {}".format(utt))