From 7fee29a52fa028ce0daa52cde128dff529897aee Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Fri, 16 Nov 2018 18:53:03 -0800 Subject: [PATCH 01/21] Initialized the example --- example/gluon/urban_sounds/datasets.py | 174 +++++++++++++++++++ example/gluon/urban_sounds/model.py | 34 ++++ example/gluon/urban_sounds/predict.py | 91 ++++++++++ example/gluon/urban_sounds/train.py | 165 ++++++++++++++++++ example/gluon/urban_sounds/transforms.py | 210 +++++++++++++++++++++++ 5 files changed, 674 insertions(+) create mode 100644 example/gluon/urban_sounds/datasets.py create mode 100644 example/gluon/urban_sounds/model.py create mode 100644 example/gluon/urban_sounds/predict.py create mode 100644 example/gluon/urban_sounds/train.py create mode 100644 example/gluon/urban_sounds/transforms.py diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py new file mode 100644 index 000000000000..013f8fa42da1 --- /dev/null +++ b/example/gluon/urban_sounds/datasets.py @@ -0,0 +1,174 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +# pylint: disable= +""" Audio Dataset container.""" +__all__ = ['AudioFolderDataset'] + +import os +import warnings +import mxnet as mx +from mxnet.gluon.data import Dataset +from mxnet import ndarray as nd +try: + import librosa +except ImportError as e: + warnings.warn("gluon/contrib/data/audio/datasets.py : librosa dependency could not be resolved or \ + imported, could not load audio onto the numpy array.") + + +class AudioFolderDataset(Dataset): + """A dataset for loading Audio files stored in a folder structure like:: + + root/children_playing/0.wav + root/siren/23.wav + root/drilling/26.wav + root/dog_barking/42.wav + OR + Files(wav) and a csv file that has filename and associated label + + Parameters + ---------- + root : str + Path to root directory. + transform : callable, default None + A function that takes data and label and transforms them + train_csv: str, default None + train_csv should be populated by the training csv filename + file_format: str, default '.wav' + The format of the audio files(.wav, .mp3) + skip_rows: int, default 0 + While reading from csv file, how many rows to skip at the start of the file to avoid reading in header + + Attributes + ---------- + synsets : list + List of class names. `synsets[i]` is the name for the integer label `i` + items : list of tuples + List of all audio in (filename, label) pairs. + """ + def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0): + if not librosa: + warnings.warn("pip install librosa to continue.") + return + self._root = os.path.expanduser(root) + self._exts = ['.wav'] + self._format = file_format + self._train_csv = train_csv + if file_format.lower() not in self._exts: + warnings.warn("format {} not supported currently.".format(file_format)) + return + self._list_audio_files(self._root, skip_rows=skip_rows) + + + def _list_audio_files(self, root, skip_rows=0): + """ + Populates synsets - a map of index to label for the data items. + Populates the data in the dataset, making tuples of (data, label) + """ + self.synsets = [] + self.items = [] + if self._train_csv is None: + for folder in sorted(os.listdir(root)): + path = os.path.join(root, folder) + if not os.path.isdir(path): + warnings.warn('Ignoring %s, which is not a directory.'%path, stacklevel=3) + continue + label = len(self.synsets) + self.synsets.append(folder) + for filename in sorted(os.listdir(path)): + file_name = os.path.join(path, filename) + ext = os.path.splitext(file_name)[1] + if ext.lower() not in self._exts: + warnings.warn('Ignoring %s of type %s. Only support %s'%(filename, ext, ', '.join(self._exts))) + continue + self.items.append((file_name, label)) + else: + data_tmp = [] + label_tmp = [] + skipped_rows = 0 + with open(self._train_csv, "r") as traincsv: + for line in traincsv: + skipped_rows = skipped_rows + 1 + if skipped_rows <= skip_rows: + continue + filename = os.path.join(root, line.split(",")[0]) + label = line.split(",")[1].strip() + if label not in self.synsets: + self.synsets.append(label) + data_tmp.append(os.path.join(self._root, line.split(",")[0])) + label_tmp.append(self.synsets.index(label)) + + #Generating the synset.txt file now + with open("./synset.txt", "w") as synsets_file: + for item in self.synsets: + synsets_file.write(item+os.linesep) + print("Synsets is generated as synset.txt") + + self._label = nd.array(label_tmp) + for i, _ in enumerate(data_tmp): + if self._format not in data_tmp[i]: + self.items.append((data_tmp[i]+self._format, self._label[i])) + + def __getitem__(self, idx): + """ + Retrieve the item (data, label) stored at idx in items + """ + filename = self.items[idx][0] + label = self.items[idx][1] + + if librosa is not None: + X1, _ = librosa.load(filename, res_type='kaiser_fast') + return nd.array(X1), label + + else: + warnings.warn(" Dependency librosa is not installed! \ + Cannot load the audio(wav) file into the numpy.ndarray.") + return self.items[idx][0], self.items[idx][1] + + def __len__(self): + """ + Retrieves the number of items in the dataset + """ + return len(self.items) + + + def transform_first(self, fn, lazy=True): + """Returns a new dataset with the first element of each sample + transformed by the transformer function `fn`. + + This is useful, for example, when you only want to transform data + while keeping label as is. + + Parameters + ---------- + fn : callable + A transformer function that takes the first elemtn of a sample + as input and returns the transformed element. + lazy : bool, default True + If False, transforms all samples at once. Otherwise, + transforms each sample on demand. Note that if `fn` + is stochastic, you must set lazy to True or you will + get the same result on all epochs. + + Returns + ------- + Dataset + The transformed dataset. + """ + return super(AudioFolderDataset, self).transform_first(fn, lazy=False) diff --git a/example/gluon/urban_sounds/model.py b/example/gluon/urban_sounds/model.py new file mode 100644 index 000000000000..3b3c3500c2bb --- /dev/null +++ b/example/gluon/urban_sounds/model.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" + This module builds a model an MLP with a configurable output layer( number of units in the last layer). + Users can pass any number of units in the last layer. SInce this dataset has 10 labels, + the default value of num_labels = 10 +""" +import mxnet as mx +from mxnet import gluon + +# Defining a neural network with number of labels +def get_net(num_labels=10): + net = gluon.nn.Sequential() + with net.name_scope(): + net.add(gluon.nn.Dense(256, activation="relu")) # 1st layer (256 nodes) + net.add(gluon.nn.Dense(256, activation="relu")) # 2nd hidden layer + net.add(gluon.nn.Dense(num_labels)) + net.collect_params().initialize(mx.init.Normal(1.)) + return net diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/urban_sounds/predict.py new file mode 100644 index 000000000000..4bbecb481bb9 --- /dev/null +++ b/example/gluon/urban_sounds/predict.py @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" Prediction module for Urban Sounds Classification +""" +import os +import warnings +import mxnet as mx +from mxnet import nd +from transforms import MFCC +from model import get_net + +def predict(prediction_dir='./Test'): + """The function is used to run predictions on the audio files in the directory `pred_directory`. + + Parameters + ---------- + net: + The model that has been trained. + prediction_dir: string, default ./Test + The directory that contains the audio files on which predictions are to be made + + """ + + try: + import librosa + except ImportError: + warnings.warn("Librosa is not installed! please run the following command pip install librosa.") + return + + if not os.path.exists(prediction_dir): + warnings.warn("The directory on which predictions are to be made is not found!") + return + + if len(os.listdir(prediction_dir)) == 0: + warnings.warn("The directory on which predictions are to be made is empty! Exiting...") + return + + # Loading synsets + if not os.path.exists('./synset.txt'): + warnings.warn("The synset or labels for the dataset do not exist. Please run the training script first.") + return + + with open("./synset.txt", "r") as f: + synset = [l.rstrip() for l in f] + net = get_net(len(synset)) + print("Trying to load the model with the saved parameters...") + if not os.path.exists("./net.params"): + warnings.warn("The model does not have any saved parameters... Cannot proceed! Train the model first") + return + + net.load_parameters("./net.params") + file_names = os.listdir(prediction_dir) + full_file_names = [os.path.join(prediction_dir, item) for item in file_names] + mfcc = MFCC() + print("\nStarting predictions for audio files in ", prediction_dir, " ....\n") + for filename in full_file_names: + # Argument kaiser_fast to res_type is faster than 'kaiser_best'. To reduce the load time, passing kaiser_fast. + X1, _ = librosa.load(filename, res_type='kaiser_fast') + transformed_test_data = mfcc(mx.nd.array(X1)) + output = net(transformed_test_data.reshape((1, -1))) + prediction = nd.argmax(output, axis=1) + print(filename, " -> ", synset[(int)(prediction.asscalar())]) + + +if __name__ == '__main__': + try: + import argparse + parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet") + parser.add_argument('--pred', '-p', help="Enter the folder path that contains your audio files", type=str) + args = parser.parse_args() + pred_dir = args.pred + + except ImportError: + warnings.warn("Argparse module not installed! passing default arguments.") + pred_dir = './Test' + predict(prediction_dir=pred_dir) + print("Urban sounds classification Prediction DONE!") diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py new file mode 100644 index 000000000000..2e12a85aa90b --- /dev/null +++ b/example/gluon/urban_sounds/train.py @@ -0,0 +1,165 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""The module to run training on the Urban sounds dataset +""" +import os +import time +import warnings +import mxnet as mx +from mxnet import gluon, nd, autograd +from datasets import AudioFolderDataset +from transforms import MFCC +import model + +def evaluate_accuracy(data_iterator, net): + """Function to evaluate accuracy of any data iterator passed to it as an argument""" + acc = mx.metric.Accuracy() + for _, (data, label) in enumerate(data_iterator): + output = net(data) + predictions = nd.argmax(output, axis=1) + predictions = predictions.reshape((-1, 1)) + acc.update(preds=predictions, labels=label) + return acc.get()[1] + + +def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): + """The function responsible for running the training the model.""" + try: + import librosa + except ImportError: + warnings.warn("The dependency librosa is not installed. Cannot continue") + return + if not train_dir or not os.path.exists(train_dir) or not train_csv: + warnings.warn("No train directory could be found ") + return + # Make a dataset from the local folder containing Audio data + print("\nMaking an Audio Dataset...\n") + tick = time.time() + aud_dataset = AudioFolderDataset(train_dir, train_csv=train_csv, file_format='.wav', skip_rows=1) + tock = time.time() + + print("Loading the dataset took ", (tock-tick), " seconds.") + print("\n=======================================\n") + print("Number of output classes = ", len(aud_dataset.synsets)) + print("\nThe labels are : \n") + print(aud_dataset.synsets) + # Get the model to train + net = model.get_net(len(aud_dataset.synsets)) + print("\nNeural Network = \n") + print(net) + print("\nModel - Neural Network Generated!\n") + print("=======================================\n") + + #Define the loss - Softmax CE Loss + softmax_loss = gluon.loss.SoftmaxCELoss(from_logits=False, sparse_label=True) + print("Loss function initialized!\n") + print("=======================================\n") + + #Define the trainer with the optimizer + trainer = gluon.Trainer(net.collect_params(), 'adadelta') + print("Optimizer - Trainer function initialized!\n") + print("=======================================\n") + print("Loading the dataset to the Gluon's OOTB Dataloader...") + + #Getting the data loader out of the AudioDataset and passing the transform + aud_transform = MFCC() + tick = time.time() + + audio_train_loader = gluon.data.DataLoader(aud_dataset.transform_first(aud_transform), batch_size=32, shuffle=True) + tock = time.time() + print("Time taken to load data and apply transform here is ", (tock-tick), " seconds.") + print("=======================================\n") + + + print("Starting the training....\n") + # Training loop + tick = time.time() + batch_size = batch_size + num_examples = len(aud_dataset) + + for e in range(epochs): + cumulative_loss = 0 + for _, (data, label) in enumerate(audio_train_loader): + with autograd.record(): + output = net(data) + loss = softmax_loss(output, label) + loss.backward() + + trainer.step(batch_size) + cumulative_loss += mx.nd.sum(loss).asscalar() + + if e%5 == 0: + train_accuracy = evaluate_accuracy(audio_train_loader, net) + print("Epoch %s. Loss: %s Train accuracy : %s " % (e, cumulative_loss/num_examples, train_accuracy)) + print("\n------------------------------\n") + + train_accuracy = evaluate_accuracy(audio_train_loader, net) + tock = time.time() + print("\nFinal training accuracy: ", train_accuracy) + + print("Training the sound classification for ", epochs, " epochs, MLP model took ", (tock-tick), " seconds") + print("====================== END ======================\n") + + print("Trying to save the model parameters here...") + net.save_parameters("./net.params") + print("Saved the model parameters in current directory.") + + +if __name__ == '__main__': + + try: + import argparse + parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet") + parser.add_argument('--train', '-t', help="Enter the folder path that contains your audio files", type=str) + parser.add_argument('--csv', '-c', help="Enter the filename of the csv that contains filename\ + to label mapping", type=str) + parser.add_argument('--epochs', '-e', help="Enter the number of epochs \ + you would want to run the training for.", type=int) + parser.add_argument('--batch_size', '-b', help="Enter the batch_size of data", type=int) + args = parser.parse_args() + + if args: + if args.train: + training_dir = args.train + else: + training_dir = './Train' + + if args.csv: + training_csv = args.csv + else: + training_csv = './train.csv' + + if args.epochs: + eps = args.epochs + else: + eps = 30 + + if args.batch_size: + batch_sz = args.batch_size + else: + batch_sz = 32 + + except ImportError as er: + warnings.warn("Argument parsing module could not be imported \ + Passing default arguments.") + training_dir = './Train' + training_csv = './train.csv' + eps = 30 + batch_sz = 32 + + train(train_dir=training_dir, train_csv=training_csv, epochs=eps, batch_size=batch_sz) + print("Urban sounds classification Training DONE!") diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py new file mode 100644 index 000000000000..ec626e42fb0b --- /dev/null +++ b/example/gluon/urban_sounds/transforms.py @@ -0,0 +1,210 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +# pylint: disable= arguments-differ +"Audio transforms." + +import warnings +import numpy as np +try: + import librosa +except ImportError as e: + warnings.warn("gluon/contrib/data/audio/transforms.py : librosa dependency could not be resolved or \ + imported, could not provide some/all transform.") + +from mxnet import ndarray as nd +from mxnet.gluon.block import Block + +class MFCC(Block): + """Extracts Mel frequency cepstrum coefficients from the audio data file + More details : https://librosa.github.io/librosa/generated/librosa.feature.mfcc.html + + Attributes + ---------- + sampling_rate: int, default 22050 + sampling rate of the input audio signal + num_mfcc: int, default 20 + number of mfccs to return + + + Inputs: + - **x**: input tensor (samples, ) shape. + + Outputs: + - **out**: output array is a scaled NDArray with (samples, ) shape. + + """ + + def __init__(self, sampling_rate=22050, num_mfcc=20): + self._sampling_rate = sampling_rate + self._num_fcc = num_mfcc + super(MFCC, self).__init__() + + def forward(self, x): + if not librosa: + warnings.warn("Librosa dependency is not installed! Install that and retry") + return x + if isinstance(x, np.ndarray): + y = x + elif isinstance(x, nd.NDArray): + y = x.asnumpy() + else: + warnings.warn("MFCC - allowed datatypes mx.nd.NDArray and numpy.ndarray") + return x + + audio_tmp = np.mean(librosa.feature.mfcc(y=y, sr=self._sampling_rate, n_mfcc=self._num_fcc).T, axis=0) + return nd.array(audio_tmp) + + +class Scale(Block): + """Scale audio numpy.ndarray from a 16-bit integer to a floating point number between + -1.0 and 1.0. The 16-bit integer is the sample resolution or bit depth. + + Attributes + ---------- + scale_factor : float + The factor to scale the input tensor by. + + + Inputs: + - **x**: input tensor (samples, ) shape. + + Outputs: + - **out**: output array is a scaled NDArray with (samples, ) shape. + + Examples + -------- + >>> scale = audio.transforms.Scale(scale_factor=2) + >>> audio_samples = mx.nd.array([2,3,4]) + >>> scale(audio_samples) + [1. 1.5 2. ] + + + """ + + def __init__(self, scale_factor=2**31): + self.scale_factor = scale_factor + super(Scale, self).__init__() + + def forward(self, x): + if isinstance(x, np.ndarray): + return nd.array(x/self.scale_factor) + return x / self.scale_factor + + +class PadTrim(Block): + """Pad/Trim a 1d-NDArray of NPArray (Signal or Labels) + + Attributes + ---------- + max_len : int + Length to which the array will be padded or trimmed to. + fill_value: int or float + If there is a need of padding, what value to padd at the end of the input array + + + Inputs: + - **x**: input tensor (samples, ) shape. + + Outputs: + - **out**: output array is a scaled NDArray with (max_len, ) shape. + + Examples + -------- + >>> padtrim = audio.transforms.PadTrim(max_len=9, fill_value=0) + >>> audio_samples = mx.nd.array([1,2,3,4,5]) + >>> padtrim(audio_samples) + [1. 2. 3. 4. 5. 0. 0. 0. 0.] + + + """ + + def __init__(self, max_len, fill_value=0): + self._max_len = max_len + self._fill_value = fill_value + super(PadTrim, self).__init__() + + def forward(self, x): + if isinstance(x, np.ndarray): + x = nd.array(x) + if self._max_len > x.size: + pad = nd.ones((self._max_len - x.size,)) * self._fill_value + x = nd.concat(x, pad, dim=0) + elif self._max_len < x.size: + x = x[:self._max_len] + return x + + +class MEL(Block): + """Create MEL Spectrograms from a raw audio signal. Relatively pretty slow. + + Attributes + ---------- + sampling_rate: int, default 22050 + sampling rate of the input audio signal + num_fft: int, default 2048 + length of the Fast fourier transform window + num_mels: int, default 20 + number of mel bands to generate + hop_length: int, default 512 + total samples between successive frames + + + Inputs: + - **x**: input tensor (samples, ) shape. + + Outputs: + - **out**: output array which consists of mel spectograms, shape = (n_mels, 1) + + Usage (see librosa.feature.melspectrogram docs): + MEL(sr=16000, n_fft=1600, hop_length=800, n_mels=64) + + Examples + -------- + >>> mel = audio.transforms.MEL() + >>> audio_samples = mx.nd.array([1,2,3,4,5]) + >>> mel(audio_samples) + [[3.81801406e+04] + [9.86858240e-29] + [1.87405472e-29] + [2.38637225e-29] + [3.94043010e-29] + [3.67071565e-29] + [7.29390295e-29] + [8.84324438e-30]... + + + """ + + def __init__(self, sampling_rate=22050, num_fft=2048, num_mels=20, hop_length=512): + self._sampling_rate = sampling_rate + self._num_fft = num_fft + self._num_mels = num_mels + self._hop_length = hop_length + super(MEL, self).__init__() + + def forward(self, x): + if librosa is None: + warnings.warn("Cannot create spectrograms, since dependency librosa is not installed!") + return x + if isinstance(x, nd.NDArray): + x = x.asnumpy() + specs = librosa.feature.melspectrogram(x, sr=self._sampling_rate,\ + n_fft=self._num_fft, n_mels=self._num_mels, hop_length=self._hop_length) + return nd.array(specs) + \ No newline at end of file From 8360a4e22ac77268911bd193d19aa1947296defc Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Mon, 19 Nov 2018 17:38:51 -0800 Subject: [PATCH 02/21] Addressed PR comments, about existing synset.txt file - no overwrite --- example/gluon/urban_sounds/datasets.py | 17 +++++++++-------- example/gluon/urban_sounds/model.py | 7 +++---- example/gluon/urban_sounds/predict.py | 4 ++-- example/gluon/urban_sounds/train.py | 6 +----- example/gluon/urban_sounds/transforms.py | 6 ------ 5 files changed, 15 insertions(+), 25 deletions(-) diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index 013f8fa42da1..52280c467592 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -22,7 +22,6 @@ import os import warnings -import mxnet as mx from mxnet.gluon.data import Dataset from mxnet import ndarray as nd try: @@ -77,9 +76,8 @@ def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0): def _list_audio_files(self, root, skip_rows=0): - """ - Populates synsets - a map of index to label for the data items. - Populates the data in the dataset, making tuples of (data, label) + """Populates synsets - a map of index to label for the data items. + Populates the data in the dataset, making tuples of (data, label) """ self.synsets = [] self.items = [] @@ -115,10 +113,13 @@ def _list_audio_files(self, root, skip_rows=0): label_tmp.append(self.synsets.index(label)) #Generating the synset.txt file now - with open("./synset.txt", "w") as synsets_file: - for item in self.synsets: - synsets_file.write(item+os.linesep) - print("Synsets is generated as synset.txt") + if not os.path.exists("./synset.txt"): + with open("./synset.txt", "w") as synsets_file: + for item in self.synsets: + synsets_file.write(item+os.linesep) + print("Synsets is generated as synset.txt") + else: + warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.") self._label = nd.array(label_tmp) for i, _ in enumerate(data_tmp): diff --git a/example/gluon/urban_sounds/model.py b/example/gluon/urban_sounds/model.py index 3b3c3500c2bb..5933aaa57b6f 100644 --- a/example/gluon/urban_sounds/model.py +++ b/example/gluon/urban_sounds/model.py @@ -15,10 +15,9 @@ # specific language governing permissions and limitations # under the License. -""" - This module builds a model an MLP with a configurable output layer( number of units in the last layer). - Users can pass any number of units in the last layer. SInce this dataset has 10 labels, - the default value of num_labels = 10 +"""This module builds a model an MLP with a configurable output layer( number of units in the last layer). +Users can pass any number of units in the last layer. SInce this dataset has 10 labels, +the default value of num_labels = 10 """ import mxnet as mx from mxnet import gluon diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/urban_sounds/predict.py index 4bbecb481bb9..9b92541bb27b 100644 --- a/example/gluon/urban_sounds/predict.py +++ b/example/gluon/urban_sounds/predict.py @@ -28,11 +28,11 @@ def predict(prediction_dir='./Test'): Parameters ---------- - net: + net: The model that has been trained. prediction_dir: string, default ./Test The directory that contains the audio files on which predictions are to be made - + """ try: diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py index 2e12a85aa90b..04c8f20b879f 100644 --- a/example/gluon/urban_sounds/train.py +++ b/example/gluon/urban_sounds/train.py @@ -38,11 +38,7 @@ def evaluate_accuracy(data_iterator, net): def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): """The function responsible for running the training the model.""" - try: - import librosa - except ImportError: - warnings.warn("The dependency librosa is not installed. Cannot continue") - return + if not train_dir or not os.path.exists(train_dir) or not train_csv: warnings.warn("No train directory could be found ") return diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py index ec626e42fb0b..ef079aa61ec3 100644 --- a/example/gluon/urban_sounds/transforms.py +++ b/example/gluon/urban_sounds/transforms.py @@ -56,9 +56,6 @@ def __init__(self, sampling_rate=22050, num_mfcc=20): super(MFCC, self).__init__() def forward(self, x): - if not librosa: - warnings.warn("Librosa dependency is not installed! Install that and retry") - return x if isinstance(x, np.ndarray): y = x elif isinstance(x, nd.NDArray): @@ -199,9 +196,6 @@ def __init__(self, sampling_rate=22050, num_fft=2048, num_mels=20, hop_length=51 super(MEL, self).__init__() def forward(self, x): - if librosa is None: - warnings.warn("Cannot create spectrograms, since dependency librosa is not installed!") - return x if isinstance(x, nd.NDArray): x = x.asnumpy() specs = librosa.feature.melspectrogram(x, sr=self._sampling_rate,\ From 5e006827dcdc8965fbc59c668fb5aa2e48ac9968 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Mon, 19 Nov 2018 17:44:46 -0800 Subject: [PATCH 03/21] RST - docstring issues fixed --- example/gluon/urban_sounds/datasets.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index 52280c467592..dc760bf74fb4 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -54,12 +54,14 @@ class AudioFolderDataset(Dataset): skip_rows: int, default 0 While reading from csv file, how many rows to skip at the start of the file to avoid reading in header + Attributes ---------- synsets : list List of class names. `synsets[i]` is the name for the integer label `i` items : list of tuples List of all audio in (filename, label) pairs. + """ def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0): if not librosa: @@ -127,9 +129,7 @@ def _list_audio_files(self, root, skip_rows=0): self.items.append((data_tmp[i]+self._format, self._label[i])) def __getitem__(self, idx): - """ - Retrieve the item (data, label) stored at idx in items - """ + """Retrieve the item (data, label) stored at idx in items""" filename = self.items[idx][0] label = self.items[idx][1] @@ -143,9 +143,7 @@ def __getitem__(self, idx): return self.items[idx][0], self.items[idx][1] def __len__(self): - """ - Retrieves the number of items in the dataset - """ + """Retrieves the number of items in the dataset""" return len(self.items) @@ -171,5 +169,6 @@ def transform_first(self, fn, lazy=True): ------- Dataset The transformed dataset. + """ return super(AudioFolderDataset, self).transform_first(fn, lazy=False) From 3385a7d84d7f2ee89b3ddd0c3c2e0beeaddfb4ce Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Mon, 19 Nov 2018 17:56:25 -0800 Subject: [PATCH 04/21] added README --- example/gluon/urban_sounds/README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 example/gluon/urban_sounds/README.md diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md new file mode 100644 index 000000000000..f7e33136092f --- /dev/null +++ b/example/gluon/urban_sounds/README.md @@ -0,0 +1,22 @@ +# Urban Sounds classification in MXNet + +Urban Sounds Dataset: +## Description + The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on. + The task is to classify these audio samples into one of the 10 labels. + +To be able to run this example: + +1. Download the dataset(train.zip, test.zip) required for this example from the location: +**https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU** + + +2. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,\ + **Train** and **Test** and two csv files - **train.csv**, **test.csv** + +3. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/** + +4. Librosa is installed. To install, use the commands + `pip install librosa`, + For more details, refer here: + **https://librosa.github.io/librosa/install.html** From 6d029aee3db156cb8ce26e03cbbfd1ebc58d2f2a Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Tue, 20 Nov 2018 12:39:40 -0800 Subject: [PATCH 05/21] Addressed PR comments --- example/gluon/urban_sounds/README.md | 4 ++++ example/gluon/urban_sounds/datasets.py | 13 +++++++++---- example/gluon/urban_sounds/transforms.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md index f7e33136092f..ad56be6dc38b 100644 --- a/example/gluon/urban_sounds/README.md +++ b/example/gluon/urban_sounds/README.md @@ -20,3 +20,7 @@ To be able to run this example: `pip install librosa`, For more details, refer here: **https://librosa.github.io/librosa/install.html** + + +For information on the current design of how the AudioFolderDataset is implemented, refer below: +**https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio** \ No newline at end of file diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index dc760bf74fb4..f0529aa05129 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -27,8 +27,8 @@ try: import librosa except ImportError as e: - warnings.warn("gluon/contrib/data/audio/datasets.py : librosa dependency could not be resolved or \ - imported, could not load audio onto the numpy array.") + warnings.warn("librosa dependency could not be resolved or \ + imported, could not load audio onto the numpy array. pip install librosa") class AudioFolderDataset(Dataset): @@ -50,7 +50,7 @@ class AudioFolderDataset(Dataset): train_csv: str, default None train_csv should be populated by the training csv filename file_format: str, default '.wav' - The format of the audio files(.wav, .mp3) + The format of the audio files(.wav) skip_rows: int, default 0 While reading from csv file, how many rows to skip at the start of the file to avoid reading in header @@ -133,6 +133,9 @@ def __getitem__(self, idx): filename = self.items[idx][0] label = self.items[idx][1] + # res_type is resampling type for the audio signal + # can be passed values like 'kaiser_best', 'kaiser_fast'. 'kaiser_fast' performs better and used + # more than kaiser_best if librosa is not None: X1, _ = librosa.load(filename, res_type='kaiser_fast') return nd.array(X1), label @@ -147,12 +150,14 @@ def __len__(self): return len(self.items) - def transform_first(self, fn, lazy=True): + def transform_first(self, fn, lazy=False): """Returns a new dataset with the first element of each sample transformed by the transformer function `fn`. This is useful, for example, when you only want to transform data while keeping label as is. + lazy=False is passed to transform_first for dataset so that all tramsforms could be performed in + one shot and not during training. This is a performance consideration. Parameters ---------- diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py index ef079aa61ec3..822314238734 100644 --- a/example/gluon/urban_sounds/transforms.py +++ b/example/gluon/urban_sounds/transforms.py @@ -24,7 +24,7 @@ try: import librosa except ImportError as e: - warnings.warn("gluon/contrib/data/audio/transforms.py : librosa dependency could not be resolved or \ + warnings.warn("librosa dependency could not be resolved or \ imported, could not provide some/all transform.") from mxnet import ndarray as nd From 1e30f7c3a9829cadd2ecb421da58b35fe9ba6439 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Tue, 20 Nov 2018 15:13:50 -0800 Subject: [PATCH 06/21] Addressed PR comments, checking Divide by 0 --- example/gluon/urban_sounds/datasets.py | 23 +++++++---------------- example/gluon/urban_sounds/train.py | 4 ++-- example/gluon/urban_sounds/transforms.py | 3 +++ 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index f0529aa05129..112669acc50d 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -87,7 +87,7 @@ def _list_audio_files(self, root, skip_rows=0): for folder in sorted(os.listdir(root)): path = os.path.join(root, folder) if not os.path.isdir(path): - warnings.warn('Ignoring %s, which is not a directory.'%path, stacklevel=3) + warnings.warn('Ignoring {}, which is not a directory.'.format(path)) continue label = len(self.synsets) self.synsets.append(folder) @@ -95,12 +95,11 @@ def _list_audio_files(self, root, skip_rows=0): file_name = os.path.join(path, filename) ext = os.path.splitext(file_name)[1] if ext.lower() not in self._exts: - warnings.warn('Ignoring %s of type %s. Only support %s'%(filename, ext, ', '.join(self._exts))) + warnings.warn('Ignoring {} of type {}. Only support {}'\ + .format(filename, ext, ', '.join(self._exts))) continue self.items.append((file_name, label)) else: - data_tmp = [] - label_tmp = [] skipped_rows = 0 with open(self._train_csv, "r") as traincsv: for line in traincsv: @@ -111,35 +110,27 @@ def _list_audio_files(self, root, skip_rows=0): label = line.split(",")[1].strip() if label not in self.synsets: self.synsets.append(label) - data_tmp.append(os.path.join(self._root, line.split(",")[0])) - label_tmp.append(self.synsets.index(label)) + if self._format not in filename: + filename = filename+self._format + self.items.append((filename, nd.array(self.synsets.index(label)).reshape((1,)))) #Generating the synset.txt file now if not os.path.exists("./synset.txt"): with open("./synset.txt", "w") as synsets_file: for item in self.synsets: synsets_file.write(item+os.linesep) - print("Synsets is generated as synset.txt") + print("Synsets is generated as synset.txt") else: warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.") - self._label = nd.array(label_tmp) - for i, _ in enumerate(data_tmp): - if self._format not in data_tmp[i]: - self.items.append((data_tmp[i]+self._format, self._label[i])) def __getitem__(self, idx): """Retrieve the item (data, label) stored at idx in items""" filename = self.items[idx][0] label = self.items[idx][1] - - # res_type is resampling type for the audio signal - # can be passed values like 'kaiser_best', 'kaiser_fast'. 'kaiser_fast' performs better and used - # more than kaiser_best if librosa is not None: X1, _ = librosa.load(filename, res_type='kaiser_fast') return nd.array(X1), label - else: warnings.warn(" Dependency librosa is not installed! \ Cannot load the audio(wav) file into the numpy.ndarray.") diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py index 04c8f20b879f..6dae4e8f18a2 100644 --- a/example/gluon/urban_sounds/train.py +++ b/example/gluon/urban_sounds/train.py @@ -28,7 +28,7 @@ def evaluate_accuracy(data_iterator, net): """Function to evaluate accuracy of any data iterator passed to it as an argument""" acc = mx.metric.Accuracy() - for _, (data, label) in enumerate(data_iterator): + for data, label in data_iterator: output = net(data) predictions = nd.argmax(output, axis=1) predictions = predictions.reshape((-1, 1)) @@ -89,7 +89,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): for e in range(epochs): cumulative_loss = 0 - for _, (data, label) in enumerate(audio_train_loader): + for data, label in audio_train_loader: with autograd.record(): output = net(data) loss = softmax_loss(output, label) diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py index 822314238734..2e5c807461fe 100644 --- a/example/gluon/urban_sounds/transforms.py +++ b/example/gluon/urban_sounds/transforms.py @@ -99,6 +99,9 @@ def __init__(self, scale_factor=2**31): super(Scale, self).__init__() def forward(self, x): + if self.scale_factor == 0: + warnings.warn("Scale factor cannot be 0.") + return x if isinstance(x, np.ndarray): return nd.array(x/self.scale_factor) return x / self.scale_factor From 662749bb1c4b9fc9633b68ecb7ef050f48f54574 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Tue, 20 Nov 2018 15:18:07 -0800 Subject: [PATCH 07/21] Raising error if format is not supported. --- example/gluon/urban_sounds/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index 112669acc50d..b26b1546778c 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -72,8 +72,8 @@ def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0): self._format = file_format self._train_csv = train_csv if file_format.lower() not in self._exts: - warnings.warn("format {} not supported currently.".format(file_format)) - return + raise RuntimeError("format {} not supported currently.".format(file_format)) + self._list_audio_files(self._root, skip_rows=skip_rows) From acf48c4f95bd64110990201a34b0e6fa4522dff9 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Wed, 21 Nov 2018 09:50:07 -0800 Subject: [PATCH 08/21] changed a line for ndarray of labels --- example/gluon/urban_sounds/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index b26b1546778c..7ecc8783dbcf 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -112,7 +112,7 @@ def _list_audio_files(self, root, skip_rows=0): self.synsets.append(label) if self._format not in filename: filename = filename+self._format - self.items.append((filename, nd.array(self.synsets.index(label)).reshape((1,)))) + self.items.append((filename, nd.array([self.synsets.index(label)]).reshape((1,)))) #Generating the synset.txt file now if not os.path.exists("./synset.txt"): From 5e37fb8f7ce19dc279ace479a285199a174f3b26 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Wed, 21 Nov 2018 16:10:50 -0800 Subject: [PATCH 09/21] Trigger CI From 4fe850c854f58a728ac3cf182c6694a6b01d4b1c Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Thu, 22 Nov 2018 10:35:09 -0800 Subject: [PATCH 10/21] Trigger CI From 214d4baf259fe067bb7deb60349d31c85d1fa40d Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Mon, 26 Nov 2018 17:47:17 -0800 Subject: [PATCH 11/21] PR comments addressed around skip_header argument --- example/gluon/urban_sounds/README.md | 41 +++++++++++++++++++++++- example/gluon/urban_sounds/datasets.py | 15 +++++---- example/gluon/urban_sounds/model.py | 4 +-- example/gluon/urban_sounds/train.py | 2 +- example/gluon/urban_sounds/transforms.py | 4 +-- 5 files changed, 54 insertions(+), 12 deletions(-) diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md index ad56be6dc38b..f16206f2d4a5 100644 --- a/example/gluon/urban_sounds/README.md +++ b/example/gluon/urban_sounds/README.md @@ -23,4 +23,43 @@ To be able to run this example: For information on the current design of how the AudioFolderDataset is implemented, refer below: -**https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio** \ No newline at end of file +**https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio** + +## Usage + +For training: + +- arguments + - train : The folder/directory that contains the audio(wav) files locally. Default = "./Train" + - csv: The file name of the csv file that contains audio file name to label mapping. Default = "train.csv" + - epochs : Number of epochs to train the model. Default = 30 + - batch_size : The batch size for training. Default = 32 + + +###### default setting +``` +python train.py +``` +or + +###### manual setting +``` +python train.py --train ./Train --csv train.csv --batch_size 32 --epochs 30 +``` + +For prediction: + +- arguments + - pred : The folder/directory that contains the audio(wav) files which are to be classified. Default = "./Test" + + +###### default setting +``` +python predict.py +``` +or + +###### manual setting +``` +python train.py --pred ./Test +``` \ No newline at end of file diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index 7ecc8783dbcf..2b4fe7519e25 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -39,7 +39,7 @@ class AudioFolderDataset(Dataset): root/drilling/26.wav root/dog_barking/42.wav OR - Files(wav) and a csv file that has filename and associated label + Files(wav) and a csv file that has file name and associated label Parameters ---------- @@ -51,8 +51,8 @@ class AudioFolderDataset(Dataset): train_csv should be populated by the training csv filename file_format: str, default '.wav' The format of the audio files(.wav) - skip_rows: int, default 0 - While reading from csv file, how many rows to skip at the start of the file to avoid reading in header + skip_header: boolean, default False + While reading from csv file, whether to skip at the start of the file to avoid reading in header Attributes @@ -63,7 +63,7 @@ class AudioFolderDataset(Dataset): List of all audio in (filename, label) pairs. """ - def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0): + def __init__(self, root, train_csv=None, file_format='.wav', skip_header=False): if not librosa: warnings.warn("pip install librosa to continue.") return @@ -73,7 +73,10 @@ def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0): self._train_csv = train_csv if file_format.lower() not in self._exts: raise RuntimeError("format {} not supported currently.".format(file_format)) - + if skip_header: + skip_rows = 1 + else: + skip_rows = 0 self._list_audio_files(self._root, skip_rows=skip_rows) @@ -153,7 +156,7 @@ def transform_first(self, fn, lazy=False): Parameters ---------- fn : callable - A transformer function that takes the first elemtn of a sample + A transformer function that takes the first element of a sample as input and returns the transformed element. lazy : bool, default True If False, transforms all samples at once. Otherwise, diff --git a/example/gluon/urban_sounds/model.py b/example/gluon/urban_sounds/model.py index 5933aaa57b6f..af23cb946e2e 100644 --- a/example/gluon/urban_sounds/model.py +++ b/example/gluon/urban_sounds/model.py @@ -27,7 +27,7 @@ def get_net(num_labels=10): net = gluon.nn.Sequential() with net.name_scope(): net.add(gluon.nn.Dense(256, activation="relu")) # 1st layer (256 nodes) - net.add(gluon.nn.Dense(256, activation="relu")) # 2nd hidden layer + net.add(gluon.nn.Dense(256, activation="relu")) # 2nd hidden layer ( 256 nodes ) net.add(gluon.nn.Dense(num_labels)) - net.collect_params().initialize(mx.init.Normal(1.)) + net.collect_params().initialize(mx.init.Xavier()) return net diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py index 6dae4e8f18a2..a2e3066bd332 100644 --- a/example/gluon/urban_sounds/train.py +++ b/example/gluon/urban_sounds/train.py @@ -45,7 +45,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): # Make a dataset from the local folder containing Audio data print("\nMaking an Audio Dataset...\n") tick = time.time() - aud_dataset = AudioFolderDataset(train_dir, train_csv=train_csv, file_format='.wav', skip_rows=1) + aud_dataset = AudioFolderDataset(train_dir, train_csv=train_csv, file_format='.wav', skip_header=True) tock = time.time() print("Loading the dataset took ", (tock-tick), " seconds.") diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py index 2e5c807461fe..a75e1543338d 100644 --- a/example/gluon/urban_sounds/transforms.py +++ b/example/gluon/urban_sounds/transforms.py @@ -115,7 +115,7 @@ class PadTrim(Block): max_len : int Length to which the array will be padded or trimmed to. fill_value: int or float - If there is a need of padding, what value to padd at the end of the input array + If there is a need of padding, what value to pad at the end of the input array. Inputs: @@ -158,7 +158,7 @@ class MEL(Block): sampling_rate: int, default 22050 sampling rate of the input audio signal num_fft: int, default 2048 - length of the Fast fourier transform window + length of the Fast Fourier transform window num_mels: int, default 20 number of mel bands to generate hop_length: int, default 512 From 75e1507152d2ab98074d81f27055484492d60f09 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Tue, 27 Nov 2018 15:39:52 -0800 Subject: [PATCH 12/21] Addressed PR comments around librosa import --- example/gluon/urban_sounds/README.md | 29 ++++-- example/gluon/urban_sounds/datasets.py | 109 ++++++++++---------- example/gluon/urban_sounds/predict.py | 10 +- example/gluon/urban_sounds/requirements.txt | 2 + example/gluon/urban_sounds/train.py | 20 ++-- example/gluon/urban_sounds/transforms.py | 1 - 6 files changed, 90 insertions(+), 81 deletions(-) create mode 100644 example/gluon/urban_sounds/requirements.txt diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md index f16206f2d4a5..76e6c20d4cd3 100644 --- a/example/gluon/urban_sounds/README.md +++ b/example/gluon/urban_sounds/README.md @@ -1,5 +1,13 @@ # Urban Sounds classification in MXNet +This example provides an end-to-end pipeline for a common datahack competition - Urban Sounds Classification Example. +Below is the link to the competition: +https://datahack.analyticsvidhya.com/contest/practice-problem-urban-sound-classification/ + +After logging in, the data set can be downloaded. +The details of the dataset and the link to download it are given below: + + Urban Sounds Dataset: ## Description The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on. @@ -7,19 +15,22 @@ Urban Sounds Dataset: To be able to run this example: -1. Download the dataset(train.zip, test.zip) required for this example from the location: -**https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU** - +1. `pip install -r ./requirements.txt` + + This step installs the required libraries to run the example. + The main dependency that is required is: Librosa. + The version used to test the example is: `0.6.2` + For more details, refer here: +*https://librosa.github.io/librosa/install.html* + +2. Download the dataset(train.zip, test.zip) required for this example from the location: +https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU -2. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,\ +3. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,\ **Train** and **Test** and two csv files - **train.csv**, **test.csv** -3. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/** +4. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/** -4. Librosa is installed. To install, use the commands - `pip install librosa`, - For more details, refer here: - **https://librosa.github.io/librosa/install.html** For information on the current design of how the AudioFolderDataset is implemented, refer below: diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index 2b4fe7519e25..78da5eb2e84d 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -22,15 +22,18 @@ import os import warnings +from itertools import islice +import csv from mxnet.gluon.data import Dataset from mxnet import ndarray as nd try: import librosa except ImportError as e: - warnings.warn("librosa dependency could not be resolved or \ + raise ImportError("librosa dependency could not be resolved or \ imported, could not load audio onto the numpy array. pip install librosa") + class AudioFolderDataset(Dataset): """A dataset for loading Audio files stored in a folder structure like:: @@ -58,7 +61,7 @@ class AudioFolderDataset(Dataset): Attributes ---------- synsets : list - List of class names. `synsets[i]` is the name for the integer label `i` + List of class names. `synsets[i]` is the name for the `i`th label items : list of tuples List of all audio in (filename, label) pairs. @@ -66,17 +69,16 @@ class AudioFolderDataset(Dataset): def __init__(self, root, train_csv=None, file_format='.wav', skip_header=False): if not librosa: warnings.warn("pip install librosa to continue.") - return + raise RuntimeError("Librosa not installed. Run pip install librosa and retry this step.") self._root = os.path.expanduser(root) self._exts = ['.wav'] self._format = file_format self._train_csv = train_csv if file_format.lower() not in self._exts: raise RuntimeError("format {} not supported currently.".format(file_format)) + skip_rows = 0 if skip_header: skip_rows = 1 - else: - skip_rows = 0 self._list_audio_files(self._root, skip_rows=skip_rows) @@ -86,58 +88,61 @@ def _list_audio_files(self, root, skip_rows=0): """ self.synsets = [] self.items = [] - if self._train_csv is None: - for folder in sorted(os.listdir(root)): - path = os.path.join(root, folder) - if not os.path.isdir(path): - warnings.warn('Ignoring {}, which is not a directory.'.format(path)) - continue - label = len(self.synsets) - self.synsets.append(folder) - for filename in sorted(os.listdir(path)): - file_name = os.path.join(path, filename) - ext = os.path.splitext(file_name)[1] - if ext.lower() not in self._exts: - warnings.warn('Ignoring {} of type {}. Only support {}'\ - .format(filename, ext, ', '.join(self._exts))) - continue - self.items.append((file_name, label)) + if not self._train_csv: + # The audio files are organized in folder structure with + # directory name as label and audios in them + self._folder_structure(root) else: - skipped_rows = 0 - with open(self._train_csv, "r") as traincsv: - for line in traincsv: - skipped_rows = skipped_rows + 1 - if skipped_rows <= skip_rows: - continue - filename = os.path.join(root, line.split(",")[0]) - label = line.split(",")[1].strip() - if label not in self.synsets: - self.synsets.append(label) - if self._format not in filename: - filename = filename+self._format - self.items.append((filename, nd.array([self.synsets.index(label)]).reshape((1,)))) - - #Generating the synset.txt file now - if not os.path.exists("./synset.txt"): - with open("./synset.txt", "w") as synsets_file: - for item in self.synsets: - synsets_file.write(item+os.linesep) - print("Synsets is generated as synset.txt") - else: - warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.") + # train_csv contains mapping between filename and label + self._csv_labelled_dataset(root, skip_rows=skip_rows) + + #Generating the synset.txt file now + if not os.path.exists("./synset.txt"): + with open("./synset.txt", "w") as synsets_file: + for item in self.synsets: + synsets_file.write(item+os.linesep) + print("Synsets is generated as synset.txt") + else: + warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.") + + + def _folder_structure(self, root): + for folder in sorted(os.listdir(root)): + path = os.path.join(root, folder) + if not os.path.isdir(path): + warnings.warn('Ignoring {}, which is not a directory.'.format(path)) + continue + label = len(self.synsets) + self.synsets.append(folder) + for filename in sorted(os.listdir(path)): + file_name = os.path.join(path, filename) + ext = os.path.splitext(file_name)[1] + if ext.lower() not in self._exts: + warnings.warn('Ignoring {} of type {}. Only support {}'\ + .format(filename, ext, ', '.join(self._exts))) + continue + self.items.append((file_name, label)) + + + def _csv_labelled_dataset(self, root, skip_rows=0): + with open(self._train_csv, "r") as traincsv: + for line in islice(csv.reader(traincsv), skip_rows, None): + filename = os.path.join(root, line[0]) + label = line[1].strip() + if label not in self.synsets: + self.synsets.append(label) + if self._format not in filename: + filename = filename+self._format + self.items.append((filename, nd.array([self.synsets.index(label)]).reshape((1,)))) def __getitem__(self, idx): """Retrieve the item (data, label) stored at idx in items""" - filename = self.items[idx][0] - label = self.items[idx][1] - if librosa is not None: - X1, _ = librosa.load(filename, res_type='kaiser_fast') - return nd.array(X1), label - else: - warnings.warn(" Dependency librosa is not installed! \ - Cannot load the audio(wav) file into the numpy.ndarray.") - return self.items[idx][0], self.items[idx][1] + filename, label = self.items[idx] + # resampling_type is passed as kaiser_fast for a better performance + X1, _ = librosa.load(filename, res_type='kaiser_fast') + return nd.array(X1), label + def __len__(self): """Retrieves the number of items in the dataset""" diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/urban_sounds/predict.py index 9b92541bb27b..1c37bd0204db 100644 --- a/example/gluon/urban_sounds/predict.py +++ b/example/gluon/urban_sounds/predict.py @@ -22,6 +22,10 @@ from mxnet import nd from transforms import MFCC from model import get_net +try: + import librosa +except ImportError: + raise ImportError("Librosa is not installed! please run the following command pip install librosa.") def predict(prediction_dir='./Test'): """The function is used to run predictions on the audio files in the directory `pred_directory`. @@ -35,12 +39,6 @@ def predict(prediction_dir='./Test'): """ - try: - import librosa - except ImportError: - warnings.warn("Librosa is not installed! please run the following command pip install librosa.") - return - if not os.path.exists(prediction_dir): warnings.warn("The directory on which predictions are to be made is not found!") return diff --git a/example/gluon/urban_sounds/requirements.txt b/example/gluon/urban_sounds/requirements.txt new file mode 100644 index 000000000000..d885e0beec7e --- /dev/null +++ b/example/gluon/urban_sounds/requirements.txt @@ -0,0 +1,2 @@ +librosa>=0.6.2 # librosa is a library that is used to load the audio(wav) files and provides capabilities of feature extraction. +argparse # used for parsing arguments \ No newline at end of file diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py index a2e3066bd332..a30e6ae78701 100644 --- a/example/gluon/urban_sounds/train.py +++ b/example/gluon/urban_sounds/train.py @@ -116,10 +116,14 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): if __name__ == '__main__': + training_dir = './Train' + training_csv = './train.csv' + eps = 30 + batch_sz = 32 try: import argparse - parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet") + parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet Gluon") parser.add_argument('--train', '-t', help="Enter the folder path that contains your audio files", type=str) parser.add_argument('--csv', '-c', help="Enter the filename of the csv that contains filename\ to label mapping", type=str) @@ -131,31 +135,21 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): if args: if args.train: training_dir = args.train - else: - training_dir = './Train' if args.csv: training_csv = args.csv - else: - training_csv = './train.csv' if args.epochs: eps = args.epochs - else: - eps = 30 if args.batch_size: batch_sz = args.batch_size - else: - batch_sz = 32 + except ImportError as er: warnings.warn("Argument parsing module could not be imported \ Passing default arguments.") - training_dir = './Train' - training_csv = './train.csv' - eps = 30 - batch_sz = 32 + train(train_dir=training_dir, train_csv=training_csv, epochs=eps, batch_size=batch_sz) print("Urban sounds classification Training DONE!") diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py index a75e1543338d..9b3f9428f272 100644 --- a/example/gluon/urban_sounds/transforms.py +++ b/example/gluon/urban_sounds/transforms.py @@ -204,4 +204,3 @@ def forward(self, x): specs = librosa.feature.melspectrogram(x, sr=self._sampling_rate,\ n_fft=self._num_fft, n_mels=self._num_mels, hop_length=self._hop_length) return nd.array(specs) - \ No newline at end of file From cc3714a7ce6a1082122448a26b2fa49fbf4a9855 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Tue, 27 Nov 2018 20:13:42 -0800 Subject: [PATCH 13/21] PR Comments --- example/gluon/urban_sounds/datasets.py | 2 +- example/gluon/urban_sounds/train.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index 78da5eb2e84d..39c3337e019d 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -163,7 +163,7 @@ def transform_first(self, fn, lazy=False): fn : callable A transformer function that takes the first element of a sample as input and returns the transformed element. - lazy : bool, default True + lazy : bool, default False If False, transforms all samples at once. Otherwise, transforms each sample on demand. Note that if `fn` is stochastic, you must set lazy to True or you will diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py index a30e6ae78701..28dafa22592d 100644 --- a/example/gluon/urban_sounds/train.py +++ b/example/gluon/urban_sounds/train.py @@ -37,7 +37,7 @@ def evaluate_accuracy(data_iterator, net): def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): - """The function responsible for running the training the model.""" + """Function responsible for running the training the model.""" if not train_dir or not os.path.exists(train_dir) or not train_csv: warnings.warn("No train directory could be found ") @@ -100,7 +100,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): if e%5 == 0: train_accuracy = evaluate_accuracy(audio_train_loader, net) - print("Epoch %s. Loss: %s Train accuracy : %s " % (e, cumulative_loss/num_examples, train_accuracy)) + print("Epoch {}. Loss: {} Train accuracy : {} ".format(e, cumulative_loss/num_examples, train_accuracy)) print("\n------------------------------\n") train_accuracy = evaluate_accuracy(audio_train_loader, net) From 51101f2c27cac4aec6787259f07f1e82c8673391 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Wed, 28 Nov 2018 09:36:03 -0800 Subject: [PATCH 14/21] Passing lazy=lazy from argument --- example/gluon/urban_sounds/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index 39c3337e019d..31c816dceb68 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -175,4 +175,4 @@ def transform_first(self, fn, lazy=False): The transformed dataset. """ - return super(AudioFolderDataset, self).transform_first(fn, lazy=False) + return super(AudioFolderDataset, self).transform_first(fn, lazy=lazy) From c41b9b39560f80e115e6384154a487e55a543a79 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Wed, 28 Nov 2018 15:45:45 -0800 Subject: [PATCH 15/21] Added PR comments, labels to README.MD --- example/gluon/urban_sounds/README.md | 51 ++++++++++++++++++------ example/gluon/urban_sounds/datasets.py | 5 ++- example/gluon/urban_sounds/predict.py | 7 ++-- example/gluon/urban_sounds/train.py | 21 +++++----- example/gluon/urban_sounds/transforms.py | 3 +- 5 files changed, 57 insertions(+), 30 deletions(-) diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md index 76e6c20d4cd3..35d53963b7b9 100644 --- a/example/gluon/urban_sounds/README.md +++ b/example/gluon/urban_sounds/README.md @@ -8,16 +8,29 @@ After logging in, the data set can be downloaded. The details of the dataset and the link to download it are given below: -Urban Sounds Dataset: -## Description +##Urban Sounds Dataset: +### Description The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on. - The task is to classify these audio samples into one of the 10 labels. + The task is to classify these audio samples into one of the following 10 labels: + ``` + siren, + street_music, + drilling, + dog_bark, + children_playing, + gun_shot, + engine_idling, + air_conditioner, + jackhammer, + car_horn + ``` To be able to run this example: -1. `pip install -r ./requirements.txt` +1. `pip install -r requirements.txt` - This step installs the required libraries to run the example. + If you are in the directory where the requirements.txt file lies, + this step installs the required libraries to run the example. The main dependency that is required is: Librosa. The version used to test the example is: `0.6.2` For more details, refer here: @@ -26,9 +39,21 @@ To be able to run this example: 2. Download the dataset(train.zip, test.zip) required for this example from the location: https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU -3. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,\ +3. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely, **Train** and **Test** and two csv files - **train.csv**, **test.csv** + Assuming you are in a directory *"UrbanSounds"*, after downloading and extracting train.zip, the folder structure should be: + + ``` + UrbanSounds + - Train + - 0.wav, 1.wav ... + - train.csv + - datasets.py + - train.py + - predict.py ... + ``` + 4. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/** @@ -36,41 +61,41 @@ https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU For information on the current design of how the AudioFolderDataset is implemented, refer below: **https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio** -## Usage +### Usage For training: -- arguments +- Arguments - train : The folder/directory that contains the audio(wav) files locally. Default = "./Train" - csv: The file name of the csv file that contains audio file name to label mapping. Default = "train.csv" - epochs : Number of epochs to train the model. Default = 30 - batch_size : The batch size for training. Default = 32 -###### default setting +###### To use the default arguments, use: ``` python train.py ``` or -###### manual setting +###### To pass command-line arguments for training data directory, epochs, batch_size, csv file name, use : ``` python train.py --train ./Train --csv train.csv --batch_size 32 --epochs 30 ``` For prediction: -- arguments +- Arguments - pred : The folder/directory that contains the audio(wav) files which are to be classified. Default = "./Test" -###### default setting +###### To use the default arguments, use: ``` python predict.py ``` or -###### manual setting +###### To pass command-line arguments for test data directory, use : ``` python train.py --pred ./Test ``` \ No newline at end of file diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py index 31c816dceb68..51c040c8f162 100644 --- a/example/gluon/urban_sounds/datasets.py +++ b/example/gluon/urban_sounds/datasets.py @@ -18,6 +18,7 @@ # coding: utf-8 # pylint: disable= """ Audio Dataset container.""" +from __future__ import print_function __all__ = ['AudioFolderDataset'] import os @@ -75,7 +76,7 @@ def __init__(self, root, train_csv=None, file_format='.wav', skip_header=False): self._format = file_format self._train_csv = train_csv if file_format.lower() not in self._exts: - raise RuntimeError("format {} not supported currently.".format(file_format)) + raise RuntimeError("Format {} not supported currently.".format(file_format)) skip_rows = 0 if skip_header: skip_rows = 1 @@ -96,7 +97,7 @@ def _list_audio_files(self, root, skip_rows=0): # train_csv contains mapping between filename and label self._csv_labelled_dataset(root, skip_rows=skip_rows) - #Generating the synset.txt file now + # Generating the synset.txt file now if not os.path.exists("./synset.txt"): with open("./synset.txt", "w") as synsets_file: for item in self.synsets: diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/urban_sounds/predict.py index 1c37bd0204db..bae51b3251de 100644 --- a/example/gluon/urban_sounds/predict.py +++ b/example/gluon/urban_sounds/predict.py @@ -14,8 +14,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -""" Prediction module for Urban Sounds Classification -""" +""" Prediction module for Urban Sounds Classification""" +from __future__ import print_function import os import warnings import mxnet as mx @@ -25,7 +25,8 @@ try: import librosa except ImportError: - raise ImportError("Librosa is not installed! please run the following command pip install librosa.") + raise ImportError("Librosa is not installed! please run the following command:\ + `pip install librosa`") def predict(prediction_dir='./Test'): """The function is used to run predictions on the audio files in the directory `pred_directory`. diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py index 28dafa22592d..2132437f9992 100644 --- a/example/gluon/urban_sounds/train.py +++ b/example/gluon/urban_sounds/train.py @@ -14,8 +14,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""The module to run training on the Urban sounds dataset -""" +"""The module to run training on the Urban sounds dataset""" +from __future__ import print_function import os import time import warnings @@ -25,6 +25,7 @@ from transforms import MFCC import model + def evaluate_accuracy(data_iterator, net): """Function to evaluate accuracy of any data iterator passed to it as an argument""" acc = mx.metric.Accuracy() @@ -87,7 +88,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): batch_size = batch_size num_examples = len(aud_dataset) - for e in range(epochs): + for epoch in range(epochs): cumulative_loss = 0 for data, label in audio_train_loader: with autograd.record(): @@ -98,9 +99,9 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): trainer.step(batch_size) cumulative_loss += mx.nd.sum(loss).asscalar() - if e%5 == 0: + if epoch%5 == 0: train_accuracy = evaluate_accuracy(audio_train_loader, net) - print("Epoch {}. Loss: {} Train accuracy : {} ".format(e, cumulative_loss/num_examples, train_accuracy)) + print("Epoch {}. Loss: {} Train accuracy : {} ".format(epoch, cumulative_loss/num_examples, train_accuracy)) print("\n------------------------------\n") train_accuracy = evaluate_accuracy(audio_train_loader, net) @@ -118,8 +119,8 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): if __name__ == '__main__': training_dir = './Train' training_csv = './train.csv' - eps = 30 - batch_sz = 32 + epochs = 30 + batch_size = 32 try: import argparse @@ -140,10 +141,10 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): training_csv = args.csv if args.epochs: - eps = args.epochs + epochs = args.epochs if args.batch_size: - batch_sz = args.batch_size + batch_size = args.batch_size except ImportError as er: @@ -151,5 +152,5 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): Passing default arguments.") - train(train_dir=training_dir, train_csv=training_csv, epochs=eps, batch_size=batch_sz) + train(train_dir=training_dir, train_csv=training_csv, epochs=epochs, batch_size=batch_size) print("Urban sounds classification Training DONE!") diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py index 9b3f9428f272..8b76d131cdb1 100644 --- a/example/gluon/urban_sounds/transforms.py +++ b/example/gluon/urban_sounds/transforms.py @@ -14,10 +14,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - # coding: utf-8 # pylint: disable= arguments-differ -"Audio transforms." +"""Audio transforms.""" import warnings import numpy as np From 5eef58f2d3f8089f21483de770d793eff47aae48 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Wed, 28 Nov 2018 19:22:36 -0800 Subject: [PATCH 16/21] Trigger CI From 2465b0c4b7ae42b8d8283e99c1430da024524a4e Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Thu, 29 Nov 2018 13:28:10 -0800 Subject: [PATCH 17/21] Addressing PR Comments in README --- example/gluon/urban_sounds/README.md | 2 +- example/gluon/urban_sounds/train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md index 35d53963b7b9..92c201614fa5 100644 --- a/example/gluon/urban_sounds/README.md +++ b/example/gluon/urban_sounds/README.md @@ -1,4 +1,4 @@ -# Urban Sounds classification in MXNet +# Urban Sounds Classification in MXNet Gluon This example provides an end-to-end pipeline for a common datahack competition - Urban Sounds Classification Example. Below is the link to the competition: diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py index 2132437f9992..e475e238a21e 100644 --- a/example/gluon/urban_sounds/train.py +++ b/example/gluon/urban_sounds/train.py @@ -124,7 +124,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): try: import argparse - parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet Gluon") + parser = argparse.ArgumentParser(description="Urban Sounds classification example - MXNet Gluon") parser.add_argument('--train', '-t', help="Enter the folder path that contains your audio files", type=str) parser.add_argument('--csv', '-c', help="Enter the filename of the csv that contains filename\ to label mapping", type=str) From 4e0d54152db2671251f4f2319b88f1acfdcb7c9d Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Thu, 29 Nov 2018 13:40:22 -0800 Subject: [PATCH 18/21] Modified README.md --- example/gluon/urban_sounds/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md index 92c201614fa5..af95b2653226 100644 --- a/example/gluon/urban_sounds/README.md +++ b/example/gluon/urban_sounds/README.md @@ -8,7 +8,7 @@ After logging in, the data set can be downloaded. The details of the dataset and the link to download it are given below: -##Urban Sounds Dataset: +## Urban Sounds Dataset: ### Description The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on. The task is to classify these audio samples into one of the following 10 labels: From 74106e0365702ae6766b501543adce1e8ba05a93 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Thu, 29 Nov 2018 18:49:06 -0800 Subject: [PATCH 19/21] Added example under audio folder --- example/gluon/{urban_sounds => audio}/transforms.py | 0 example/gluon/{ => audio}/urban_sounds/README.md | 9 ++++----- example/gluon/{ => audio}/urban_sounds/datasets.py | 0 example/gluon/{ => audio}/urban_sounds/model.py | 0 example/gluon/{ => audio}/urban_sounds/predict.py | 4 +++- example/gluon/{ => audio}/urban_sounds/requirements.txt | 0 example/gluon/{ => audio}/urban_sounds/train.py | 5 +++-- 7 files changed, 10 insertions(+), 8 deletions(-) rename example/gluon/{urban_sounds => audio}/transforms.py (100%) rename example/gluon/{ => audio}/urban_sounds/README.md (92%) rename example/gluon/{ => audio}/urban_sounds/datasets.py (100%) rename example/gluon/{ => audio}/urban_sounds/model.py (100%) rename example/gluon/{ => audio}/urban_sounds/predict.py (98%) rename example/gluon/{ => audio}/urban_sounds/requirements.txt (100%) rename example/gluon/{ => audio}/urban_sounds/train.py (98%) diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/audio/transforms.py similarity index 100% rename from example/gluon/urban_sounds/transforms.py rename to example/gluon/audio/transforms.py diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/audio/urban_sounds/README.md similarity index 92% rename from example/gluon/urban_sounds/README.md rename to example/gluon/audio/urban_sounds/README.md index af95b2653226..c85d29db2e5a 100644 --- a/example/gluon/urban_sounds/README.md +++ b/example/gluon/audio/urban_sounds/README.md @@ -34,7 +34,7 @@ To be able to run this example: The main dependency that is required is: Librosa. The version used to test the example is: `0.6.2` For more details, refer here: -*https://librosa.github.io/librosa/install.html* +https://librosa.github.io/librosa/install.html 2. Download the dataset(train.zip, test.zip) required for this example from the location: https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU @@ -49,17 +49,16 @@ https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU - Train - 0.wav, 1.wav ... - train.csv - - datasets.py - train.py - predict.py ... ``` -4. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/** +4. Apache MXNet is installed on the machine. For instructions, go to the link: https://mxnet.incubator.apache.org/install/ For information on the current design of how the AudioFolderDataset is implemented, refer below: -**https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio** +https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio ### Usage @@ -97,5 +96,5 @@ or ###### To pass command-line arguments for test data directory, use : ``` -python train.py --pred ./Test +python predict.py --pred ./Test ``` \ No newline at end of file diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/audio/urban_sounds/datasets.py similarity index 100% rename from example/gluon/urban_sounds/datasets.py rename to example/gluon/audio/urban_sounds/datasets.py diff --git a/example/gluon/urban_sounds/model.py b/example/gluon/audio/urban_sounds/model.py similarity index 100% rename from example/gluon/urban_sounds/model.py rename to example/gluon/audio/urban_sounds/model.py diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/audio/urban_sounds/predict.py similarity index 98% rename from example/gluon/urban_sounds/predict.py rename to example/gluon/audio/urban_sounds/predict.py index bae51b3251de..0c3631173667 100644 --- a/example/gluon/urban_sounds/predict.py +++ b/example/gluon/audio/urban_sounds/predict.py @@ -17,16 +17,17 @@ """ Prediction module for Urban Sounds Classification""" from __future__ import print_function import os +import sys import warnings import mxnet as mx from mxnet import nd -from transforms import MFCC from model import get_net try: import librosa except ImportError: raise ImportError("Librosa is not installed! please run the following command:\ `pip install librosa`") +sys.path.append('../') def predict(prediction_dir='./Test'): """The function is used to run predictions on the audio files in the directory `pred_directory`. @@ -64,6 +65,7 @@ def predict(prediction_dir='./Test'): net.load_parameters("./net.params") file_names = os.listdir(prediction_dir) full_file_names = [os.path.join(prediction_dir, item) for item in file_names] + from transforms import MFCC mfcc = MFCC() print("\nStarting predictions for audio files in ", prediction_dir, " ....\n") for filename in full_file_names: diff --git a/example/gluon/urban_sounds/requirements.txt b/example/gluon/audio/urban_sounds/requirements.txt similarity index 100% rename from example/gluon/urban_sounds/requirements.txt rename to example/gluon/audio/urban_sounds/requirements.txt diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/audio/urban_sounds/train.py similarity index 98% rename from example/gluon/urban_sounds/train.py rename to example/gluon/audio/urban_sounds/train.py index e475e238a21e..c88f9fb55187 100644 --- a/example/gluon/urban_sounds/train.py +++ b/example/gluon/audio/urban_sounds/train.py @@ -16,15 +16,15 @@ # under the License. """The module to run training on the Urban sounds dataset""" from __future__ import print_function +import sys import os import time import warnings import mxnet as mx from mxnet import gluon, nd, autograd from datasets import AudioFolderDataset -from transforms import MFCC import model - +sys.path.append('../') def evaluate_accuracy(data_iterator, net): """Function to evaluate accuracy of any data iterator passed to it as an argument""" @@ -73,6 +73,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32): print("Loading the dataset to the Gluon's OOTB Dataloader...") #Getting the data loader out of the AudioDataset and passing the transform + from transforms import MFCC aud_transform = MFCC() tick = time.time() From 5eb923ed34f98959fede75464a77964a632166ba Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Fri, 30 Nov 2018 14:22:37 -0800 Subject: [PATCH 20/21] Retrigger CI From 5461bc78efe0bd01c92849449c2e2cbd1c8396d0 Mon Sep 17 00:00:00 2001 From: gaurav-gireesh Date: Fri, 30 Nov 2018 17:24:25 -0800 Subject: [PATCH 21/21] Retrigger CI