From 7fee29a52fa028ce0daa52cde128dff529897aee Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Fri, 16 Nov 2018 18:53:03 -0800
Subject: [PATCH 01/21] Initialized the example

---
 example/gluon/urban_sounds/datasets.py   | 174 +++++++++++++++++++
 example/gluon/urban_sounds/model.py      |  34 ++++
 example/gluon/urban_sounds/predict.py    |  91 ++++++++++
 example/gluon/urban_sounds/train.py      | 165 ++++++++++++++++++
 example/gluon/urban_sounds/transforms.py | 210 +++++++++++++++++++++++
 5 files changed, 674 insertions(+)
 create mode 100644 example/gluon/urban_sounds/datasets.py
 create mode 100644 example/gluon/urban_sounds/model.py
 create mode 100644 example/gluon/urban_sounds/predict.py
 create mode 100644 example/gluon/urban_sounds/train.py
 create mode 100644 example/gluon/urban_sounds/transforms.py

diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
new file mode 100644
index 000000000000..013f8fa42da1
--- /dev/null
+++ b/example/gluon/urban_sounds/datasets.py
@@ -0,0 +1,174 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable=
+""" Audio Dataset container."""
+__all__ = ['AudioFolderDataset']
+
+import os
+import warnings
+import mxnet as mx
+from mxnet.gluon.data import Dataset
+from mxnet import ndarray as nd
+try:
+    import librosa
+except ImportError as e:
+    librosa = None  # keep the name bound so the later `if not librosa` / `is not None` guards work
+    warnings.warn("librosa dependency could not be resolved or imported, could not load audio onto the numpy array.")
+
+
+class AudioFolderDataset(Dataset):
+    """A dataset for loading Audio files stored in a folder structure like::
+
+        root/children_playing/0.wav
+        root/siren/23.wav
+        root/drilling/26.wav
+        root/dog_barking/42.wav
+            OR
+        Files(wav) and a csv file that has filename and associated label
+
+    Parameters
+    ----------
+    root : str
+        Path to root directory.
+    transform : callable, default None
+        A function that takes data and label and transforms them
+    train_csv: str, default None
+       train_csv should be populated by the training csv filename
+    file_format: str, default '.wav'
+        The format of the audio files(.wav, .mp3)
+    skip_rows: int, default 0
+        While reading from csv file, how many rows to skip at the start of the file to avoid reading in header
+
+    Attributes
+    ----------
+    synsets : list
+        List of class names. `synsets[i]` is the name for the integer label `i`
+    items : list of tuples
+        List of all audio in (filename, label) pairs.
+    """
+    def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0):
+        if not librosa:
+            warnings.warn("pip install librosa to continue.")
+            return
+        self._root = os.path.expanduser(root)
+        self._exts = ['.wav']
+        self._format = file_format
+        self._train_csv = train_csv
+        if file_format.lower() not in self._exts:
+            warnings.warn("format {} not supported currently.".format(file_format))
+            return
+        self._list_audio_files(self._root, skip_rows=skip_rows)
+
+
+    def _list_audio_files(self, root, skip_rows=0):
+        """
+            Populates synsets - a map of index to label for the data items.
+            Populates the data in the dataset, making tuples of (data, label)
+        """
+        self.synsets = []
+        self.items = []
+        if self._train_csv is None:
+            for folder in sorted(os.listdir(root)):
+                path = os.path.join(root, folder)
+                if not os.path.isdir(path):
+                    warnings.warn('Ignoring %s, which is not a directory.'%path, stacklevel=3)
+                    continue
+                label = len(self.synsets)
+                self.synsets.append(folder)
+                for filename in sorted(os.listdir(path)):
+                    file_name = os.path.join(path, filename)
+                    ext = os.path.splitext(file_name)[1]
+                    if ext.lower() not in self._exts:
+                        warnings.warn('Ignoring %s of type %s. Only support %s'%(filename, ext, ', '.join(self._exts)))
+                        continue
+                    self.items.append((file_name, label))
+        else:
+            data_tmp = []
+            label_tmp = []
+            skipped_rows = 0
+            with open(self._train_csv, "r") as traincsv:
+                for line in traincsv:
+                    skipped_rows = skipped_rows + 1
+                    if skipped_rows <= skip_rows:
+                        continue
+                    filename = os.path.join(root, line.split(",")[0])
+                    label = line.split(",")[1].strip()
+                    if label not in self.synsets:
+                        self.synsets.append(label)
+                    data_tmp.append(os.path.join(self._root, line.split(",")[0]))
+                    label_tmp.append(self.synsets.index(label))
+
+            #Generating the synset.txt file now
+            with open("./synset.txt", "w") as synsets_file:
+                for item in self.synsets:
+                    synsets_file.write(item+os.linesep)
+            print("Synsets is generated  as synset.txt")
+
+            self._label = nd.array(label_tmp)
+            for i, _ in enumerate(data_tmp):
+                if self._format not in data_tmp[i]:
+                    self.items.append((data_tmp[i]+self._format, self._label[i]))
+
+    def __getitem__(self, idx):
+        """
+            Retrieve the item (data, label) stored at idx in items
+        """
+        filename = self.items[idx][0]
+        label = self.items[idx][1]
+
+        if librosa is not None:
+            X1, _ = librosa.load(filename, res_type='kaiser_fast')
+            return nd.array(X1), label
+
+        else:
+            warnings.warn(" Dependency librosa is not installed! \
+            Cannot load the audio(wav) file into the numpy.ndarray.")
+            return self.items[idx][0], self.items[idx][1]
+
+    def __len__(self):
+        """
+            Retrieves the number of items in the dataset
+        """
+        return len(self.items)
+
+
+    def transform_first(self, fn, lazy=False):
+        """Returns a new dataset with the first element of each sample
+        transformed by the transformer function `fn`.
+
+        This is useful, for example, when you only want to transform data
+        while keeping label as is.
+
+        Parameters
+        ----------
+        fn : callable
+            A transformer function that takes the first element of a sample
+            as input and returns the transformed element.
+        lazy : bool, default False
+            If False, transforms all samples at once. Otherwise,
+            transforms each sample on demand. Note that if `fn`
+            is stochastic, you must set lazy to True or you will
+            get the same result on all epochs.
+
+        Returns
+        -------
+        Dataset
+            The transformed dataset.
+        """
+        return super(AudioFolderDataset, self).transform_first(fn, lazy=lazy)
diff --git a/example/gluon/urban_sounds/model.py b/example/gluon/urban_sounds/model.py
new file mode 100644
index 000000000000..3b3c3500c2bb
--- /dev/null
+++ b/example/gluon/urban_sounds/model.py
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+    This module builds a model an MLP with a configurable output layer( number of units in the last layer).
+    Users can pass any number of units in the last layer. SInce this dataset has 10 labels,
+    the default value of num_labels = 10
+"""
+import mxnet as mx
+from mxnet import gluon
+
+# Defining a neural network with number of labels
+def get_net(num_labels=10):
+    net = gluon.nn.Sequential()
+    with net.name_scope():
+        net.add(gluon.nn.Dense(256, activation="relu")) # 1st layer (256 nodes)
+        net.add(gluon.nn.Dense(256, activation="relu")) # 2nd hidden layer
+        net.add(gluon.nn.Dense(num_labels))
+    net.collect_params().initialize(mx.init.Normal(1.))
+    return net
diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/urban_sounds/predict.py
new file mode 100644
index 000000000000..4bbecb481bb9
--- /dev/null
+++ b/example/gluon/urban_sounds/predict.py
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+""" Prediction module for Urban Sounds Classification
+"""
+import os
+import warnings
+import mxnet as mx
+from mxnet import nd
+from transforms import MFCC
+from model import get_net
+
+def predict(prediction_dir='./Test'):
+    """The function is used to run predictions on the audio files in the directory `pred_directory`.
+
+    Parameters
+    ----------
+    net: 
+        The model that has been trained.
+    prediction_dir: string, default ./Test
+        The directory that contains the audio files on which predictions are to be made
+        
+    """
+
+    try:
+        import librosa
+    except ImportError:
+        warnings.warn("Librosa is not installed! please run the following command pip install librosa.")
+        return
+
+    if not os.path.exists(prediction_dir):
+        warnings.warn("The directory on which predictions are to be made is not found!")
+        return
+
+    if len(os.listdir(prediction_dir)) == 0:
+        warnings.warn("The directory on which predictions are to be made is empty! Exiting...")
+        return
+
+    # Loading synsets
+    if not os.path.exists('./synset.txt'):
+        warnings.warn("The synset or labels for the dataset do not exist. Please run the training script first.")
+        return
+
+    with open("./synset.txt", "r") as f:
+        synset = [l.rstrip() for l in f]
+    net = get_net(len(synset))
+    print("Trying to load the model with the saved parameters...")
+    if not os.path.exists("./net.params"):
+        warnings.warn("The model does not have any saved parameters... Cannot proceed! Train the model first")
+        return
+
+    net.load_parameters("./net.params")
+    file_names = os.listdir(prediction_dir)
+    full_file_names = [os.path.join(prediction_dir, item) for item in file_names]
+    mfcc = MFCC()
+    print("\nStarting predictions for audio files in ", prediction_dir, " ....\n")
+    for filename in full_file_names:
+        # Argument kaiser_fast to res_type is faster than 'kaiser_best'. To reduce the load time, passing kaiser_fast.
+        X1, _ = librosa.load(filename, res_type='kaiser_fast')
+        transformed_test_data = mfcc(mx.nd.array(X1))
+        output = net(transformed_test_data.reshape((1, -1)))
+        prediction = nd.argmax(output, axis=1)
+        print(filename, " -> ", synset[(int)(prediction.asscalar())])
+
+
+if __name__ == '__main__':
+    try:
+        import argparse
+        parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet")
+        parser.add_argument('--pred', '-p', help="Enter the folder path that contains your audio files", type=str)
+        args = parser.parse_args()
+        pred_dir = args.pred or './Test'  # --pred is optional; None would break the path checks in predict()
+
+    except ImportError:
+        warnings.warn("Argparse module not installed! passing default arguments.")
+        pred_dir = './Test'
+    predict(prediction_dir=pred_dir)
+    print("Urban sounds classification Prediction DONE!")
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py
new file mode 100644
index 000000000000..2e12a85aa90b
--- /dev/null
+++ b/example/gluon/urban_sounds/train.py
@@ -0,0 +1,165 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""The module to run training on the Urban sounds dataset
+"""
+import os
+import time
+import warnings
+import mxnet as mx
+from mxnet import gluon, nd, autograd
+from datasets import AudioFolderDataset
+from transforms import MFCC
+import model
+
+def evaluate_accuracy(data_iterator, net):
+    """Function to evaluate accuracy of any data iterator passed to it as an argument"""
+    acc = mx.metric.Accuracy()
+    for _, (data, label) in enumerate(data_iterator):
+        output = net(data)
+        predictions = nd.argmax(output, axis=1)
+        predictions = predictions.reshape((-1, 1))
+        acc.update(preds=predictions, labels=label)
+    return acc.get()[1]
+
+
+def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
+    """The function responsible for running the training the model."""
+    try:
+        import librosa
+    except ImportError:
+        warnings.warn("The dependency librosa is not installed. Cannot continue")
+        return
+    if not train_dir or not os.path.exists(train_dir) or not train_csv:
+        warnings.warn("No train directory could be found ")
+        return
+    # Make a dataset from the local folder containing Audio data
+    print("\nMaking an Audio Dataset...\n")
+    tick = time.time()
+    aud_dataset = AudioFolderDataset(train_dir, train_csv=train_csv, file_format='.wav', skip_rows=1)
+    tock = time.time()
+
+    print("Loading the dataset took ", (tock-tick), " seconds.")
+    print("\n=======================================\n")
+    print("Number of output classes = ", len(aud_dataset.synsets))
+    print("\nThe labels are : \n")
+    print(aud_dataset.synsets)
+    # Get the model to train
+    net = model.get_net(len(aud_dataset.synsets))
+    print("\nNeural Network = \n")
+    print(net)
+    print("\nModel - Neural Network Generated!\n")
+    print("=======================================\n")
+
+    #Define the loss - Softmax CE Loss
+    softmax_loss = gluon.loss.SoftmaxCELoss(from_logits=False, sparse_label=True)
+    print("Loss function initialized!\n")
+    print("=======================================\n")
+
+    #Define the trainer with the optimizer
+    trainer = gluon.Trainer(net.collect_params(), 'adadelta')
+    print("Optimizer - Trainer function initialized!\n")
+    print("=======================================\n")
+    print("Loading the dataset to the Gluon's OOTB Dataloader...")
+
+    #Getting the data loader out of the AudioDataset and passing the transform
+    aud_transform = MFCC()
+    tick = time.time()
+    audio_train_loader = gluon.data.DataLoader(aud_dataset.transform_first(aud_transform),
+                                               batch_size=batch_size, shuffle=True)
+    tock = time.time()
+    print("Time taken to load data and apply transform here is ", (tock-tick), " seconds.")
+    print("=======================================\n")
+
+
+    print("Starting the training....\n")
+    # Training loop
+    tick = time.time()
+    batch_size = batch_size
+    num_examples = len(aud_dataset)
+
+    for e in range(epochs):
+        cumulative_loss = 0
+        for _, (data, label) in enumerate(audio_train_loader):
+            with autograd.record():
+                output = net(data)
+                loss = softmax_loss(output, label)
+            loss.backward()
+
+            trainer.step(batch_size)
+            cumulative_loss += mx.nd.sum(loss).asscalar()
+
+        if e%5 == 0:
+            train_accuracy = evaluate_accuracy(audio_train_loader, net)
+            print("Epoch %s. Loss: %s Train accuracy : %s " % (e, cumulative_loss/num_examples, train_accuracy))
+            print("\n------------------------------\n")
+
+    train_accuracy = evaluate_accuracy(audio_train_loader, net)
+    tock = time.time()
+    print("\nFinal training accuracy: ", train_accuracy)
+
+    print("Training the sound classification for ", epochs, " epochs, MLP model took ", (tock-tick), " seconds")
+    print("====================== END ======================\n")
+
+    print("Trying to save the model parameters here...")
+    net.save_parameters("./net.params")
+    print("Saved the model parameters in current directory.")
+
+
+if __name__ == '__main__':
+
+    try:
+        import argparse
+        parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet")
+        parser.add_argument('--train', '-t', help="Enter the folder path that contains your audio files", type=str)
+        parser.add_argument('--csv', '-c', help="Enter the filename of the csv that contains filename\
+        to label mapping", type=str)
+        parser.add_argument('--epochs', '-e', help="Enter the number of epochs \
+        you would want to run the training for.", type=int)
+        parser.add_argument('--batch_size', '-b', help="Enter the batch_size of data", type=int)
+        args = parser.parse_args()
+
+        if args:
+            if args.train:
+                training_dir = args.train
+            else:
+                training_dir = './Train'
+
+            if args.csv:
+                training_csv = args.csv
+            else:
+                training_csv = './train.csv'
+
+            if args.epochs:
+                eps = args.epochs
+            else:
+                eps = 30
+
+            if args.batch_size:
+                batch_sz = args.batch_size
+            else:
+                batch_sz = 32
+
+    except ImportError as er:
+        warnings.warn("Argument parsing module could not be imported \
+        Passing default arguments.")
+        training_dir = './Train'
+        training_csv = './train.csv'
+        eps = 30
+        batch_sz = 32
+
+    train(train_dir=training_dir, train_csv=training_csv, epochs=eps, batch_size=batch_sz)
+    print("Urban sounds classification Training DONE!")
diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py
new file mode 100644
index 000000000000..ec626e42fb0b
--- /dev/null
+++ b/example/gluon/urban_sounds/transforms.py
@@ -0,0 +1,210 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable= arguments-differ
+"Audio transforms."
+
+import warnings
+import numpy as np
+try:
+    import librosa
+except ImportError as e:
+    warnings.warn("gluon/contrib/data/audio/transforms.py : librosa dependency could not be resolved or \
+    imported, could not provide some/all transform.")
+
+from mxnet import ndarray as nd
+from mxnet.gluon.block import Block
+
+class MFCC(Block):
+    """Extracts Mel frequency cepstrum coefficients from an audio signal.
+    More details : https://librosa.github.io/librosa/generated/librosa.feature.mfcc.html
+
+    Parameters
+    ----------
+    sampling_rate: int, default 22050
+        sampling rate of the input audio signal
+    num_mfcc: int, default 20
+        number of mfccs to return
+
+
+    Inputs:
+        - **x**: input tensor (samples, ) shape.
+
+    Outputs:
+        - **out**: output NDArray with (num_mfcc, ) shape, each coefficient averaged over all frames.
+
+    """
+
+    def __init__(self, sampling_rate=22050, num_mfcc=20):
+        self._sampling_rate = sampling_rate
+        self._num_fcc = num_mfcc
+        super(MFCC, self).__init__()
+
+    def forward(self, x):
+        if not librosa:
+            warnings.warn("Librosa dependency is not installed! Install that and retry")
+            return x
+        if isinstance(x, np.ndarray):
+            y = x
+        elif isinstance(x, nd.NDArray):
+            y = x.asnumpy()
+        else:
+            warnings.warn("MFCC - allowed datatypes mx.nd.NDArray and numpy.ndarray")
+            return x
+
+        audio_tmp = np.mean(librosa.feature.mfcc(y=y, sr=self._sampling_rate, n_mfcc=self._num_fcc).T, axis=0)
+        return nd.array(audio_tmp)
+
+
+class Scale(Block):
+    """Scale audio numpy.ndarray from a 16-bit integer to a floating point number between
+    -1.0 and 1.0. The 16-bit integer is the sample resolution or bit depth.
+
+    Attributes
+    ----------
+    scale_factor : float
+        The factor to scale the input tensor by.
+
+
+    Inputs:
+        - **x**: input tensor (samples, ) shape.
+
+    Outputs:
+        - **out**: output array is a scaled NDArray with (samples, ) shape.
+
+    Examples
+    --------
+    >>> scale = audio.transforms.Scale(scale_factor=2)
+    >>> audio_samples = mx.nd.array([2,3,4])
+    >>> scale(audio_samples)
+    [1.  1.5 2. ]
+    <NDArray 3 @cpu(0)>
+
+    """
+
+    def __init__(self, scale_factor=2**31):
+        self.scale_factor = scale_factor
+        super(Scale, self).__init__()
+
+    def forward(self, x):
+        if isinstance(x, np.ndarray):
+            return nd.array(x/self.scale_factor)
+        return x / self.scale_factor
+
+
+class PadTrim(Block):
+    """Pad/Trim a 1-D NDArray or NumPy array (signal or labels) to a fixed length.
+
+    Parameters
+    ----------
+    max_len : int
+        Length to which the array will be padded or trimmed.
+    fill_value: int or float
+        Value appended at the end of the input array when padding is needed.
+
+
+    Inputs:
+        - **x**: input tensor (samples, ) shape.
+
+    Outputs:
+        - **out**: output array is a padded or trimmed NDArray with (max_len, ) shape.
+
+    Examples
+    --------
+    >>> padtrim = audio.transforms.PadTrim(max_len=9, fill_value=0)
+    >>> audio_samples = mx.nd.array([1,2,3,4,5])
+    >>> padtrim(audio_samples)
+    [1. 2. 3. 4. 5. 0. 0. 0. 0.]
+    <NDArray 9 @cpu(0)>
+
+    """
+
+    def __init__(self, max_len, fill_value=0):
+        self._max_len = max_len
+        self._fill_value = fill_value
+        super(PadTrim, self).__init__()
+
+    def forward(self, x):
+        if  isinstance(x, np.ndarray):
+            x = nd.array(x)
+        if self._max_len > x.size:
+            pad = nd.ones((self._max_len - x.size,)) * self._fill_value
+            x = nd.concat(x, pad, dim=0)
+        elif self._max_len < x.size:
+            x = x[:self._max_len]
+        return x
+
+
+class MEL(Block):
+    """Create MEL Spectrograms from a raw audio signal. Relatively slow.
+
+    Parameters
+    ----------
+    sampling_rate: int, default 22050
+        sampling rate of the input audio signal
+    num_fft: int, default 2048
+        length of the Fast Fourier transform window
+    num_mels: int, default 20
+        number of mel bands to generate
+    hop_length: int, default 512
+        total samples between successive frames
+
+
+    Inputs:
+        - **x**: input tensor (samples, ) shape.
+
+    Outputs:
+        - **out**: output NDArray holding the mel spectrogram, shape = (num_mels, frames)
+
+       Usage (see librosa.feature.melspectrogram docs):
+           MEL(sampling_rate=16000, num_fft=1600, hop_length=800, num_mels=64)
+
+    Examples
+    --------
+    >>> mel = audio.transforms.MEL()
+    >>> audio_samples = mx.nd.array([1,2,3,4,5])
+    >>> mel(audio_samples)
+    [[3.81801406e+04]
+    [9.86858240e-29]
+    [1.87405472e-29]
+    [2.38637225e-29]
+    [3.94043010e-29]
+    [3.67071565e-29]
+    [7.29390295e-29]
+    [8.84324438e-30]...
+    <NDArray 128x1 @cpu(0)>
+
+    """
+
+    def __init__(self, sampling_rate=22050, num_fft=2048, num_mels=20, hop_length=512):
+        self._sampling_rate = sampling_rate
+        self._num_fft = num_fft
+        self._num_mels = num_mels
+        self._hop_length = hop_length
+        super(MEL, self).__init__()
+
+    def forward(self, x):
+        if librosa is None:
+            warnings.warn("Cannot create spectrograms, since dependency librosa is not installed!")
+            return x
+        if isinstance(x, nd.NDArray):
+            x = x.asnumpy()
+        specs = librosa.feature.melspectrogram(x, sr=self._sampling_rate,\
+        n_fft=self._num_fft, n_mels=self._num_mels, hop_length=self._hop_length)
+        return nd.array(specs)
+ 
\ No newline at end of file

From 8360a4e22ac77268911bd193d19aa1947296defc Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Mon, 19 Nov 2018 17:38:51 -0800
Subject: [PATCH 02/21] Addressed PR comments, about existing synset.txt file -
 no overwrite

---
 example/gluon/urban_sounds/datasets.py   | 17 +++++++++--------
 example/gluon/urban_sounds/model.py      |  7 +++----
 example/gluon/urban_sounds/predict.py    |  4 ++--
 example/gluon/urban_sounds/train.py      |  6 +-----
 example/gluon/urban_sounds/transforms.py |  6 ------
 5 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index 013f8fa42da1..52280c467592 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -22,7 +22,6 @@
 
 import os
 import warnings
-import mxnet as mx
 from mxnet.gluon.data import Dataset
 from mxnet import ndarray as nd
 try:
@@ -77,9 +76,8 @@ def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0):
 
 
     def _list_audio_files(self, root, skip_rows=0):
-        """
-            Populates synsets - a map of index to label for the data items.
-            Populates the data in the dataset, making tuples of (data, label)
+        """Populates synsets - a map of index to label for the data items.
+        Populates the data in the dataset, making tuples of (data, label)
         """
         self.synsets = []
         self.items = []
@@ -115,10 +113,13 @@ def _list_audio_files(self, root, skip_rows=0):
                     label_tmp.append(self.synsets.index(label))
 
             #Generating the synset.txt file now
-            with open("./synset.txt", "w") as synsets_file:
-                for item in self.synsets:
-                    synsets_file.write(item+os.linesep)
-            print("Synsets is generated  as synset.txt")
+            if not os.path.exists("./synset.txt"):
+                with open("./synset.txt", "w") as synsets_file:
+                    for item in self.synsets:
+                        synsets_file.write(item+os.linesep)
+                print("Synsets is generated  as synset.txt")
+            else:
+                warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.")
 
             self._label = nd.array(label_tmp)
             for i, _ in enumerate(data_tmp):
diff --git a/example/gluon/urban_sounds/model.py b/example/gluon/urban_sounds/model.py
index 3b3c3500c2bb..5933aaa57b6f 100644
--- a/example/gluon/urban_sounds/model.py
+++ b/example/gluon/urban_sounds/model.py
@@ -15,10 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""
-    This module builds a model an MLP with a configurable output layer( number of units in the last layer).
-    Users can pass any number of units in the last layer. SInce this dataset has 10 labels,
-    the default value of num_labels = 10
+"""This module builds an MLP model with a configurable output layer (number of units in the last layer).
+Users can pass any number of units in the last layer. Since this dataset has 10 labels,
+the default value of num_labels = 10
 """
 import mxnet as mx
 from mxnet import gluon
diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/urban_sounds/predict.py
index 4bbecb481bb9..9b92541bb27b 100644
--- a/example/gluon/urban_sounds/predict.py
+++ b/example/gluon/urban_sounds/predict.py
@@ -28,11 +28,11 @@ def predict(prediction_dir='./Test'):
 
     Parameters
     ----------
-    net: 
+    net:
         The model that has been trained.
     prediction_dir: string, default ./Test
         The directory that contains the audio files on which predictions are to be made
-        
+
     """
 
     try:
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py
index 2e12a85aa90b..04c8f20b879f 100644
--- a/example/gluon/urban_sounds/train.py
+++ b/example/gluon/urban_sounds/train.py
@@ -38,11 +38,7 @@ def evaluate_accuracy(data_iterator, net):
 
 def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
     """The function responsible for running the training the model."""
-    try:
-        import librosa
-    except ImportError:
-        warnings.warn("The dependency librosa is not installed. Cannot continue")
-        return
+
     if not train_dir or not os.path.exists(train_dir) or not train_csv:
         warnings.warn("No train directory could be found ")
         return
diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py
index ec626e42fb0b..ef079aa61ec3 100644
--- a/example/gluon/urban_sounds/transforms.py
+++ b/example/gluon/urban_sounds/transforms.py
@@ -56,9 +56,6 @@ def __init__(self, sampling_rate=22050, num_mfcc=20):
         super(MFCC, self).__init__()
 
     def forward(self, x):
-        if not librosa:
-            warnings.warn("Librosa dependency is not installed! Install that and retry")
-            return x
         if isinstance(x, np.ndarray):
             y = x
         elif isinstance(x, nd.NDArray):
@@ -199,9 +196,6 @@ def __init__(self, sampling_rate=22050, num_fft=2048, num_mels=20, hop_length=51
         super(MEL, self).__init__()
 
     def forward(self, x):
-        if librosa is None:
-            warnings.warn("Cannot create spectrograms, since dependency librosa is not installed!")
-            return x
         if isinstance(x, nd.NDArray):
             x = x.asnumpy()
         specs = librosa.feature.melspectrogram(x, sr=self._sampling_rate,\

From 5e006827dcdc8965fbc59c668fb5aa2e48ac9968 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Mon, 19 Nov 2018 17:44:46 -0800
Subject: [PATCH 03/21] RST - docstring issues fixed

---
 example/gluon/urban_sounds/datasets.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index 52280c467592..dc760bf74fb4 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -54,12 +54,14 @@ class AudioFolderDataset(Dataset):
     skip_rows: int, default 0
         While reading from csv file, how many rows to skip at the start of the file to avoid reading in header
 
+
     Attributes
     ----------
     synsets : list
         List of class names. `synsets[i]` is the name for the integer label `i`
     items : list of tuples
         List of all audio in (filename, label) pairs.
+
     """
     def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0):
         if not librosa:
@@ -127,9 +129,7 @@ def _list_audio_files(self, root, skip_rows=0):
                     self.items.append((data_tmp[i]+self._format, self._label[i]))
 
     def __getitem__(self, idx):
-        """
-            Retrieve the item (data, label) stored at idx in items
-        """
+        """Retrieve the item (data, label) stored at idx in items"""
         filename = self.items[idx][0]
         label = self.items[idx][1]
 
@@ -143,9 +143,7 @@ def __getitem__(self, idx):
             return self.items[idx][0], self.items[idx][1]
 
     def __len__(self):
-        """
-            Retrieves the number of items in the dataset
-        """
+        """Retrieves the number of items in the dataset"""
         return len(self.items)
 
 
@@ -171,5 +169,6 @@ def transform_first(self, fn, lazy=True):
         -------
         Dataset
             The transformed dataset.
+
         """
         return super(AudioFolderDataset, self).transform_first(fn, lazy=False)

From 3385a7d84d7f2ee89b3ddd0c3c2e0beeaddfb4ce Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Mon, 19 Nov 2018 17:56:25 -0800
Subject: [PATCH 04/21] added README

---
 example/gluon/urban_sounds/README.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 example/gluon/urban_sounds/README.md

diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md
new file mode 100644
index 000000000000..f7e33136092f
--- /dev/null
+++ b/example/gluon/urban_sounds/README.md
@@ -0,0 +1,22 @@
+# Urban Sounds classification in MXNet
+
+Urban Sounds Dataset:
+## Description
+  The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on.
+  The task is to classify these audio samples into one of the 10 labels.
+
+To be able to run this example:
+
+1. Download the dataset(train.zip, test.zip) required for this example from the location:
+**https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU**
+  
+
+2. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,\
+   **Train** and **Test** and two csv files - **train.csv**, **test.csv**
+
+3. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/**
+
+4. Librosa is installed. To install, use the commands
+   `pip install librosa`,
+   For more details, refer here:
+   **https://librosa.github.io/librosa/install.html**

From 6d029aee3db156cb8ce26e03cbbfd1ebc58d2f2a Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Tue, 20 Nov 2018 12:39:40 -0800
Subject: [PATCH 05/21] Addressed PR comments

---
 example/gluon/urban_sounds/README.md     |  4 ++++
 example/gluon/urban_sounds/datasets.py   | 13 +++++++++----
 example/gluon/urban_sounds/transforms.py |  2 +-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md
index f7e33136092f..ad56be6dc38b 100644
--- a/example/gluon/urban_sounds/README.md
+++ b/example/gluon/urban_sounds/README.md
@@ -20,3 +20,7 @@ To be able to run this example:
    `pip install librosa`,
    For more details, refer here:
    **https://librosa.github.io/librosa/install.html**
+
+
+For information on the current design of how the AudioFolderDataset is implemented, refer below:
+**https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio**
\ No newline at end of file
diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index dc760bf74fb4..f0529aa05129 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -27,8 +27,8 @@
 try:
     import librosa
 except ImportError as e:
-    warnings.warn("gluon/contrib/data/audio/datasets.py : librosa dependency could not be resolved or \
-    imported, could not load audio onto the numpy array.")
+    warnings.warn("librosa dependency could not be resolved or \
+    imported, could not load audio onto the numpy array. pip install librosa")
 
 
 class AudioFolderDataset(Dataset):
@@ -50,7 +50,7 @@ class AudioFolderDataset(Dataset):
     train_csv: str, default None
        train_csv should be populated by the training csv filename
     file_format: str, default '.wav'
-        The format of the audio files(.wav, .mp3)
+        The format of the audio files(.wav)
     skip_rows: int, default 0
         While reading from csv file, how many rows to skip at the start of the file to avoid reading in header
 
@@ -133,6 +133,9 @@ def __getitem__(self, idx):
         filename = self.items[idx][0]
         label = self.items[idx][1]
 
+        # res_type is resampling type for the audio signal
+        # can be passed values like 'kaiser_best', 'kaiser_fast'. 'kaiser_fast' performs better and used
+        # more than kaiser_best
         if librosa is not None:
             X1, _ = librosa.load(filename, res_type='kaiser_fast')
             return nd.array(X1), label
@@ -147,12 +150,14 @@ def __len__(self):
         return len(self.items)
 
 
-    def transform_first(self, fn, lazy=True):
+    def transform_first(self, fn, lazy=False):
         """Returns a new dataset with the first element of each sample
         transformed by the transformer function `fn`.
 
         This is useful, for example, when you only want to transform data
         while keeping label as is.
+        lazy=False is passed to transform_first for dataset so that all transforms could be performed in
+        one shot and not during training. This is a performance consideration.
 
         Parameters
         ----------
diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py
index ef079aa61ec3..822314238734 100644
--- a/example/gluon/urban_sounds/transforms.py
+++ b/example/gluon/urban_sounds/transforms.py
@@ -24,7 +24,7 @@
 try:
     import librosa
 except ImportError as e:
-    warnings.warn("gluon/contrib/data/audio/transforms.py : librosa dependency could not be resolved or \
+    warnings.warn("librosa dependency could not be resolved or \
     imported, could not provide some/all transform.")
 
 from mxnet import ndarray as nd

From 1e30f7c3a9829cadd2ecb421da58b35fe9ba6439 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Tue, 20 Nov 2018 15:13:50 -0800
Subject: [PATCH 06/21] Addressed PR comments, checking Divide by 0

---
 example/gluon/urban_sounds/datasets.py   | 23 +++++++----------------
 example/gluon/urban_sounds/train.py      |  4 ++--
 example/gluon/urban_sounds/transforms.py |  3 +++
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index f0529aa05129..112669acc50d 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -87,7 +87,7 @@ def _list_audio_files(self, root, skip_rows=0):
             for folder in sorted(os.listdir(root)):
                 path = os.path.join(root, folder)
                 if not os.path.isdir(path):
-                    warnings.warn('Ignoring %s, which is not a directory.'%path, stacklevel=3)
+                    warnings.warn('Ignoring {}, which is not a directory.'.format(path))
                     continue
                 label = len(self.synsets)
                 self.synsets.append(folder)
@@ -95,12 +95,11 @@ def _list_audio_files(self, root, skip_rows=0):
                     file_name = os.path.join(path, filename)
                     ext = os.path.splitext(file_name)[1]
                     if ext.lower() not in self._exts:
-                        warnings.warn('Ignoring %s of type %s. Only support %s'%(filename, ext, ', '.join(self._exts)))
+                        warnings.warn('Ignoring {} of type {}. Only support {}'\
+                        .format(filename, ext, ', '.join(self._exts)))
                         continue
                     self.items.append((file_name, label))
         else:
-            data_tmp = []
-            label_tmp = []
             skipped_rows = 0
             with open(self._train_csv, "r") as traincsv:
                 for line in traincsv:
@@ -111,35 +110,27 @@ def _list_audio_files(self, root, skip_rows=0):
                     label = line.split(",")[1].strip()
                     if label not in self.synsets:
                         self.synsets.append(label)
-                    data_tmp.append(os.path.join(self._root, line.split(",")[0]))
-                    label_tmp.append(self.synsets.index(label))
+                    if self._format not in filename:
+                        filename = filename+self._format
+                    self.items.append((filename, nd.array(self.synsets.index(label)).reshape((1,))))
 
             #Generating the synset.txt file now
             if not os.path.exists("./synset.txt"):
                 with open("./synset.txt", "w") as synsets_file:
                     for item in self.synsets:
                         synsets_file.write(item+os.linesep)
-                print("Synsets is generated  as synset.txt")
+                print("Synsets is generated as synset.txt")
             else:
                 warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.")
 
-            self._label = nd.array(label_tmp)
-            for i, _ in enumerate(data_tmp):
-                if self._format not in data_tmp[i]:
-                    self.items.append((data_tmp[i]+self._format, self._label[i]))
 
     def __getitem__(self, idx):
         """Retrieve the item (data, label) stored at idx in items"""
         filename = self.items[idx][0]
         label = self.items[idx][1]
-
-        # res_type is resampling type for the audio signal
-        # can be passed values like 'kaiser_best', 'kaiser_fast'. 'kaiser_fast' performs better and used
-        # more than kaiser_best
         if librosa is not None:
             X1, _ = librosa.load(filename, res_type='kaiser_fast')
             return nd.array(X1), label
-
         else:
             warnings.warn(" Dependency librosa is not installed! \
             Cannot load the audio(wav) file into the numpy.ndarray.")
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py
index 04c8f20b879f..6dae4e8f18a2 100644
--- a/example/gluon/urban_sounds/train.py
+++ b/example/gluon/urban_sounds/train.py
@@ -28,7 +28,7 @@
 def evaluate_accuracy(data_iterator, net):
     """Function to evaluate accuracy of any data iterator passed to it as an argument"""
     acc = mx.metric.Accuracy()
-    for _, (data, label) in enumerate(data_iterator):
+    for data, label in data_iterator:
         output = net(data)
         predictions = nd.argmax(output, axis=1)
         predictions = predictions.reshape((-1, 1))
@@ -89,7 +89,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
 
     for e in range(epochs):
         cumulative_loss = 0
-        for _, (data, label) in enumerate(audio_train_loader):
+        for data, label in audio_train_loader:
             with autograd.record():
                 output = net(data)
                 loss = softmax_loss(output, label)
diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py
index 822314238734..2e5c807461fe 100644
--- a/example/gluon/urban_sounds/transforms.py
+++ b/example/gluon/urban_sounds/transforms.py
@@ -99,6 +99,9 @@ def __init__(self, scale_factor=2**31):
         super(Scale, self).__init__()
 
     def forward(self, x):
+        if self.scale_factor == 0:
+            warnings.warn("Scale factor cannot be 0.")
+            return x
         if isinstance(x, np.ndarray):
             return nd.array(x/self.scale_factor)
         return x / self.scale_factor

From 662749bb1c4b9fc9633b68ecb7ef050f48f54574 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Tue, 20 Nov 2018 15:18:07 -0800
Subject: [PATCH 07/21] Raising error if format is not supported.

---
 example/gluon/urban_sounds/datasets.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index 112669acc50d..b26b1546778c 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -72,8 +72,8 @@ def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0):
         self._format = file_format
         self._train_csv = train_csv
         if file_format.lower() not in self._exts:
-            warnings.warn("format {} not supported currently.".format(file_format))
-            return
+            raise RuntimeError("format {} not supported currently.".format(file_format))
+
         self._list_audio_files(self._root, skip_rows=skip_rows)
 
 
From acf48c4f95bd64110990201a34b0e6fa4522dff9 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Wed, 21 Nov 2018 09:50:07 -0800
Subject: [PATCH 08/21] changed a line for ndarray of labels

---
 example/gluon/urban_sounds/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index b26b1546778c..7ecc8783dbcf 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -112,7 +112,7 @@ def _list_audio_files(self, root, skip_rows=0):
                         self.synsets.append(label)
                     if self._format not in filename:
                         filename = filename+self._format
-                    self.items.append((filename, nd.array(self.synsets.index(label)).reshape((1,))))
+                    self.items.append((filename, nd.array([self.synsets.index(label)]).reshape((1,))))
 
             #Generating the synset.txt file now
             if not os.path.exists("./synset.txt"):

From 5e37fb8f7ce19dc279ace479a285199a174f3b26 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Wed, 21 Nov 2018 16:10:50 -0800
Subject: [PATCH 09/21] Trigger CI


From 4fe850c854f58a728ac3cf182c6694a6b01d4b1c Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Thu, 22 Nov 2018 10:35:09 -0800
Subject: [PATCH 10/21] Trigger CI


From 214d4baf259fe067bb7deb60349d31c85d1fa40d Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Mon, 26 Nov 2018 17:47:17 -0800
Subject: [PATCH 11/21] PR comments addressed around skip_header argument

---
 example/gluon/urban_sounds/README.md     | 41 +++++++++++++++++++++++-
 example/gluon/urban_sounds/datasets.py   | 15 +++++----
 example/gluon/urban_sounds/model.py      |  4 +--
 example/gluon/urban_sounds/train.py      |  2 +-
 example/gluon/urban_sounds/transforms.py |  4 +--
 5 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md
index ad56be6dc38b..f16206f2d4a5 100644
--- a/example/gluon/urban_sounds/README.md
+++ b/example/gluon/urban_sounds/README.md
@@ -23,4 +23,43 @@ To be able to run this example:
 
 
 For information on the current design of how the AudioFolderDataset is implemented, refer below:
-**https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio**
\ No newline at end of file
+**https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio**
+
+## Usage 
+
+For training:
+
+- arguments
+  - train : The folder/directory that contains the audio(wav) files locally. Default = "./Train"
+  - csv: The file name of the csv file that contains audio file name to label mapping. Default = "train.csv"
+  - epochs : Number of epochs to train the model. Default = 30
+  - batch_size : The batch size for training. Default = 32
+
+
+###### default setting
+```
+python train.py
+``` 
+or
+
+###### manual setting
+```
+python train.py --train ./Train --csv train.csv --batch_size 32 --epochs 30 
+```
+
+For prediction:
+
+- arguments
+  - pred : The folder/directory that contains the audio(wav) files which are to be classified. Default = "./Test"
+
+
+###### default setting
+```
+python predict.py
+``` 
+or
+
+###### manual setting
+```
+python predict.py --pred ./Test
+```
\ No newline at end of file
diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index 7ecc8783dbcf..2b4fe7519e25 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -39,7 +39,7 @@ class AudioFolderDataset(Dataset):
         root/drilling/26.wav
         root/dog_barking/42.wav
             OR
-        Files(wav) and a csv file that has filename and associated label
+        Files(wav) and a csv file that has file name and associated label
 
     Parameters
     ----------
@@ -51,8 +51,8 @@ class AudioFolderDataset(Dataset):
        train_csv should be populated by the training csv filename
     file_format: str, default '.wav'
         The format of the audio files(.wav)
-    skip_rows: int, default 0
-        While reading from csv file, how many rows to skip at the start of the file to avoid reading in header
+    skip_header: boolean, default False
+        Whether to skip the first row of the csv file so that the header is not read in as data
 
 
     Attributes
@@ -63,7 +63,7 @@ class AudioFolderDataset(Dataset):
         List of all audio in (filename, label) pairs.
 
     """
-    def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0):
+    def __init__(self, root, train_csv=None, file_format='.wav', skip_header=False):
         if not librosa:
             warnings.warn("pip install librosa to continue.")
             return
@@ -73,7 +73,10 @@ def __init__(self, root, train_csv=None, file_format='.wav', skip_rows=0):
         self._train_csv = train_csv
         if file_format.lower() not in self._exts:
             raise RuntimeError("format {} not supported currently.".format(file_format))
-
+        if skip_header:
+            skip_rows = 1
+        else:
+            skip_rows = 0
         self._list_audio_files(self._root, skip_rows=skip_rows)
 
 
@@ -153,7 +156,7 @@ def transform_first(self, fn, lazy=False):
         Parameters
         ----------
         fn : callable
-            A transformer function that takes the first elemtn of a sample
+            A transformer function that takes the first element of a sample
             as input and returns the transformed element.
         lazy : bool, default True
             If False, transforms all samples at once. Otherwise,
diff --git a/example/gluon/urban_sounds/model.py b/example/gluon/urban_sounds/model.py
index 5933aaa57b6f..af23cb946e2e 100644
--- a/example/gluon/urban_sounds/model.py
+++ b/example/gluon/urban_sounds/model.py
@@ -27,7 +27,7 @@ def get_net(num_labels=10):
     net = gluon.nn.Sequential()
     with net.name_scope():
         net.add(gluon.nn.Dense(256, activation="relu")) # 1st layer (256 nodes)
-        net.add(gluon.nn.Dense(256, activation="relu")) # 2nd hidden layer
+        net.add(gluon.nn.Dense(256, activation="relu")) # 2nd hidden layer ( 256 nodes )
         net.add(gluon.nn.Dense(num_labels))
-    net.collect_params().initialize(mx.init.Normal(1.))
+    net.collect_params().initialize(mx.init.Xavier())
     return net
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py
index 6dae4e8f18a2..a2e3066bd332 100644
--- a/example/gluon/urban_sounds/train.py
+++ b/example/gluon/urban_sounds/train.py
@@ -45,7 +45,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
     # Make a dataset from the local folder containing Audio data
     print("\nMaking an Audio Dataset...\n")
     tick = time.time()
-    aud_dataset = AudioFolderDataset(train_dir, train_csv=train_csv, file_format='.wav', skip_rows=1)
+    aud_dataset = AudioFolderDataset(train_dir, train_csv=train_csv, file_format='.wav', skip_header=True)
     tock = time.time()
 
     print("Loading the dataset took ", (tock-tick), " seconds.")
diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py
index 2e5c807461fe..a75e1543338d 100644
--- a/example/gluon/urban_sounds/transforms.py
+++ b/example/gluon/urban_sounds/transforms.py
@@ -115,7 +115,7 @@ class PadTrim(Block):
     max_len : int
         Length to which the array will be padded or trimmed to.
     fill_value: int or float
-        If there is a need of padding, what value to padd at the end of the input array
+        If there is a need of padding, what value to pad at the end of the input array.
 
 
     Inputs:
@@ -158,7 +158,7 @@ class MEL(Block):
     sampling_rate: int, default 22050
         sampling rate of the input audio signal
     num_fft: int, default 2048
-        length of the Fast fourier transform window
+        length of the Fast Fourier transform window
     num_mels: int, default 20
         number of mel bands to generate
     hop_length: int, default 512

From 75e1507152d2ab98074d81f27055484492d60f09 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Tue, 27 Nov 2018 15:39:52 -0800
Subject: [PATCH 12/21] Addressed PR comments around librosa import

---
 example/gluon/urban_sounds/README.md        |  29 ++++--
 example/gluon/urban_sounds/datasets.py      | 109 ++++++++++----------
 example/gluon/urban_sounds/predict.py       |  10 +-
 example/gluon/urban_sounds/requirements.txt |   2 +
 example/gluon/urban_sounds/train.py         |  20 ++--
 example/gluon/urban_sounds/transforms.py    |   1 -
 6 files changed, 90 insertions(+), 81 deletions(-)
 create mode 100644 example/gluon/urban_sounds/requirements.txt

diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md
index f16206f2d4a5..76e6c20d4cd3 100644
--- a/example/gluon/urban_sounds/README.md
+++ b/example/gluon/urban_sounds/README.md
@@ -1,5 +1,13 @@
 # Urban Sounds classification in MXNet
 
+This example provides an end-to-end pipeline for a common datahack competition - Urban Sounds Classification Example.
+Below is the link to the competition:
+https://datahack.analyticsvidhya.com/contest/practice-problem-urban-sound-classification/
+
+After logging in, the data set can be downloaded.
+The details of the dataset and the link to download it are given below:
+
+
 Urban Sounds Dataset:
 ## Description
   The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on.
@@ -7,19 +15,22 @@ Urban Sounds Dataset:
 
 To be able to run this example:
 
-1. Download the dataset(train.zip, test.zip) required for this example from the location:
-**https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU**
-  
+1. `pip install -r ./requirements.txt`
+
+    This step installs the required libraries to run the example.
+    The main dependency that is required is: Librosa. 
+    The version used to test the example is: `0.6.2`
+    For more details, refer here:
+*https://librosa.github.io/librosa/install.html*
+
+2. Download the dataset(train.zip, test.zip) required for this example from the location:
+https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU
 
-2. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,\
+3. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,\
    **Train** and **Test** and two csv files - **train.csv**, **test.csv**
 
-3. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/**
+4. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/**
 
-4. Librosa is installed. To install, use the commands
-   `pip install librosa`,
-   For more details, refer here:
-   **https://librosa.github.io/librosa/install.html**
 
 
 For information on the current design of how the AudioFolderDataset is implemented, refer below:
diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index 2b4fe7519e25..78da5eb2e84d 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -22,15 +22,18 @@
 
 import os
 import warnings
+from itertools import islice
+import csv
 from mxnet.gluon.data import Dataset
 from mxnet import ndarray as nd
 try:
     import librosa
 except ImportError as e:
-    warnings.warn("librosa dependency could not be resolved or \
+    raise ImportError("librosa dependency could not be resolved or \
     imported, could not load audio onto the numpy array. pip install librosa")
 
 
+
 class AudioFolderDataset(Dataset):
     """A dataset for loading Audio files stored in a folder structure like::
 
@@ -58,7 +61,7 @@ class AudioFolderDataset(Dataset):
     Attributes
     ----------
     synsets : list
-        List of class names. `synsets[i]` is the name for the integer label `i`
+        List of class names. `synsets[i]` is the name for the  `i`th label
     items : list of tuples
         List of all audio in (filename, label) pairs.
 
@@ -66,17 +69,16 @@ class AudioFolderDataset(Dataset):
     def __init__(self, root, train_csv=None, file_format='.wav', skip_header=False):
         if not librosa:
             warnings.warn("pip install librosa to continue.")
-            return
+            raise RuntimeError("Librosa not installed. Run pip install librosa and retry this step.")
         self._root = os.path.expanduser(root)
         self._exts = ['.wav']
         self._format = file_format
         self._train_csv = train_csv
         if file_format.lower() not in self._exts:
             raise RuntimeError("format {} not supported currently.".format(file_format))
+        skip_rows = 0
         if skip_header:
             skip_rows = 1
-        else:
-            skip_rows = 0
         self._list_audio_files(self._root, skip_rows=skip_rows)
 
 
@@ -86,58 +88,61 @@ def _list_audio_files(self, root, skip_rows=0):
         """
         self.synsets = []
         self.items = []
-        if self._train_csv is None:
-            for folder in sorted(os.listdir(root)):
-                path = os.path.join(root, folder)
-                if not os.path.isdir(path):
-                    warnings.warn('Ignoring {}, which is not a directory.'.format(path))
-                    continue
-                label = len(self.synsets)
-                self.synsets.append(folder)
-                for filename in sorted(os.listdir(path)):
-                    file_name = os.path.join(path, filename)
-                    ext = os.path.splitext(file_name)[1]
-                    if ext.lower() not in self._exts:
-                        warnings.warn('Ignoring {} of type {}. Only support {}'\
-                        .format(filename, ext, ', '.join(self._exts)))
-                        continue
-                    self.items.append((file_name, label))
+        if not self._train_csv:
+            # The audio files are organized in folder structure with
+            # directory name as label and audios in them
+            self._folder_structure(root)
         else:
-            skipped_rows = 0
-            with open(self._train_csv, "r") as traincsv:
-                for line in traincsv:
-                    skipped_rows = skipped_rows + 1
-                    if skipped_rows <= skip_rows:
-                        continue
-                    filename = os.path.join(root, line.split(",")[0])
-                    label = line.split(",")[1].strip()
-                    if label not in self.synsets:
-                        self.synsets.append(label)
-                    if self._format not in filename:
-                        filename = filename+self._format
-                    self.items.append((filename, nd.array([self.synsets.index(label)]).reshape((1,))))
-
-            #Generating the synset.txt file now
-            if not os.path.exists("./synset.txt"):
-                with open("./synset.txt", "w") as synsets_file:
-                    for item in self.synsets:
-                        synsets_file.write(item+os.linesep)
-                print("Synsets is generated as synset.txt")
-            else:
-                warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.")
+            # train_csv contains mapping between filename and label
+            self._csv_labelled_dataset(root, skip_rows=skip_rows)
+
+        #Generating the synset.txt file now
+        if not os.path.exists("./synset.txt"):
+            with open("./synset.txt", "w") as synsets_file:
+                for item in self.synsets:
+                    synsets_file.write(item+os.linesep)
+            print("Synsets is generated as synset.txt")
+        else:
+            warnings.warn("Synset file already exists in the current directory! Not generating synset.txt.")
+
+
+    def _folder_structure(self, root):
+        for folder in sorted(os.listdir(root)):
+            path = os.path.join(root, folder)
+            if not os.path.isdir(path):
+                warnings.warn('Ignoring {}, which is not a directory.'.format(path))
+                continue
+            label = len(self.synsets)
+            self.synsets.append(folder)
+            for filename in sorted(os.listdir(path)):
+                file_name = os.path.join(path, filename)
+                ext = os.path.splitext(file_name)[1]
+                if ext.lower() not in self._exts:
+                    warnings.warn('Ignoring {} of type {}. Only support {}'\
+                    .format(filename, ext, ', '.join(self._exts)))
+                    continue
+                self.items.append((file_name, label))
+
+
+    def _csv_labelled_dataset(self, root, skip_rows=0):
+        with open(self._train_csv, "r") as traincsv:
+            for line in islice(csv.reader(traincsv), skip_rows, None):
+                filename = os.path.join(root, line[0])
+                label = line[1].strip()
+                if label not in self.synsets:
+                    self.synsets.append(label)
+                if self._format not in filename:
+                    filename = filename+self._format
+                self.items.append((filename, nd.array([self.synsets.index(label)]).reshape((1,))))
 
 
     def __getitem__(self, idx):
         """Retrieve the item (data, label) stored at idx in items"""
-        filename = self.items[idx][0]
-        label = self.items[idx][1]
-        if librosa is not None:
-            X1, _ = librosa.load(filename, res_type='kaiser_fast')
-            return nd.array(X1), label
-        else:
-            warnings.warn(" Dependency librosa is not installed! \
-            Cannot load the audio(wav) file into the numpy.ndarray.")
-            return self.items[idx][0], self.items[idx][1]
+        filename, label = self.items[idx]
+        # resampling_type is passed as kaiser_fast for a better performance
+        X1, _ = librosa.load(filename, res_type='kaiser_fast')
+        return nd.array(X1), label
+
 
     def __len__(self):
         """Retrieves the number of items in the dataset"""
diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/urban_sounds/predict.py
index 9b92541bb27b..1c37bd0204db 100644
--- a/example/gluon/urban_sounds/predict.py
+++ b/example/gluon/urban_sounds/predict.py
@@ -22,6 +22,10 @@
 from mxnet import nd
 from transforms import MFCC
 from model import get_net
+try:
+    import librosa
+except ImportError:
+    raise ImportError("Librosa is not installed! please run the following command pip install librosa.")
 
 def predict(prediction_dir='./Test'):
     """The function is used to run predictions on the audio files in the directory `pred_directory`.
@@ -35,12 +39,6 @@ def predict(prediction_dir='./Test'):
 
     """
 
-    try:
-        import librosa
-    except ImportError:
-        warnings.warn("Librosa is not installed! please run the following command pip install librosa.")
-        return
-
     if not os.path.exists(prediction_dir):
         warnings.warn("The directory on which predictions are to be made is not found!")
         return
diff --git a/example/gluon/urban_sounds/requirements.txt b/example/gluon/urban_sounds/requirements.txt
new file mode 100644
index 000000000000..d885e0beec7e
--- /dev/null
+++ b/example/gluon/urban_sounds/requirements.txt
@@ -0,0 +1,2 @@
+librosa>=0.6.2 # librosa is a library that is used to load the audio(wav) files and provides capabilities of feature extraction.
+argparse # used for parsing arguments
\ No newline at end of file
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py
index a2e3066bd332..a30e6ae78701 100644
--- a/example/gluon/urban_sounds/train.py
+++ b/example/gluon/urban_sounds/train.py
@@ -116,10 +116,14 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
 
 
 if __name__ == '__main__':
+    training_dir = './Train'
+    training_csv = './train.csv'
+    eps = 30
+    batch_sz = 32
 
     try:
         import argparse
-        parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet")
+        parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet Gluon")
         parser.add_argument('--train', '-t', help="Enter the folder path that contains your audio files", type=str)
         parser.add_argument('--csv', '-c', help="Enter the filename of the csv that contains filename\
         to label mapping", type=str)
@@ -131,31 +135,21 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
         if args:
             if args.train:
                 training_dir = args.train
-            else:
-                training_dir = './Train'
 
             if args.csv:
                 training_csv = args.csv
-            else:
-                training_csv = './train.csv'
 
             if args.epochs:
                 eps = args.epochs
-            else:
-                eps = 30
 
             if args.batch_size:
                 batch_sz = args.batch_size
-            else:
-                batch_sz = 32
+
 
     except ImportError as er:
         warnings.warn("Argument parsing module could not be imported \
         Passing default arguments.")
-        training_dir = './Train'
-        training_csv = './train.csv'
-        eps = 30
-        batch_sz = 32
+
 
     train(train_dir=training_dir, train_csv=training_csv, epochs=eps, batch_size=batch_sz)
     print("Urban sounds classification Training DONE!")
diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py
index a75e1543338d..9b3f9428f272 100644
--- a/example/gluon/urban_sounds/transforms.py
+++ b/example/gluon/urban_sounds/transforms.py
@@ -204,4 +204,3 @@ def forward(self, x):
         specs = librosa.feature.melspectrogram(x, sr=self._sampling_rate,\
         n_fft=self._num_fft, n_mels=self._num_mels, hop_length=self._hop_length)
         return nd.array(specs)
- 
\ No newline at end of file

From cc3714a7ce6a1082122448a26b2fa49fbf4a9855 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Tue, 27 Nov 2018 20:13:42 -0800
Subject: [PATCH 13/21] PR Comments

---
 example/gluon/urban_sounds/datasets.py | 2 +-
 example/gluon/urban_sounds/train.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index 78da5eb2e84d..39c3337e019d 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -163,7 +163,7 @@ def transform_first(self, fn, lazy=False):
         fn : callable
             A transformer function that takes the first element of a sample
             as input and returns the transformed element.
-        lazy : bool, default True
+        lazy : bool, default False
             If False, transforms all samples at once. Otherwise,
             transforms each sample on demand. Note that if `fn`
             is stochastic, you must set lazy to True or you will
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py
index a30e6ae78701..28dafa22592d 100644
--- a/example/gluon/urban_sounds/train.py
+++ b/example/gluon/urban_sounds/train.py
@@ -37,7 +37,7 @@ def evaluate_accuracy(data_iterator, net):
 
 
 def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
-    """The function responsible for running the training the model."""
+    """Function responsible for running the training of the model."""
 
     if not train_dir or not os.path.exists(train_dir) or not train_csv:
         warnings.warn("No train directory could be found ")
@@ -100,7 +100,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
 
         if e%5 == 0:
             train_accuracy = evaluate_accuracy(audio_train_loader, net)
-            print("Epoch %s. Loss: %s Train accuracy : %s " % (e, cumulative_loss/num_examples, train_accuracy))
+            print("Epoch {}. Loss: {} Train accuracy : {} ".format(e, cumulative_loss/num_examples, train_accuracy))
             print("\n------------------------------\n")
 
     train_accuracy = evaluate_accuracy(audio_train_loader, net)

From 51101f2c27cac4aec6787259f07f1e82c8673391 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Wed, 28 Nov 2018 09:36:03 -0800
Subject: [PATCH 14/21] Passing lazy=lazy from argument

---
 example/gluon/urban_sounds/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index 39c3337e019d..31c816dceb68 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -175,4 +175,4 @@ def transform_first(self, fn, lazy=False):
             The transformed dataset.
 
         """
-        return super(AudioFolderDataset, self).transform_first(fn, lazy=False)
+        return super(AudioFolderDataset, self).transform_first(fn, lazy=lazy)

From c41b9b39560f80e115e6384154a487e55a543a79 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Wed, 28 Nov 2018 15:45:45 -0800
Subject: [PATCH 15/21] Added PR comments, labels to README.MD

---
 example/gluon/urban_sounds/README.md     | 51 ++++++++++++++++++------
 example/gluon/urban_sounds/datasets.py   |  5 ++-
 example/gluon/urban_sounds/predict.py    |  7 ++--
 example/gluon/urban_sounds/train.py      | 21 +++++-----
 example/gluon/urban_sounds/transforms.py |  3 +-
 5 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md
index 76e6c20d4cd3..35d53963b7b9 100644
--- a/example/gluon/urban_sounds/README.md
+++ b/example/gluon/urban_sounds/README.md
@@ -8,16 +8,29 @@ After logging in, the data set can be downloaded.
 The details of the dataset and the link to download it are given below:
 
 
-Urban Sounds Dataset:
-## Description
+##Urban Sounds Dataset:
+### Description
   The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on.
-  The task is to classify these audio samples into one of the 10 labels.
+  The task is to classify these audio samples into one of the following 10 labels:
+  ```
+  siren,
+  street_music,
+  drilling,
+  dog_bark,
+  children_playing,
+  gun_shot,
+  engine_idling,
+  air_conditioner,
+  jackhammer,
+  car_horn
+  ```
 
 To be able to run this example:
 
-1. `pip install -r ./requirements.txt`
+1. `pip install -r requirements.txt`
 
-    This step installs the required libraries to run the example.
+    Run this command from the directory that contains the requirements.txt file;
+    it installs the libraries required to run the example.
     The main dependency that is required is: Librosa. 
     The version used to test the example is: `0.6.2`
     For more details, refer here:
@@ -26,9 +39,21 @@ To be able to run this example:
 2. Download the dataset(train.zip, test.zip) required for this example from the location:
 https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU
 
-3. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,\
+3. Extract both the zip archives into the **current directory** - after unzipping you would get 2 new folders namely,
    **Train** and **Test** and two csv files - **train.csv**, **test.csv**
 
+   Assuming you are in a directory *"UrbanSounds"*, after downloading and extracting train.zip, the folder structure should be:
+   
+   ```
+        UrbanSounds        
+                    - Train
+                        - 0.wav, 1.wav ...
+                    - train.csv
+                    - datasets.py
+                    - train.py
+                    - predict.py ...
+    ```
+
 4. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/**
 
 
@@ -36,41 +61,41 @@ https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU
 For information on the current design of how the AudioFolderDataset is implemented, refer below:
 **https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio**
 
-## Usage 
+### Usage 
 
 For training:
 
-- arguments
+- Arguments
   - train : The folder/directory that contains the audio(wav) files locally. Default = "./Train"
   - csv: The file name of the csv file that contains audio file name to label mapping. Default = "train.csv"
   - epochs : Number of epochs to train the model. Default = 30
   - batch_size : The batch size for training. Default = 32
 
 
-###### default setting
+###### To use the default arguments, use:
 ```
 python train.py
 ``` 
 or
 
-###### manual setting
+###### To pass command-line arguments for training data directory, epochs, batch_size, csv file name, use :
 ```
 python train.py --train ./Train --csv train.csv --batch_size 32 --epochs 30 
 ```
 
 For prediction:
 
-- arguments
+- Arguments
   - pred : The folder/directory that contains the audio(wav) files which are to be classified. Default = "./Test"
 
 
-###### default setting
+###### To use the default arguments, use:
 ```
 python predict.py
 ``` 
 or
 
-###### manual setting
+###### To pass command-line arguments for test data directory, use :
 ```
 python train.py --pred ./Test
 ```
\ No newline at end of file
diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/urban_sounds/datasets.py
index 31c816dceb68..51c040c8f162 100644
--- a/example/gluon/urban_sounds/datasets.py
+++ b/example/gluon/urban_sounds/datasets.py
@@ -18,6 +18,7 @@
 # coding: utf-8
 # pylint: disable=
 """ Audio Dataset container."""
+from __future__ import print_function
 __all__ = ['AudioFolderDataset']
 
 import os
@@ -75,7 +76,7 @@ def __init__(self, root, train_csv=None, file_format='.wav', skip_header=False):
         self._format = file_format
         self._train_csv = train_csv
         if file_format.lower() not in self._exts:
-            raise RuntimeError("format {} not supported currently.".format(file_format))
+            raise RuntimeError("Format {} not supported currently.".format(file_format))
         skip_rows = 0
         if skip_header:
             skip_rows = 1
@@ -96,7 +97,7 @@ def _list_audio_files(self, root, skip_rows=0):
             # train_csv contains mapping between filename and label
             self._csv_labelled_dataset(root, skip_rows=skip_rows)
 
-        #Generating the synset.txt file now
+        # Generating the synset.txt file now
         if not os.path.exists("./synset.txt"):
             with open("./synset.txt", "w") as synsets_file:
                 for item in self.synsets:
diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/urban_sounds/predict.py
index 1c37bd0204db..bae51b3251de 100644
--- a/example/gluon/urban_sounds/predict.py
+++ b/example/gluon/urban_sounds/predict.py
@@ -14,8 +14,8 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-""" Prediction module for Urban Sounds Classification
-"""
+""" Prediction module for Urban Sounds Classification"""
+from __future__ import print_function
 import os
 import warnings
 import mxnet as mx
@@ -25,7 +25,8 @@
 try:
     import librosa
 except ImportError:
-    raise ImportError("Librosa is not installed! please run the following command pip install librosa.")
+    raise ImportError("Librosa is not installed! please run the following command:\
+     `pip install librosa`")
 
 def predict(prediction_dir='./Test'):
     """The function is used to run predictions on the audio files in the directory `pred_directory`.
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py
index 28dafa22592d..2132437f9992 100644
--- a/example/gluon/urban_sounds/train.py
+++ b/example/gluon/urban_sounds/train.py
@@ -14,8 +14,8 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-"""The module to run training on the Urban sounds dataset
-"""
+"""The module to run training on the Urban sounds dataset"""
+from __future__ import print_function
 import os
 import time
 import warnings
@@ -25,6 +25,7 @@
 from transforms import MFCC
 import model
 
+
 def evaluate_accuracy(data_iterator, net):
     """Function to evaluate accuracy of any data iterator passed to it as an argument"""
     acc = mx.metric.Accuracy()
@@ -87,7 +88,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
     batch_size = batch_size
     num_examples = len(aud_dataset)
 
-    for e in range(epochs):
+    for epoch in range(epochs):
         cumulative_loss = 0
         for data, label in audio_train_loader:
             with autograd.record():
@@ -98,9 +99,9 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
             trainer.step(batch_size)
             cumulative_loss += mx.nd.sum(loss).asscalar()
 
-        if e%5 == 0:
+        if epoch%5 == 0:
             train_accuracy = evaluate_accuracy(audio_train_loader, net)
-            print("Epoch {}. Loss: {} Train accuracy : {} ".format(e, cumulative_loss/num_examples, train_accuracy))
+            print("Epoch {}. Loss: {} Train accuracy : {} ".format(epoch, cumulative_loss/num_examples, train_accuracy))
             print("\n------------------------------\n")
 
     train_accuracy = evaluate_accuracy(audio_train_loader, net)
@@ -118,8 +119,8 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
 if __name__ == '__main__':
     training_dir = './Train'
     training_csv = './train.csv'
-    eps = 30
-    batch_sz = 32
+    epochs = 30
+    batch_size = 32
 
     try:
         import argparse
@@ -140,10 +141,10 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
                 training_csv = args.csv
 
             if args.epochs:
-                eps = args.epochs
+                epochs = args.epochs
 
             if args.batch_size:
-                batch_sz = args.batch_size
+                batch_size = args.batch_size
 
 
     except ImportError as er:
@@ -151,5 +152,5 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
         Passing default arguments.")
 
 
-    train(train_dir=training_dir, train_csv=training_csv, epochs=eps, batch_size=batch_sz)
+    train(train_dir=training_dir, train_csv=training_csv, epochs=epochs, batch_size=batch_size)
     print("Urban sounds classification Training DONE!")
diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/urban_sounds/transforms.py
index 9b3f9428f272..8b76d131cdb1 100644
--- a/example/gluon/urban_sounds/transforms.py
+++ b/example/gluon/urban_sounds/transforms.py
@@ -14,10 +14,9 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
 # coding: utf-8
 # pylint: disable= arguments-differ
-"Audio transforms."
+"""Audio transforms."""
 
 import warnings
 import numpy as np

From 5eef58f2d3f8089f21483de770d793eff47aae48 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Wed, 28 Nov 2018 19:22:36 -0800
Subject: [PATCH 16/21] Trigger CI


From 2465b0c4b7ae42b8d8283e99c1430da024524a4e Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Thu, 29 Nov 2018 13:28:10 -0800
Subject: [PATCH 17/21] Addressing PR Comments in README

---
 example/gluon/urban_sounds/README.md | 2 +-
 example/gluon/urban_sounds/train.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md
index 35d53963b7b9..92c201614fa5 100644
--- a/example/gluon/urban_sounds/README.md
+++ b/example/gluon/urban_sounds/README.md
@@ -1,4 +1,4 @@
-# Urban Sounds classification in MXNet
+# Urban Sounds Classification in MXNet Gluon
 
 This example provides an end-to-end pipeline for a common datahack competition - Urban Sounds Classification Example.
 Below is the link to the competition:
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/urban_sounds/train.py
index 2132437f9992..e475e238a21e 100644
--- a/example/gluon/urban_sounds/train.py
+++ b/example/gluon/urban_sounds/train.py
@@ -124,7 +124,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
 
     try:
         import argparse
-        parser = argparse.ArgumentParser(description="Urban Sounds clsssification example - MXNet Gluon")
+        parser = argparse.ArgumentParser(description="Urban Sounds classification example - MXNet Gluon")
         parser.add_argument('--train', '-t', help="Enter the folder path that contains your audio files", type=str)
         parser.add_argument('--csv', '-c', help="Enter the filename of the csv that contains filename\
         to label mapping", type=str)

From 4e0d54152db2671251f4f2319b88f1acfdcb7c9d Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Thu, 29 Nov 2018 13:40:22 -0800
Subject: [PATCH 18/21] Modified README.md

---
 example/gluon/urban_sounds/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/urban_sounds/README.md
index 92c201614fa5..af95b2653226 100644
--- a/example/gluon/urban_sounds/README.md
+++ b/example/gluon/urban_sounds/README.md
@@ -8,7 +8,7 @@ After logging in, the data set can be downloaded.
 The details of the dataset and the link to download it are given below:
 
 
-##Urban Sounds Dataset:
+## Urban Sounds Dataset:
 ### Description
   The dataset contains 8732 wav files which are audio samples(<= 4s)) of street sounds like engine_idling, car_horn, children_playing, dog_barking and so on.
   The task is to classify these audio samples into one of the following 10 labels:

From 74106e0365702ae6766b501543adce1e8ba05a93 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Thu, 29 Nov 2018 18:49:06 -0800
Subject: [PATCH 19/21] Added example under audio folder

---
 example/gluon/{urban_sounds => audio}/transforms.py     | 0
 example/gluon/{ => audio}/urban_sounds/README.md        | 9 ++++-----
 example/gluon/{ => audio}/urban_sounds/datasets.py      | 0
 example/gluon/{ => audio}/urban_sounds/model.py         | 0
 example/gluon/{ => audio}/urban_sounds/predict.py       | 4 +++-
 example/gluon/{ => audio}/urban_sounds/requirements.txt | 0
 example/gluon/{ => audio}/urban_sounds/train.py         | 5 +++--
 7 files changed, 10 insertions(+), 8 deletions(-)
 rename example/gluon/{urban_sounds => audio}/transforms.py (100%)
 rename example/gluon/{ => audio}/urban_sounds/README.md (92%)
 rename example/gluon/{ => audio}/urban_sounds/datasets.py (100%)
 rename example/gluon/{ => audio}/urban_sounds/model.py (100%)
 rename example/gluon/{ => audio}/urban_sounds/predict.py (98%)
 rename example/gluon/{ => audio}/urban_sounds/requirements.txt (100%)
 rename example/gluon/{ => audio}/urban_sounds/train.py (98%)

diff --git a/example/gluon/urban_sounds/transforms.py b/example/gluon/audio/transforms.py
similarity index 100%
rename from example/gluon/urban_sounds/transforms.py
rename to example/gluon/audio/transforms.py
diff --git a/example/gluon/urban_sounds/README.md b/example/gluon/audio/urban_sounds/README.md
similarity index 92%
rename from example/gluon/urban_sounds/README.md
rename to example/gluon/audio/urban_sounds/README.md
index af95b2653226..c85d29db2e5a 100644
--- a/example/gluon/urban_sounds/README.md
+++ b/example/gluon/audio/urban_sounds/README.md
@@ -34,7 +34,7 @@ To be able to run this example:
     The main dependency that is required is: Librosa. 
     The version used to test the example is: `0.6.2`
     For more details, refer here:
-*https://librosa.github.io/librosa/install.html*
+https://librosa.github.io/librosa/install.html
 
 2. Download the dataset(train.zip, test.zip) required for this example from the location:
 https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU
@@ -49,17 +49,16 @@ https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU
                     - Train
                         - 0.wav, 1.wav ...
                     - train.csv
-                    - datasets.py
                     - train.py
                     - predict.py ...
     ```
 
-4. Apache MXNet is installed on the machine. For instructions, go to the link: **https://mxnet.incubator.apache.org/install/**
+4. Apache MXNet is installed on the machine. For instructions, go to the link: https://mxnet.incubator.apache.org/install/
 
 
 For information on the current design of how the AudioFolderDataset is implemented, refer below:
-**https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio**
+https://cwiki.apache.org/confluence/display/MXNET/Gluon+-+Audio
 
 ### Usage 
 
@@ -97,5 +96,5 @@ or
 
 ###### To pass command-line arguments for test data directory, use :
 ```
-python train.py --pred ./Test
+python predict.py --pred ./Test
 ```
\ No newline at end of file
diff --git a/example/gluon/urban_sounds/datasets.py b/example/gluon/audio/urban_sounds/datasets.py
similarity index 100%
rename from example/gluon/urban_sounds/datasets.py
rename to example/gluon/audio/urban_sounds/datasets.py
diff --git a/example/gluon/urban_sounds/model.py b/example/gluon/audio/urban_sounds/model.py
similarity index 100%
rename from example/gluon/urban_sounds/model.py
rename to example/gluon/audio/urban_sounds/model.py
diff --git a/example/gluon/urban_sounds/predict.py b/example/gluon/audio/urban_sounds/predict.py
similarity index 98%
rename from example/gluon/urban_sounds/predict.py
rename to example/gluon/audio/urban_sounds/predict.py
index bae51b3251de..0c3631173667 100644
--- a/example/gluon/urban_sounds/predict.py
+++ b/example/gluon/audio/urban_sounds/predict.py
@@ -17,16 +17,17 @@
 """ Prediction module for Urban Sounds Classification"""
 from __future__ import print_function
 import os
+import sys
 import warnings
 import mxnet as mx
 from mxnet import nd
-from transforms import MFCC
 from model import get_net
 try:
     import librosa
 except ImportError:
     raise ImportError("Librosa is not installed! please run the following command:\
      `pip install librosa`")
+sys.path.append('../')
 
 def predict(prediction_dir='./Test'):
     """The function is used to run predictions on the audio files in the directory `pred_directory`.
@@ -64,6 +65,7 @@ def predict(prediction_dir='./Test'):
     net.load_parameters("./net.params")
     file_names = os.listdir(prediction_dir)
     full_file_names = [os.path.join(prediction_dir, item) for item in file_names]
+    from transforms import MFCC
     mfcc = MFCC()
     print("\nStarting predictions for audio files in ", prediction_dir, " ....\n")
     for filename in full_file_names:
diff --git a/example/gluon/urban_sounds/requirements.txt b/example/gluon/audio/urban_sounds/requirements.txt
similarity index 100%
rename from example/gluon/urban_sounds/requirements.txt
rename to example/gluon/audio/urban_sounds/requirements.txt
diff --git a/example/gluon/urban_sounds/train.py b/example/gluon/audio/urban_sounds/train.py
similarity index 98%
rename from example/gluon/urban_sounds/train.py
rename to example/gluon/audio/urban_sounds/train.py
index e475e238a21e..c88f9fb55187 100644
--- a/example/gluon/urban_sounds/train.py
+++ b/example/gluon/audio/urban_sounds/train.py
@@ -16,15 +16,15 @@
 # under the License.
 """The module to run training on the Urban sounds dataset"""
 from __future__ import print_function
+import sys
 import os
 import time
 import warnings
 import mxnet as mx
 from mxnet import gluon, nd, autograd
 from datasets import AudioFolderDataset
-from transforms import MFCC
 import model
-
+sys.path.append('../')
 
 def evaluate_accuracy(data_iterator, net):
     """Function to evaluate accuracy of any data iterator passed to it as an argument"""
@@ -73,6 +73,7 @@ def train(train_dir=None, train_csv=None, epochs=30, batch_size=32):
     print("Loading the dataset to the Gluon's OOTB Dataloader...")
 
     #Getting the data loader out of the AudioDataset and passing the transform
+    from transforms import MFCC
     aud_transform = MFCC()
     tick = time.time()
 

From 5eb923ed34f98959fede75464a77964a632166ba Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Fri, 30 Nov 2018 14:22:37 -0800
Subject: [PATCH 20/21] Retrigger CI


From 5461bc78efe0bd01c92849449c2e2cbd1c8396d0 Mon Sep 17 00:00:00 2001
From: gaurav-gireesh <gaurav.gireesh@gmail.com>
Date: Fri, 30 Nov 2018 17:24:25 -0800
Subject: [PATCH 21/21] Retrigger CI