From 0e3f7716e99610e2962093f6efeb7cd17808569f Mon Sep 17 00:00:00 2001 From: Amol Lele <19983848+leleamol@users.noreply.github.com> Date: Fri, 21 Sep 2018 15:09:18 -0700 Subject: [PATCH 1/9] Adding the example to demonstrate the usage of CSVIter --- cpp-package/example/mlp_csv_cpu.cpp | 188 ++++++++++++++++++ cpp-package/example/mnist_to_csv.py | 59 ++++++ .../unittests/unit_test_mlp_csv_cpu.sh | 63 ++++++ 3 files changed, 310 insertions(+) create mode 100644 cpp-package/example/mlp_csv_cpu.cpp create mode 100644 cpp-package/example/mnist_to_csv.py create mode 100755 cpp-package/unittests/unit_test_mlp_csv_cpu.sh diff --git a/cpp-package/example/mlp_csv_cpu.cpp b/cpp-package/example/mlp_csv_cpu.cpp new file mode 100644 index 000000000000..6ee5344ff726 --- /dev/null +++ b/cpp-package/example/mlp_csv_cpu.cpp @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Amol Lele amol.github@gmail.com + */ + +/* + * Example: mlp_csv_cpu + * Description: + * The following example demonstrates how to use CSVIter. This example creates + * mlp (multi-layer perceptron) model and trains the MNIST data which is in + * CSV format. + */ +#include +#include "utils.h" +#include "mxnet-cpp/MxNetCpp.h" + +using namespace mxnet::cpp; + +/* + * Implementing the mlp symbol with given layer configuration. + */ +Symbol mlp(const std::vector &layers) +{ + auto x = Symbol::Variable("X"); + auto label = Symbol::Variable("label"); + + std::vector weights(layers.size()); + std::vector biases(layers.size()); + std::vector outputs(layers.size()); + + for (size_t i = 0; i < layers.size(); ++i) { + weights[i] = Symbol::Variable("w" + std::to_string(i)); + biases[i] = Symbol::Variable("b" + std::to_string(i)); + Symbol fc = FullyConnected( + i == 0? x : outputs[i-1], // data + weights[i], + biases[i], + layers[i]); + outputs[i] = i == layers.size()-1 ? fc : Activation(fc, ActivationActType::kRelu); + } + return SoftmaxOutput(outputs.back(), label); +} + + +int main(int argc, char** argv) +{ + const int image_size = 28; + const int num_mnist_features = image_size * image_size; + const std::vector layers{128, 64, 10}; + const int batch_size = 100; + const int max_epoch = 10; + const float learning_rate = 0.1; + const float weight_decay = 1e-2; + + /* + * The MNIST data in CSV format has 785 columns. + * The first column is "Label" and rest of the columns contain data. + * The mnist_train.csv has 60000 records and mnist_test.csv has + * 10000 records. 
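+ * A single record therefore looks like: label, pixel_1, pixel_2, ..., pixel_784 (one image per row).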
+ */ + std::vector data_files = { "./data/mnist_train.csv", + "./data/mnist_test.csv"}; + + auto train_iter = MXDataIter("CSVIter") + .SetParam("data_csv", "./data/mnist_train.csv") + .SetParam("data_shape", Shape(num_mnist_features + 1,1)) + .SetParam("batch_size", batch_size) + .SetParam("flat", 1) + .SetParam("shuffle",0) + .CreateDataIter(); + + auto val_iter = MXDataIter("CSVIter") + .SetParam("data_csv", "./data/mnist_test.csv") + .SetParam("data_shape", Shape(num_mnist_features + 1, 1)) + .SetParam("batch_size", batch_size) + .SetParam("flat", 1) + .SetParam("shuffle",0) + .CreateDataIter(); + + auto net = mlp(layers); + + Context ctx = Context::cpu(); // Use CPU for training + + std::map args; + args["X"] = NDArray(Shape(batch_size, num_mnist_features), ctx); + args["label"] = NDArray(Shape(batch_size), ctx); + // Let MXNet infer shapes other parameters such as weights + net.InferArgsMap(ctx, &args, args); + + // Initialize all parameters with uniform distribution U(-0.01, 0.01) + auto initializer = Uniform(0.01); + for (auto& arg : args) { + // arg.first is parameter name, and arg.second is the value + initializer(arg.first, &arg.second); + } + + // Create sgd optimizer + Optimizer* opt = OptimizerRegistry::Find("sgd"); + opt->SetParam("rescale_grad", 1.0/batch_size) + ->SetParam("lr", learning_rate) + ->SetParam("wd", weight_decay); + + // Create executor by binding parameters to the model + auto *exec = net.SimpleBind(ctx, args); + auto arg_names = net.ListArguments(); + + // Start training + for (int iter = 0; iter < max_epoch; ++iter) { + int samples = 0; + train_iter.Reset(); + + auto tic = std::chrono::system_clock::now(); + while (train_iter.Next()) { + samples += batch_size; + auto data_batch = train_iter.GetDataBatch(); + + /* + * The shape of data_batch.data is (batch_size, (num_mnist_features + 1)) + * Need to reshape this data so that label column can be extracted from this data. + */ + NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size)); + + // Extract the label data by slicing the first column of the data and copy it to "label" arg. + reshapedData.Slice(0,1).Reshape(Shape(batch_size)).CopyTo(&args["label"]); + + // Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "X" arg. + reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["X"]); + + // Compute gradients + exec->Forward(true); + exec->Backward(); + // Update parameters + for (size_t i = 0; i < arg_names.size(); ++i) { + if (arg_names[i] == "X" || arg_names[i] == "label") continue; + opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); + } + } + auto toc = std::chrono::system_clock::now(); + + Accuracy acc; + val_iter.Reset(); + while (val_iter.Next()) { + auto data_batch = val_iter.GetDataBatch(); + + /* + * The shape of data_batch.data is (batch_size, (num_mnist_features + 1)) + * Need to reshape this data so that label column can be extracted from this data. + */ + NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size)); + + // Extract the label data by slicing the first column of the data and copy it to "label" arg. + NDArray labelData = reshapedData.Slice(0,1).Reshape(Shape(batch_size)); + labelData.CopyTo(&args["label"]); + + // Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "X" arg. 
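+ // (Each 785-value record holds the label in column 0 followed by the 784 pixel values of the 28x28 image.)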
+ reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["X"]); + + // Forward pass is enough as no gradient is needed when evaluating + exec->Forward(false); + acc.Update(labelData, exec->outputs[0]); + } + float duration = std::chrono::duration_cast + (toc - tic).count() / 1000.0; + LG << "Epoch: " << iter << " " << samples/duration << " samples/sec Accuracy: " << acc.Get(); + } + + delete exec; + MXNotifyShutdown(); + return 0; +} diff --git a/cpp-package/example/mnist_to_csv.py b/cpp-package/example/mnist_to_csv.py new file mode 100644 index 000000000000..dad9ed5f9c72 --- /dev/null +++ b/cpp-package/example/mnist_to_csv.py @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Following file converts the mnist data to CSV format. +# Usage: +# mnist_to_csv.py train-images-idx3-ubyte train-labels-idx1-ubyte mnist_train.csv 60000 +# mnist_to_csv.py t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist_test.csv 10000 +# + +import argparse + +def convert_to_csv(args): + imageFile = open(args.imageFile, "rb") + labelFile = open(args.labelFile, "rb") + outputFile = open(args.outputFile, "w") + + imageFile.read(16) + labelFile.read(8) + images = [] + + for i in range(args.num_records): + image = [ord(labelFile.read(1))] + for j in range(28 * 28): + image.append(ord(imageFile.read(1))) + images.append(image) + + for image in images: + outputFile.write(",".join(str(pix) for pix in image) + "\n") + + imageFile.close() + outputFile.close() + labelFile.close() + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("imageFile", type=str, help="image file in mnist format e.g. train-images-idx3-ubyte") + parser.add_argument("labelFile", type=str, help="label file in mnist format e.g train-labels-idx1-ubyte") + parser.add_argument("outputFile", type=str, help="Output file in CSV format e.g mnist_train_trial.csv") + parser.add_argument("num_records", type=int, help="Number of images in the input files.e.g 60000") + args = parser.parse_args() + + try: + convert_to_csv(args) + except Exception as e: + print("Error : Exception {}".format(str(e))) diff --git a/cpp-package/unittests/unit_test_mlp_csv_cpu.sh b/cpp-package/unittests/unit_test_mlp_csv_cpu.sh new file mode 100755 index 000000000000..b64f552c8417 --- /dev/null +++ b/cpp-package/unittests/unit_test_mlp_csv_cpu.sh @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This file is a unit test for mlp_csv_cpu.cpp example in 'example' directory. +# The file +# 1. Downloads the MNIST data, +# 2. Converts it into CSV format. +# 3. Runs the mlp_csv_cpu example and ensures that the accuracy is more than expected. +# + +#!/bin/bash + +set -e # exit on the first error +export EXE_NAME=mlp_csv_cpu + +cd $(dirname $(readlink -f $0))/../example +export LD_LIBRARY_PATH=$(readlink -f ../../lib):$LD_LIBRARY_PATH + +if [ ! -f ../../build/cpp-package/example/${EXE_NAME} ]; +then +echo "FAIL: ${EXE_NAME} does not exist" +exit +fi + +cp ../../build/cpp-package/example/${EXE_NAME} . + +./get_data.sh +python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000 +python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000 + +./${EXE_NAME} 2&> ${EXE_NAME}.log + +if [ ! -f ${EXE_NAME}.log ]; +then +echo "FAIL: Log file ${EXE_NAME}.log does not exist." +exit +fi + +# Obtain the accuracy achieved by mlp model after training with MNIST data in CSV format. +export Acc_obtained=`grep -oP '.*\K(?<=Accuracy: ).*$' ${EXE_NAME}.log | tail -1 | tr -d '\n'` +export Acc_expected=0.98 + +# If the obtained accuracy does not meet the expected accuracy, report the test as FAIL. +if [ $(echo "$Acc_obtained $Acc_expected" | awk '{printf($1 >= $2) ? 1 : 0}') -eq 1 ] ; +then +echo "PASS: ${EXE_NAME} obtained $Acc_obtained accuracy." +else +echo "FAIL: Accuracy = $Acc_obtained is less than expected accuracy $Acc_expected." +fi From 735def8e0d8b5de69727c4d285f07551fdc23bf4 Mon Sep 17 00:00:00 2001 From: Amol Lele <19983848+leleamol@users.noreply.github.com> Date: Fri, 28 Sep 2018 12:21:18 -0700 Subject: [PATCH 2/9] Addressed the review comments to make the example configurable. Moved the unittests folder in 'examples' directory. --- .../example/{mlp_csv_cpu.cpp => mlp_csv.cpp} | 128 ++++++++++++++---- .../unittests/unit_test_mlp_csv.sh} | 10 +- 2 files changed, 103 insertions(+), 35 deletions(-) rename cpp-package/example/{mlp_csv_cpu.cpp => mlp_csv.cpp} (59%) rename cpp-package/{unittests/unit_test_mlp_csv_cpu.sh => example/unittests/unit_test_mlp_csv.sh} (84%) diff --git a/cpp-package/example/mlp_csv_cpu.cpp b/cpp-package/example/mlp_csv.cpp similarity index 59% rename from cpp-package/example/mlp_csv_cpu.cpp rename to cpp-package/example/mlp_csv.cpp index 6ee5344ff726..4a8434e6e88c 100644 --- a/cpp-package/example/mlp_csv_cpu.cpp +++ b/cpp-package/example/mlp_csv.cpp @@ -35,52 +35,116 @@ using namespace mxnet::cpp; /* - * Implementing the mlp symbol with given layer configuration. + * Implementing the mlp symbol with given hidden units configuration. 
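+ * For example, hidden_units {128, 64, 10} creates three FullyConnected layers; ReLU is applied
+ * after all but the last layer, and SoftmaxOutput is attached to the final output.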
*/ -Symbol mlp(const std::vector &layers) +Symbol mlp(const std::vector &hidden_units) { - auto x = Symbol::Variable("X"); + auto data = Symbol::Variable("data"); auto label = Symbol::Variable("label"); - std::vector weights(layers.size()); - std::vector biases(layers.size()); - std::vector outputs(layers.size()); + std::vector weights(hidden_units.size()); + std::vector biases(hidden_units.size()); + std::vector outputs(hidden_units.size()); - for (size_t i = 0; i < layers.size(); ++i) { + for (size_t i = 0; i < hidden_units.size(); ++i) { weights[i] = Symbol::Variable("w" + std::to_string(i)); biases[i] = Symbol::Variable("b" + std::to_string(i)); Symbol fc = FullyConnected( - i == 0? x : outputs[i-1], // data + i == 0? data : outputs[i-1], // data weights[i], biases[i], - layers[i]); - outputs[i] = i == layers.size()-1 ? fc : Activation(fc, ActivationActType::kRelu); + hidden_units[i]); + outputs[i] = i == hidden_units.size()-1 ? fc : Activation(fc, ActivationActType::kRelu); } return SoftmaxOutput(outputs.back(), label); } +/* + * Convert the input string of number of hidden units into the vector of integers. + */ +void getLayers(std::string &hidden_units_string, std::vector &hidden_units) +{ + std::string delimiter = ","; + size_t pos = 0; + std::string token; + while ((pos = hidden_units_string.find(delimiter)) != std::string::npos) { + token = hidden_units_string.substr(0, pos); + hidden_units.push_back(atoi(token.c_str())); + hidden_units_string.erase(0, pos + delimiter.length()); + } + hidden_units.push_back(atoi(hidden_units_string.c_str())); +} + +void printUsage() +{ + std::cout << "Usage:" << std::endl; + std::cout << "mlp_csv_cpu --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 --batch_size 100 --hidden_units \"128,64,64\"" << std::endl; + std::cout << "The example uses mnist data in CSV format. The MNIST data in CSV format assumes the column 0 to be label and the rest 784 column to be data." << std::endl; +} int main(int argc, char** argv) { const int image_size = 28; const int num_mnist_features = image_size * image_size; - const std::vector layers{128, 64, 10}; - const int batch_size = 100; - const int max_epoch = 10; + int batch_size = 100; + int max_epoch = 10; const float learning_rate = 0.1; const float weight_decay = 1e-2; - + bool isGpu = false; + + std::string training_set; + std::string test_set; + std::string hidden_units_string; + int index = 1; + while (index < argc) { + if (strcmp("--train", argv[index]) == 0) { + index++; + training_set = argv[index]; + } else if (strcmp("--test", argv[index]) == 0) { + index++; + test_set = argv[index]; + } else if (strcmp("--epochs", argv[index]) == 0) { + index++; + max_epoch = atoi(argv[index]); + } else if (strcmp("--batch_size", argv[index]) == 0) { + index++; + batch_size = atoi(argv[index]); + } else if (strcmp("--hidden_units", argv[index]) == 0) { + index++; + hidden_units_string = argv[index]; + } else if (strcmp("--gpu", argv[index]) == 0) { + isGpu = true; + index ++; + } else if (strcmp("--help", argv[index]) == 0) { + printUsage(); + return 0; + } + index ++; + } + + if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) { + std::cout << "ERROR: The mandatory arguments such as path to training and test data or number of hidden units for mlp are not specified." 
<< std::endl << std::endl; + printUsage(); + return 0; + } + + std::vector hidden_units; + getLayers(hidden_units_string, hidden_units); + + if (hidden_units.empty()) { + std::cout << "ERROR: Number of hidden units are not provided in correct format. The numbers need to be separated by ','." << std::endl << std::endl; + printUsage(); + return 0; + } + /* * The MNIST data in CSV format has 785 columns. * The first column is "Label" and rest of the columns contain data. * The mnist_train.csv has 60000 records and mnist_test.csv has * 10000 records. */ - std::vector data_files = { "./data/mnist_train.csv", - "./data/mnist_test.csv"}; - auto train_iter = MXDataIter("CSVIter") - .SetParam("data_csv", "./data/mnist_train.csv") + .SetParam("data_csv", training_set) .SetParam("data_shape", Shape(num_mnist_features + 1,1)) .SetParam("batch_size", batch_size) .SetParam("flat", 1) @@ -88,19 +152,22 @@ int main(int argc, char** argv) .CreateDataIter(); auto val_iter = MXDataIter("CSVIter") - .SetParam("data_csv", "./data/mnist_test.csv") + .SetParam("data_csv", test_set) .SetParam("data_shape", Shape(num_mnist_features + 1, 1)) .SetParam("batch_size", batch_size) .SetParam("flat", 1) .SetParam("shuffle",0) .CreateDataIter(); - auto net = mlp(layers); - - Context ctx = Context::cpu(); // Use CPU for training + auto net = mlp(hidden_units); + + Context ctx = Context::cpu(); + if (isGpu) { + ctx = Context::gpu(); + } std::map args; - args["X"] = NDArray(Shape(batch_size, num_mnist_features), ctx); + args["data"] = NDArray(Shape(batch_size, num_mnist_features), ctx); args["label"] = NDArray(Shape(batch_size), ctx); // Let MXNet infer shapes other parameters such as weights net.InferArgsMap(ctx, &args, args); @@ -141,15 +208,16 @@ int main(int argc, char** argv) // Extract the label data by slicing the first column of the data and copy it to "label" arg. reshapedData.Slice(0,1).Reshape(Shape(batch_size)).CopyTo(&args["label"]); - // Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "X" arg. - reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["X"]); + // Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "data" arg. + reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["data"]); - // Compute gradients exec->Forward(true); + + // Compute gradients exec->Backward(); // Update parameters for (size_t i = 0; i < arg_names.size(); ++i) { - if (arg_names[i] == "X" || arg_names[i] == "label") continue; + if (arg_names[i] == "data" || arg_names[i] == "label") continue; opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); } } @@ -170,8 +238,8 @@ int main(int argc, char** argv) NDArray labelData = reshapedData.Slice(0,1).Reshape(Shape(batch_size)); labelData.CopyTo(&args["label"]); - // Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "X" arg. - reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["X"]); + // Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "data" arg. 
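+ // Slice(1, num_mnist_features + 1) keeps the 784 pixel values and skips the label in column 0.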
+ reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["data"]); // Forward pass is enough as no gradient is needed when evaluating exec->Forward(false); @@ -179,7 +247,7 @@ int main(int argc, char** argv) } float duration = std::chrono::duration_cast (toc - tic).count() / 1000.0; - LG << "Epoch: " << iter << " " << samples/duration << " samples/sec Accuracy: " << acc.Get(); + LG << "Epoch[" << iter << "] " << samples/duration << " samples/sec Accuracy: " << acc.Get(); } delete exec; diff --git a/cpp-package/unittests/unit_test_mlp_csv_cpu.sh b/cpp-package/example/unittests/unit_test_mlp_csv.sh similarity index 84% rename from cpp-package/unittests/unit_test_mlp_csv_cpu.sh rename to cpp-package/example/unittests/unit_test_mlp_csv.sh index b64f552c8417..4c2fe87eb86d 100755 --- a/cpp-package/unittests/unit_test_mlp_csv_cpu.sh +++ b/cpp-package/example/unittests/unit_test_mlp_csv.sh @@ -15,19 +15,19 @@ # specific language governing permissions and limitations # under the License. -# This file is a unit test for mlp_csv_cpu.cpp example in 'example' directory. +# This file is a unit test for mlp_csv.cpp example in 'example' directory. # The file # 1. Downloads the MNIST data, # 2. Converts it into CSV format. -# 3. Runs the mlp_csv_cpu example and ensures that the accuracy is more than expected. +# 3. Runs the mlp_csv example and ensures that the accuracy is more than expected. # #!/bin/bash set -e # exit on the first error -export EXE_NAME=mlp_csv_cpu +export EXE_NAME=mlp_csv -cd $(dirname $(readlink -f $0))/../example +cd $(dirname $(readlink -f $0))/../ export LD_LIBRARY_PATH=$(readlink -f ../../lib):$LD_LIBRARY_PATH if [ ! -f ../../build/cpp-package/example/${EXE_NAME} ]; @@ -42,7 +42,7 @@ cp ../../build/cpp-package/example/${EXE_NAME} . python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000 python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000 -./${EXE_NAME} 2&> ${EXE_NAME}.log +./${EXE_NAME} --train ./data/mnist_data/mnist_train.csv --test ./data/mnist_data/mnist_test.csv --epochs 10 --batch_size 100 --hidden_units "128,64,10" 2&> ${EXE_NAME}.log if [ ! -f ${EXE_NAME}.log ]; then From 46b4ced5639ea6c5766b6dc29639d3e187675041 Mon Sep 17 00:00:00 2001 From: Amol Lele <19983848+leleamol@users.noreply.github.com> Date: Tue, 2 Oct 2018 11:12:17 -0700 Subject: [PATCH 3/9] Updated the code to address the cpp lint errors. --- cpp-package/example/mlp_csv.cpp | 68 +++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/cpp-package/example/mlp_csv.cpp b/cpp-package/example/mlp_csv.cpp index 4a8434e6e88c..afddd6641d5e 100644 --- a/cpp-package/example/mlp_csv.cpp +++ b/cpp-package/example/mlp_csv.cpp @@ -37,8 +37,7 @@ using namespace mxnet::cpp; /* * Implementing the mlp symbol with given hidden units configuration. */ -Symbol mlp(const std::vector &hidden_units) -{ +Symbol mlp(const std::vector &hidden_units) { auto data = Symbol::Variable("data"); auto label = Symbol::Variable("label"); @@ -62,28 +61,29 @@ Symbol mlp(const std::vector &hidden_units) /* * Convert the input string of number of hidden units into the vector of integers. 
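 * For example, the string "128,64,10" is converted to the vector {128, 64, 10}.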
*/ -void getLayers(std::string &hidden_units_string, std::vector &hidden_units) -{ +std::vector getLayers(std::string hidden_units_string) { std::string delimiter = ","; size_t pos = 0; std::string token; + std::vector hidden_units; while ((pos = hidden_units_string.find(delimiter)) != std::string::npos) { token = hidden_units_string.substr(0, pos); hidden_units.push_back(atoi(token.c_str())); hidden_units_string.erase(0, pos + delimiter.length()); } hidden_units.push_back(atoi(hidden_units_string.c_str())); + return hidden_units; } -void printUsage() -{ +void printUsage() { std::cout << "Usage:" << std::endl; - std::cout << "mlp_csv_cpu --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 --batch_size 100 --hidden_units \"128,64,64\"" << std::endl; - std::cout << "The example uses mnist data in CSV format. The MNIST data in CSV format assumes the column 0 to be label and the rest 784 column to be data." << std::endl; + std::cout << "mlp_csv_cpu --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 " + << "--batch_size 100 --hidden_units \"128,64,64\"" << std::endl; + std::cout << "The example uses mnist data in CSV format. The MNIST data in CSV format assumes " + << "the column 0 to be label and the rest 784 column to be data." << std::endl; } -int main(int argc, char** argv) -{ +int main(int argc, char** argv) { const int image_size = 28; const int num_mnist_features = image_size * image_size; int batch_size = 100; @@ -123,16 +123,17 @@ int main(int argc, char** argv) } if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) { - std::cout << "ERROR: The mandatory arguments such as path to training and test data or number of hidden units for mlp are not specified." << std::endl << std::endl; + std::cout << "ERROR: The mandatory arguments such as path to training and test data or \ + number of hidden units for mlp are not specified." << std::endl << std::endl; printUsage(); return 0; } - std::vector hidden_units; - getLayers(hidden_units_string, hidden_units); + std::vector hidden_units = getLayers(hidden_units_string); if (hidden_units.empty()) { - std::cout << "ERROR: Number of hidden units are not provided in correct format. The numbers need to be separated by ','." << std::endl << std::endl; + std::cout << "ERROR: Number of hidden units are not provided in correct format." + << "The numbers need to be separated by ','." << std::endl << std::endl; printUsage(); return 0; } @@ -203,13 +204,22 @@ int main(int argc, char** argv) * The shape of data_batch.data is (batch_size, (num_mnist_features + 1)) * Need to reshape this data so that label column can be extracted from this data. */ - NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size)); + NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1), + batch_size)); - // Extract the label data by slicing the first column of the data and copy it to "label" arg. + /* + * Extract the label data by slicing the first column of the data and + * copy it to "label" arg. + */ reshapedData.Slice(0,1).Reshape(Shape(batch_size)).CopyTo(&args["label"]); - // Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "data" arg. - reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["data"]); + /* + * Extract the feature data by slicing the columns 1 to 785 of the data and + * copy it to "data" arg. 
+ */ + reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size, + num_mnist_features) + ).CopyTo(&args["data"]); exec->Forward(true); @@ -232,14 +242,23 @@ int main(int argc, char** argv) * The shape of data_batch.data is (batch_size, (num_mnist_features + 1)) * Need to reshape this data so that label column can be extracted from this data. */ - NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size)); + NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1), + batch_size)); - // Extract the label data by slicing the first column of the data and copy it to "label" arg. + /* + * Extract the label data by slicing the first column of the data and + * copy it to "label" arg. + */ NDArray labelData = reshapedData.Slice(0,1).Reshape(Shape(batch_size)); labelData.CopyTo(&args["label"]); - // Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "data" arg. - reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["data"]); + /* + * Extract the feature data by slicing the columns 1 to 785 of the data and + * copy it to "data" arg. + */ + reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size, + num_mnist_features) + ).CopyTo(&args["data"]); // Forward pass is enough as no gradient is needed when evaluating exec->Forward(false); @@ -247,9 +266,10 @@ int main(int argc, char** argv) } float duration = std::chrono::duration_cast (toc - tic).count() / 1000.0; - LG << "Epoch[" << iter << "] " << samples/duration << " samples/sec Accuracy: " << acc.Get(); + LG << "Epoch[" << iter << "] " << samples/duration << " samples/sec Accuracy: " + << acc.Get(); } - + delete exec; MXNotifyShutdown(); return 0; From 87ca45c5f86323429133c23c16adfcb3884cb734 Mon Sep 17 00:00:00 2001 From: Amol Lele <19983848+leleamol@users.noreply.github.com> Date: Tue, 2 Oct 2018 11:15:33 -0700 Subject: [PATCH 4/9] Removed the author tag. --- cpp-package/example/mlp_csv.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp-package/example/mlp_csv.cpp b/cpp-package/example/mlp_csv.cpp index afddd6641d5e..666384eccc11 100644 --- a/cpp-package/example/mlp_csv.cpp +++ b/cpp-package/example/mlp_csv.cpp @@ -17,10 +17,6 @@ * under the License. */ -/*! - * Amol Lele amol.github@gmail.com - */ - /* * Example: mlp_csv_cpu * Description: From bdf34f3578db60c0f9e67811a1db29b9ef2684c6 Mon Sep 17 00:00:00 2001 From: Amol Lele <19983848+leleamol@users.noreply.github.com> Date: Tue, 2 Oct 2018 15:21:16 -0700 Subject: [PATCH 5/9] Fixing the lint errors and usage message. --- cpp-package/example/mlp_csv.cpp | 88 +++++++++++++++++---------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/cpp-package/example/mlp_csv.cpp b/cpp-package/example/mlp_csv.cpp index 666384eccc11..dc25a3488285 100644 --- a/cpp-package/example/mlp_csv.cpp +++ b/cpp-package/example/mlp_csv.cpp @@ -18,7 +18,7 @@ */ /* - * Example: mlp_csv_cpu + * Example: mlp_csv * Description: * The following example demonstrates how to use CSVIter. 
This example creates * mlp (multi-layer perceptron) model and trains the MNIST data which is in @@ -73,10 +73,12 @@ std::vector getLayers(std::string hidden_units_string) { void printUsage() { std::cout << "Usage:" << std::endl; - std::cout << "mlp_csv_cpu --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 " - << "--batch_size 100 --hidden_units \"128,64,64\"" << std::endl; + std::cout << "mlp_csv --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 " + << "--batch_size 100 --hidden_units \"128,64,64\" [--gpu]" << std::endl; std::cout << "The example uses mnist data in CSV format. The MNIST data in CSV format assumes " - << "the column 0 to be label and the rest 784 column to be data." << std::endl; + << "the column 0 to be label and the rest 784 column to be data." << std::endl; + std::cout << "By default, the example uses 'cpu' context. If '--gpu' is specified, " + << "program uses 'gpu' context." <