Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[MXNET-982] Provide example to illustrate usage of CSVIter in C++ API (
Browse files Browse the repository at this point in the history
…#12636)

* Adding the example to demonstrate the usage of CSVIter

* Addressed the review comments to make the example configurable. Moved the unittests folder in 'examples' directory.

* Updated the code to address the cpp lint errors.

* Removed the author tag.

* Fixing the lint errors and usage message.

* Update README file for cpp-package and provide README file for example directory.

* Revert "Update README file for cpp-package and provide README file for example directory."

This reverts commit 02e784a.

These files were part of fix for JIRA issue 1017. These files were mistakenly committed in this PR.

* Addressed the review comments regarding usage of atoi and avoiding string copy.

* Updated to use strtol instead of atoi
  • Loading branch information
leleamol authored and nswamy committed Oct 8, 2018
1 parent 077253d commit 610d79c
Show file tree
Hide file tree
Showing 3 changed files with 394 additions and 0 deletions.
272 changes: 272 additions & 0 deletions cpp-package/example/mlp_csv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*
* Example: mlp_csv
* Description:
* The following example demonstrates how to use CSVIter. It creates an
* mlp (multi-layer perceptron) model and trains it on the MNIST data, which is
* stored in CSV format.
*/
#include <chrono>
#include "utils.h"
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

/*
 * Build the multi-layer perceptron symbol.
 *
 * One FullyConnected layer is created per entry of hidden_units, using
 * variables named "w<i>" and "b<i>" for its weight and bias. Every layer but
 * the last is followed by a ReLU activation; the output of the last layer is
 * fed, together with the "label" variable, into SoftmaxOutput.
 *
 * hidden_units must contain at least one entry.
 */
Symbol mlp(const std::vector<int> &hidden_units) {
  const size_t num_layers = hidden_units.size();

  auto data = Symbol::Variable("data");
  auto label = Symbol::Variable("label");

  std::vector<Symbol> layer_weights(num_layers);
  std::vector<Symbol> layer_biases(num_layers);
  std::vector<Symbol> layer_outputs(num_layers);

  for (size_t layer = 0; layer < num_layers; ++layer) {
    const std::string suffix = std::to_string(layer);
    layer_weights[layer] = Symbol::Variable("w" + suffix);
    layer_biases[layer] = Symbol::Variable("b" + suffix);

    // The first layer consumes the input data, later ones the previous output.
    Symbol fc = FullyConnected(
        layer == 0 ? data : layer_outputs[layer - 1],
        layer_weights[layer],
        layer_biases[layer],
        hidden_units[layer]);

    if (layer + 1 == num_layers) {
      // No activation after the final layer.
      layer_outputs[layer] = fc;
    } else {
      layer_outputs[layer] = Activation(fc, ActivationActType::kRelu);
    }
  }
  return SoftmaxOutput(layer_outputs.back(), label);
}

/*
 * Convert the input string of number of hidden units into the vector of integers.
 *
 * The string is expected to hold positive decimal integers separated by
 * whitespace, e.g. "128 64 10". Leading and trailing whitespace is accepted.
 *
 * Returns the parsed values in order. Returns an empty vector when the string
 * holds no number at all or when any token is malformed (non-numeric text,
 * a separator other than whitespace, a value <= 0 or one that does not fit in
 * an int), so that the caller can report the format error.
 */
std::vector<int> getLayers(const std::string& hidden_units_string) {
  std::vector<int> hidden_units;
  const char *cursor = hidden_units_string.c_str();

  while (true) {
    // Skip the separators ourselves so that trailing whitespace is recognised
    // as end of input instead of being handed to strtol.
    while (*cursor == ' ' || *cursor == '\t' || *cursor == '\n' ||
           *cursor == '\v' || *cursor == '\f' || *cursor == '\r') {
      ++cursor;
    }
    if (*cursor == '\0') {
      break;
    }

    char *pNext = nullptr;
    const long num_unit = strtol(cursor, &pNext, 10);
    const int as_int = static_cast<int>(num_unit);

    // pNext == cursor means strtol consumed nothing; looping on that would
    // never terminate, so treat it (and any unusable value) as a format error.
    if (pNext == cursor || num_unit <= 0 || as_int != num_unit) {
      return std::vector<int>();
    }

    hidden_units.push_back(as_int);
    cursor = pNext;
  }
  return hidden_units;
}

/*
 * Print the command line synopsis and a short description of the expected
 * CSV layout and of the '--gpu' switch to standard output.
 */
void printUsage() {
  using std::cout;
  using std::endl;

  cout << "Usage:" << endl;

  cout << "mlp_csv --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 "
          "--batch_size 100 --hidden_units \"128 64 64\" [--gpu]"
       << endl;

  cout << "The example uses mnist data in CSV format. The MNIST data in CSV format assumes "
          "the column 0 to be label and the rest 784 column to be data."
       << endl;

  cout << "By default, the example uses 'cpu' context. If '--gpu' is specified, "
          "program uses 'gpu' context."
       << endl;
}

/*
 * Entry point of the mlp_csv example.
 *
 * Parses the command line, creates CSVIter data iterators for the training
 * and test files, builds the mlp symbol, trains it with SGD for the requested
 * number of epochs and logs throughput and validation accuracy per epoch.
 *
 * Recognised options: --train <file>, --test <file>, --epochs <n>,
 * --batch_size <n>, --hidden_units "<n> <n> ...", --gpu, --help.
 * Unrecognised arguments are ignored.
 *
 * Returns 0 on success (or after --help) and 1 when the arguments are invalid.
 */
int main(int argc, char** argv) {
  const int image_size = 28;
  const int num_mnist_features = image_size * image_size;
  int batch_size = 100;
  int max_epoch = 10;
  const float learning_rate = 0.1;
  const float weight_decay = 1e-2;
  bool isGpu = false;

  std::string training_set;
  std::string test_set;
  std::string hidden_units_string;

  int index = 1;
  while (index < argc) {
    const char *option = argv[index];
    const bool takes_value = strcmp("--train", option) == 0 ||
                             strcmp("--test", option) == 0 ||
                             strcmp("--epochs", option) == 0 ||
                             strcmp("--batch_size", option) == 0 ||
                             strcmp("--hidden_units", option) == 0;

    if (takes_value) {
      // Without this check an option given as the last argument would read
      // argv[argc], which is a null pointer.
      if (index + 1 >= argc) {
        std::cout << "ERROR: The option '" << option << "' requires a value."
                  << std::endl << std::endl;
        printUsage();
        return 1;
      }
      const char *value = argv[++index];
      if (strcmp("--train", option) == 0) {
        training_set = value;
      } else if (strcmp("--test", option) == 0) {
        test_set = value;
      } else if (strcmp("--epochs", option) == 0) {
        max_epoch = static_cast<int>(strtol(value, nullptr, 10));
      } else if (strcmp("--batch_size", option) == 0) {
        batch_size = static_cast<int>(strtol(value, nullptr, 10));
      } else {
        hidden_units_string = value;
      }
    } else if (strcmp("--gpu", option) == 0) {
      // '--gpu' is a plain switch: it must not consume the following argument.
      isGpu = true;
    } else if (strcmp("--help", option) == 0) {
      printUsage();
      return 0;
    }
    ++index;
  }

  if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) {
    std::cout << "ERROR: The mandatory arguments such as path to training and test data or "
              << "number of hidden units for mlp are not specified." << std::endl << std::endl;
    printUsage();
    return 1;
  }

  // batch_size is used below as a divisor and as a shape dimension.
  if (batch_size <= 0) {
    std::cout << "ERROR: The batch size needs to be a positive integer."
              << std::endl << std::endl;
    printUsage();
    return 1;
  }

  std::vector<int> hidden_units = getLayers(hidden_units_string);

  if (hidden_units.empty()) {
    std::cout << "ERROR: Number of hidden units are not provided in correct format."
              << "The numbers need to be separated by ' '." << std::endl << std::endl;
    printUsage();
    return 1;
  }

  /*
   * The MNIST data in CSV format has 785 columns.
   * The first column is "Label" and rest of the columns contain data.
   * The mnist_train.csv has 60000 records and mnist_test.csv has
   * 10000 records.
   */
  auto train_iter = MXDataIter("CSVIter")
      .SetParam("data_csv", training_set)
      .SetParam("data_shape", Shape(num_mnist_features + 1, 1))
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .SetParam("shuffle", 0)
      .CreateDataIter();

  auto val_iter = MXDataIter("CSVIter")
      .SetParam("data_csv", test_set)
      .SetParam("data_shape", Shape(num_mnist_features + 1, 1))
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .SetParam("shuffle", 0)
      .CreateDataIter();

  auto net = mlp(hidden_units);

  Context ctx = Context::cpu();
  if (isGpu) {
    ctx = Context::gpu();
  }

  std::map<std::string, NDArray> args;
  args["data"] = NDArray(Shape(batch_size, num_mnist_features), ctx);
  args["label"] = NDArray(Shape(batch_size), ctx);
  // Let MXNet infer the shapes of the other parameters such as weights
  net.InferArgsMap(ctx, &args, args);

  // Initialize all parameters with uniform distribution U(-0.01, 0.01)
  auto initializer = Uniform(0.01);
  for (auto& arg : args) {
    // arg.first is parameter name, and arg.second is the value
    initializer(arg.first, &arg.second);
  }

  // Create sgd optimizer
  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("rescale_grad", 1.0/batch_size)
     ->SetParam("lr", learning_rate)
     ->SetParam("wd", weight_decay);

  // Create executor by binding parameters to the model
  auto *exec = net.SimpleBind(ctx, args);
  auto arg_names = net.ListArguments();

  // Start training
  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();

    auto tic = std::chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();

      /*
       * The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
       * Need to reshape this data so that label column can be extracted from this data.
       * NOTE(review): this relies on Reshape yielding one row per CSV column,
       * so that row 0 holds the labels - confirm against the layout produced
       * by CSVIter.
       */
      NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),
                                                           batch_size));

      /*
       * Extract the label data by slicing the first column of the data and
       * copy it to "label" arg.
       */
      reshapedData.Slice(0, 1).Reshape(Shape(batch_size)).CopyTo(&args["label"]);

      /*
       * Extract the feature data by slicing the columns 1 to 785 of the data and
       * copy it to "data" arg.
       */
      reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,
                                                                    num_mnist_features))
                                                     .CopyTo(&args["data"]);

      exec->Forward(true);

      // Compute gradients
      exec->Backward();
      // Update parameters; the inputs "data" and "label" are not trainable
      for (size_t i = 0; i < arg_names.size(); ++i) {
        if (arg_names[i] == "data" || arg_names[i] == "label") continue;
        opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
      }
    }
    auto toc = std::chrono::system_clock::now();

    Accuracy acc;
    val_iter.Reset();
    while (val_iter.Next()) {
      auto data_batch = val_iter.GetDataBatch();

      /*
       * The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
       * Need to reshape this data so that label column can be extracted from this data.
       */
      NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),
                                                           batch_size));

      /*
       * Extract the label data by slicing the first column of the data and
       * copy it to "label" arg.
       */
      NDArray labelData = reshapedData.Slice(0, 1).Reshape(Shape(batch_size));
      labelData.CopyTo(&args["label"]);

      /*
       * Extract the feature data by slicing the columns 1 to 785 of the data and
       * copy it to "data" arg.
       */
      reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,
                                                                    num_mnist_features))
                                                     .CopyTo(&args["data"]);

      // Forward pass is enough as no gradient is needed when evaluating
      exec->Forward(false);
      acc.Update(labelData, exec->outputs[0]);
    }
    // Only the training part of the epoch (tic..toc) is timed.
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>
                     (toc - tic).count() / 1000.0;
    LG << "Epoch[" << iter << "] " << samples/duration << " samples/sec Accuracy: "
       << acc.Get();
  }

  delete exec;
  MXNotifyShutdown();
  return 0;
}
59 changes: 59 additions & 0 deletions cpp-package/example/mnist_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# This script converts the MNIST data to CSV format.
# Usage:
# mnist_to_csv.py train-images-idx3-ubyte train-labels-idx1-ubyte mnist_train.csv 60000
# mnist_to_csv.py t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist_test.csv 10000
#

import argparse

def convert_to_csv(args):
    """Convert MNIST image/label files into a single CSV file.

    Each output row holds the label followed by the 28 * 28 pixel values of
    one image, separated by commas.

    Args:
        args: object with the attributes ``imageFile`` and ``labelFile``
            (paths of the binary input files), ``outputFile`` (path of the
            CSV file to write) and ``num_records`` (number of rows to
            convert).

    Raises:
        ValueError: if an input file ends before ``num_records`` records
            could be read.
    """
    pixels_per_image = 28 * 28

    # The with-block closes all three files even when an error occurs.
    with open(args.imageFile, "rb") as imageFile, \
            open(args.labelFile, "rb") as labelFile, \
            open(args.outputFile, "w") as outputFile:
        # Skip the leading 16 bytes of the image file and 8 bytes of the
        # label file, which precede the actual records.
        imageFile.read(16)
        labelFile.read(8)

        # Rows are written as they are read instead of being collected first,
        # which keeps memory usage independent of num_records.
        for i in range(args.num_records):
            # bytearray yields integers on both Python 2 and Python 3.
            label = bytearray(labelFile.read(1))
            pixels = bytearray(imageFile.read(pixels_per_image))
            if len(label) != 1 or len(pixels) != pixels_per_image:
                raise ValueError(
                    "Input files contain fewer than {} records "
                    "(stopped at record {}).".format(args.num_records, i))
            outputFile.write(",".join(str(pix) for pix in label + pixels) + "\n")

# Command line entry point: parse the four positional arguments and run the
# conversion. On failure the error is printed and the process exits with
# status 1, so that calling scripts can detect that no valid CSV was produced.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("imageFile", type=str, help="image file in mnist format e.g. train-images-idx3-ubyte")
    parser.add_argument("labelFile", type=str, help="label file in mnist format e.g train-labels-idx1-ubyte")
    parser.add_argument("outputFile", type=str, help="Output file in CSV format e.g mnist_train_trial.csv")
    parser.add_argument("num_records", type=int, help="Number of images in the input files.e.g 60000")
    args = parser.parse_args()

    try:
        convert_to_csv(args)
    except Exception as e:
        print("Error : Exception {}".format(str(e)))
        # Previously the script exited with status 0 here, hiding the failure.
        raise SystemExit(1)
63 changes: 63 additions & 0 deletions cpp-package/example/unittests/unit_test_mlp_csv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# This file is a unit test for mlp_csv.cpp example in 'example' directory.
# The file
# 1. Downloads the MNIST data,
# 2. Converts it into CSV format.
# 3. Runs the mlp_csv example and ensures that the accuracy is more than expected.
#
# The script exits with status 0 only when the test passes; every FAIL path
# exits with a non-zero status.

set -e # exit on the first error
export EXE_NAME=mlp_csv

cd "$(dirname "$(readlink -f "$0")")/../"
export LD_LIBRARY_PATH="$(readlink -f ../../lib):$LD_LIBRARY_PATH"

if [ ! -f "../../build/cpp-package/example/${EXE_NAME}" ];
then
    echo "FAIL: ${EXE_NAME} does not exist"
    exit 1
fi

cp "../../build/cpp-package/example/${EXE_NAME}" .

./get_data.sh
python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000
python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000

# Capture both stdout and stderr in the log. (The previous '2&>' form passed
# a stray '2' to the program as an extra argument instead of naming a file
# descriptor.)
./${EXE_NAME} --train ./data/mnist_data/mnist_train.csv --test ./data/mnist_data/mnist_test.csv --epochs 10 --batch_size 100 --hidden_units "128 64 10" > "${EXE_NAME}.log" 2>&1

if [ ! -f "${EXE_NAME}.log" ];
then
    echo "FAIL: Log file ${EXE_NAME}.log does not exist."
    exit 1
fi

# Obtain the accuracy achieved by mlp model after training with MNIST data in CSV format.
export Acc_obtained=`grep -oP '.*\K(?<=Accuracy: ).*$' "${EXE_NAME}.log" | tail -1 | tr -d '\n'`
export Acc_expected=0.98

# A log without any accuracy line must not be mistaken for a passing run.
if [ -z "$Acc_obtained" ];
then
    echo "FAIL: No accuracy value found in ${EXE_NAME}.log."
    exit 1
fi

# If the obtained accuracy does not meet the expected accuracy, report the test as FAIL.
if awk -v obtained="$Acc_obtained" -v expected="$Acc_expected" 'BEGIN { exit !(obtained + 0 >= expected + 0) }' ;
then
    echo "PASS: ${EXE_NAME} obtained $Acc_obtained accuracy."
else
    echo "FAIL: Accuracy = $Acc_obtained is less than expected accuracy $Acc_expected."
    exit 1
fi

0 comments on commit 610d79c

Please sign in to comment.