Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-982] Provide example to illustrate usage of CSVIter in C++ API #12636

Merged
merged 14 commits into from
Oct 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
272 changes: 272 additions & 0 deletions cpp-package/example/mlp_csv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*
* Example: mlp_csv
* Description:
* The following example demonstrates how to use CSVIter. It builds an
* MLP (multi-layer perceptron) model and trains it on MNIST data stored in
* CSV format.
*/
#include <chrono>
#include "utils.h"
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

/*
 * Builds the multi-layer perceptron symbol.
 *
 * hidden_units[k] is the number of units of the k-th fully connected layer.
 * Layer k uses the variables "w<k>" and "b<k>" as weight and bias. Every layer
 * except the last one is followed by a ReLU activation; the last layer feeds
 * SoftmaxOutput together with the "label" variable.
 */
Symbol mlp(const std::vector<int> &hidden_units) {
  const size_t num_layers = hidden_units.size();

  std::vector<Symbol> weights(num_layers);
  std::vector<Symbol> biases(num_layers);
  std::vector<Symbol> outputs(num_layers);

  auto data = Symbol::Variable("data");
  auto label = Symbol::Variable("label");

  for (size_t layer = 0; layer < num_layers; ++layer) {
    const std::string suffix = std::to_string(layer);
    weights[layer] = Symbol::Variable("w" + suffix);
    biases[layer] = Symbol::Variable("b" + suffix);

    // The first layer consumes the input data, later layers the previous output.
    Symbol input = (layer == 0) ? data : outputs[layer - 1];
    Symbol fc = FullyConnected(input, weights[layer], biases[layer], hidden_units[layer]);

    if (layer + 1 == num_layers) {
      // No activation after the last layer: its output goes straight to softmax.
      outputs[layer] = fc;
    } else {
      outputs[layer] = Activation(fc, ActivationActType::kRelu);
    }
  }
  return SoftmaxOutput(outputs.back(), label);
}

/*
 * Convert the input string of number of hidden units into the vector of integers.
 *
 * The string is expected to contain positive integers separated by whitespace,
 * e.g. "128 64 64". Leading and trailing whitespace is accepted.
 *
 * Returns the parsed values in order. Returns an EMPTY vector when the string
 * contains no number, contains anything that is not a whitespace separated
 * integer (e.g. "128,64" or "abc"), or contains a value that is not positive
 * or does not fit into an int. Callers treat an empty result as a format error.
 */
std::vector<int> getLayers(const std::string& hidden_units_string) {
  std::vector<int> hidden_units;
  const char *cursor = hidden_units_string.c_str();

  while (true) {
    char *pNext = nullptr;
    const long num_unit = strtol(cursor, &pNext, 10);
    if (pNext == cursor) {
      // strtol consumed nothing: either the end of the string was reached or
      // the remaining text is not a number. Stop here instead of spinning
      // forever on the same position.
      break;
    }
    // A layer needs at least one unit, and the value must survive the
    // conversion to int unchanged.
    if (num_unit <= 0 || static_cast<int>(num_unit) != num_unit) {
      return std::vector<int>();
    }
    hidden_units.push_back(static_cast<int>(num_unit));
    cursor = pNext;
  }

  // Only whitespace may remain after the last number.
  while (*cursor == ' ' || *cursor == '\t' || *cursor == '\n' ||
         *cursor == '\r' || *cursor == '\v' || *cursor == '\f') {
    ++cursor;
  }
  if (*cursor != '\0') {
    hidden_units.clear();
  }
  return hidden_units;
}

/*
 * Prints the command line usage of the example to standard output:
 * a sample invocation, the expected CSV layout and the meaning of '--gpu'.
 */
void printUsage() {
  std::cout
      << "Usage:" << std::endl
      << "mlp_csv --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 "
         "--batch_size 100 --hidden_units \"128 64 64\" [--gpu]"
      << std::endl
      << "The example uses mnist data in CSV format. The MNIST data in CSV format assumes "
         "the column 0 to be label and the rest 784 column to be data."
      << std::endl
      << "By default, the example uses 'cpu' context. If '--gpu' is specified, "
         "program uses 'gpu' context."
      << std::endl;
}

int main(int argc, char** argv) {
const int image_size = 28;
const int num_mnist_features = image_size * image_size;
int batch_size = 100;
int max_epoch = 10;
const float learning_rate = 0.1;
const float weight_decay = 1e-2;
bool isGpu = false;

std::string training_set;
std::string test_set;
std::string hidden_units_string;
int index = 1;
while (index < argc) {
if (strcmp("--train", argv[index]) == 0) {
index++;
training_set = argv[index];
} else if (strcmp("--test", argv[index]) == 0) {
index++;
test_set = argv[index];
} else if (strcmp("--epochs", argv[index]) == 0) {
index++;
max_epoch = strtol(argv[index], NULL, 10);
} else if (strcmp("--batch_size", argv[index]) == 0) {
index++;
batch_size = strtol(argv[index], NULL, 10);
} else if (strcmp("--hidden_units", argv[index]) == 0) {
index++;
hidden_units_string = argv[index];
} else if (strcmp("--gpu", argv[index]) == 0) {
isGpu = true;
index++;
} else if (strcmp("--help", argv[index]) == 0) {
printUsage();
return 0;
}
index++;
}

if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) {
std::cout << "ERROR: The mandatory arguments such as path to training and test data or "
<< "number of hidden units for mlp are not specified." << std::endl << std::endl;
printUsage();
return 1;
}

std::vector<int> hidden_units = getLayers(hidden_units_string);

if (hidden_units.empty()) {
std::cout << "ERROR: Number of hidden units are not provided in correct format."
<< "The numbers need to be separated by ' '." << std::endl << std::endl;
printUsage();
return 1;
}

/*
* The MNIST data in CSV format has 785 columns.
* The first column is "Label" and rest of the columns contain data.
* The mnist_train.csv has 60000 records and mnist_test.csv has
* 10000 records.
*/
auto train_iter = MXDataIter("CSVIter")
.SetParam("data_csv", training_set)
.SetParam("data_shape", Shape(num_mnist_features + 1, 1))
.SetParam("batch_size", batch_size)
.SetParam("flat", 1)
.SetParam("shuffle", 0)
.CreateDataIter();

auto val_iter = MXDataIter("CSVIter")
.SetParam("data_csv", test_set)
.SetParam("data_shape", Shape(num_mnist_features + 1, 1))
.SetParam("batch_size", batch_size)
.SetParam("flat", 1)
.SetParam("shuffle", 0)
.CreateDataIter();

auto net = mlp(hidden_units);

Context ctx = Context::cpu();
if (isGpu) {
ctx = Context::gpu();
}

std::map<std::string, NDArray> args;
args["data"] = NDArray(Shape(batch_size, num_mnist_features), ctx);
args["label"] = NDArray(Shape(batch_size), ctx);
// Let MXNet infer shapes other parameters such as weights
net.InferArgsMap(ctx, &args, args);

// Initialize all parameters with uniform distribution U(-0.01, 0.01)
auto initializer = Uniform(0.01);
for (auto& arg : args) {
// arg.first is parameter name, and arg.second is the value
initializer(arg.first, &arg.second);
}

// Create sgd optimiz er
Optimizer* opt = OptimizerRegistry::Find("sgd");
opt->SetParam("rescale_grad", 1.0/batch_size)
->SetParam("lr", learning_rate)
->SetParam("wd", weight_decay);

// Create executor by binding parameters to the model
auto *exec = net.SimpleBind(ctx, args);
auto arg_names = net.ListArguments();

// Start training
for (int iter = 0; iter < max_epoch; ++iter) {
int samples = 0;
train_iter.Reset();

auto tic = std::chrono::system_clock::now();
while (train_iter.Next()) {
samples += batch_size;
auto data_batch = train_iter.GetDataBatch();

/*
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
* Need to reshape this data so that label column can be extracted from this data.
*/
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),
batch_size));

/*
* Extract the label data by slicing the first column of the data and
* copy it to "label" arg.
*/
reshapedData.Slice(0, 1).Reshape(Shape(batch_size)).CopyTo(&args["label"]);

/*
* Extract the feature data by slicing the columns 1 to 785 of the data and
* copy it to "data" arg.
*/
reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,
num_mnist_features))
.CopyTo(&args["data"]);

exec->Forward(true);

// Compute gradients
exec->Backward();
// Update parameters
for (size_t i = 0; i < arg_names.size(); ++i) {
if (arg_names[i] == "data" || arg_names[i] == "label") continue;
opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
}
}
auto toc = std::chrono::system_clock::now();

Accuracy acc;
val_iter.Reset();
while (val_iter.Next()) {
auto data_batch = val_iter.GetDataBatch();

/*
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
* Need to reshape this data so that label column can be extracted from this data.
*/
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),
batch_size));

/*
* Extract the label data by slicing the first column of the data and
* copy it to "label" arg.
*/
NDArray labelData = reshapedData.Slice(0, 1).Reshape(Shape(batch_size));
labelData.CopyTo(&args["label"]);

/*
* Extract the feature data by slicing the columns 1 to 785 of the data and
* copy it to "data" arg.
*/
reshapedData.Slice(1, (num_mnist_features + 1)).Reshape(Shape(batch_size,
num_mnist_features))
.CopyTo(&args["data"]);

// Forward pass is enough as no gradient is needed when evaluating
exec->Forward(false);
acc.Update(labelData, exec->outputs[0]);
}
float duration = std::chrono::duration_cast<std::chrono::milliseconds>
(toc - tic).count() / 1000.0;
LG << "Epoch[" << iter << "] " << samples/duration << " samples/sec Accuracy: "
<< acc.Get();
}

delete exec;
MXNotifyShutdown();
return 0;
}
59 changes: 59 additions & 0 deletions cpp-package/example/mnist_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Following file converts the mnist data to CSV format.
# Usage:
# mnist_to_csv.py train-images-idx3-ubyte train-labels-idx1-ubyte mnist_train.csv 60000
# mnist_to_csv.py t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist_test.csv 10000
#

import argparse

def convert_to_csv(args):
    """Convert MNIST image and label files into one CSV file.

    Every output line holds one record: the label followed by the 28 * 28
    pixel values, separated by commas.

    Args:
        args: object with the attributes
            imageFile   -- path of the MNIST image file (16 byte header is skipped)
            labelFile   -- path of the MNIST label file (8 byte header is skipped)
            outputFile  -- path of the CSV file to write
            num_records -- number of records to convert

    Raises:
        ValueError: if an input file ends before num_records records were read.
            Records converted up to that point have already been written.
    """
    pixels_per_image = 28 * 28

    # Context managers close all three files even when an error occurs.
    with open(args.imageFile, "rb") as imageFile, \
            open(args.labelFile, "rb") as labelFile, \
            open(args.outputFile, "w") as outputFile:
        # Skip the file headers.
        imageFile.read(16)
        labelFile.read(8)

        for record in range(args.num_records):
            # bytearray yields integers when iterated, on Python 2 and 3 alike.
            label = bytearray(labelFile.read(1))
            pixels = bytearray(imageFile.read(pixels_per_image))
            if len(label) != 1 or len(pixels) != pixels_per_image:
                raise ValueError(
                    "Input files contain fewer than {} records "
                    "(stopped at record {}).".format(args.num_records, record))
            # Write each record immediately instead of keeping all of them in memory.
            outputFile.write(",".join(str(value) for value in label + pixels) + "\n")

if __name__ == '__main__':
    # Script entry point: parse the four positional arguments and run the conversion.
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("imageFile", type=str, help="image file in mnist format e.g. train-images-idx3-ubyte")
    parser.add_argument("labelFile", type=str, help="label file in mnist format e.g train-labels-idx1-ubyte")
    parser.add_argument("outputFile", type=str, help="Output file in CSV format e.g mnist_train_trial.csv")
    parser.add_argument("num_records", type=int, help="Number of images in the input files.e.g 60000")
    args = parser.parse_args()

    try:
        convert_to_csv(args)
    except Exception as e:
        print("Error : Exception {}".format(str(e)))
        # Report the failure through the exit status as well, so that calling
        # scripts can detect that no valid CSV file was produced.
        sys.exit(1)
63 changes: 63 additions & 0 deletions cpp-package/example/unittests/unit_test_mlp_csv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# This file is a unit test for mlp_csv.cpp example in 'example' directory.
# The file
# 1. Downloads the MNIST data,
# 2. Converts it into CSV format.
# 3. Runs the mlp_csv example and ensures that the accuracy is more than expected.
#

#!/bin/bash
# NOTE(review): the line above is not the first line of this file (the license
# header precedes it), so it is parsed as an ordinary comment and does not
# select the interpreter.

set -e # exit on the first error
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering why a shell script was chosen for this. This example now has C++, Python and sh files; maybe just use Python here, WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose shell script because I found it easier to invoke python and cpp programs from shell and process the output using grep.

There was no other particular reason. We can revisit this later and maybe tie the unit tests for all examples into a single framework.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Python and check_call is more maintainable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure @larroy. I will consider these suggestions when I start implementing the test framework.

export EXE_NAME=mlp_csv

# Run from the parent directory of this script so that the relative paths
# below (../../lib, ../../build, ./get_data.sh, ./data) resolve.
cd "$(dirname "$(readlink -f "$0")")/../"
export LD_LIBRARY_PATH="$(readlink -f ../../lib):$LD_LIBRARY_PATH"

if [ ! -f "../../build/cpp-package/example/${EXE_NAME}" ];
then
    echo "FAIL: ${EXE_NAME} does not exist"
    exit 1
fi

cp "../../build/cpp-package/example/${EXE_NAME}" .

# Download the MNIST data and convert both sets to CSV.
./get_data.sh
python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000
python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000

# Capture stdout and stderr of the example in the log file.
./${EXE_NAME} --train ./data/mnist_data/mnist_train.csv --test ./data/mnist_data/mnist_test.csv --epochs 10 --batch_size 100 --hidden_units "128 64 10" > "${EXE_NAME}.log" 2>&1

if [ ! -f "${EXE_NAME}.log" ];
then
    echo "FAIL: Log file ${EXE_NAME}.log does not exist."
    exit 1
fi

# Obtain the accuracy achieved by mlp model after training with MNIST data in CSV format.
# The value reported for the last epoch is used.
export Acc_obtained=`grep -oP '.*\K(?<=Accuracy: ).*$' "${EXE_NAME}.log" | tail -1 | tr -d '\n'`
export Acc_expected=0.98

# Without this check an empty value would be compared as a string and pass.
if [ -z "$Acc_obtained" ];
then
    echo "FAIL: No accuracy was found in ${EXE_NAME}.log."
    exit 1
fi

# If the obtained accuracy does not meet the expected accuracy, report the test as FAIL.
# awk exits with status 0 when obtained >= expected (numeric comparison).
if awk -v obtained="$Acc_obtained" -v expected="$Acc_expected" 'BEGIN { exit !((obtained + 0) >= (expected + 0)) }' ;
then
    echo "PASS: ${EXE_NAME} obtained $Acc_obtained accuracy."
else
    echo "FAIL: Accuracy = $Acc_obtained is less than expected accuracy $Acc_expected."
    exit 1
fi