Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-982] Provide example to illustrate usage of CSVIter in C++ API #12636

Merged
merged 14 commits into from
Oct 8, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions cpp-package/example/mlp_csv_cpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Amol Lele [email protected]
*/

/*
* Example: mlp_csv_cpu
* Description:
* The following example demonstrates how to use CSVIter. This example creates
* mlp (multi-layer perceptron) model and trains the MNIST data which is in
* CSV format.
*/
#include <chrono>
#include "utils.h"
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

/*
* Implementing the mlp symbol with given layer configuration.
*/
/*
 * Build the mlp symbol for the given layer configuration: a stack of
 * FullyConnected layers with ReLU activations between them, topped by a
 * SoftmaxOutput against the "label" variable.
 */
Symbol mlp(const std::vector<int> &layers)
{
    Symbol data = Symbol::Variable("X");
    Symbol target = Symbol::Variable("label");

    const size_t depth = layers.size();
    std::vector<Symbol> weights(depth);
    std::vector<Symbol> biases(depth);
    std::vector<Symbol> outputs(depth);

    for (size_t layer_idx = 0; layer_idx != depth; ++layer_idx) {
        weights[layer_idx] = Symbol::Variable("w" + std::to_string(layer_idx));
        biases[layer_idx] = Symbol::Variable("b" + std::to_string(layer_idx));
        // The first layer reads the input variable; later layers read the
        // previous layer's output.
        const Symbol &below = (layer_idx == 0) ? data : outputs[layer_idx - 1];
        Symbol fc = FullyConnected(below,
                                   weights[layer_idx],
                                   biases[layer_idx],
                                   layers[layer_idx]);
        // No activation on the final layer; SoftmaxOutput normalizes it.
        const bool is_last = (layer_idx + 1 == depth);
        outputs[layer_idx] = is_last ? fc : Activation(fc, ActivationActType::kRelu);
    }
    return SoftmaxOutput(outputs.back(), target);
}


int main(int argc, char** argv)
{
const int image_size = 28;
const int num_mnist_features = image_size * image_size;
const std::vector<int> layers{128, 64, 10};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

layers->hidden_layer_size? layers generally is referred to convolution, pooling, dense, etc.,

const int batch_size = 100;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you make all of these configurable, lets make sure we follow the same as Python Examples.

const int max_epoch = 10;
const float learning_rate = 0.1;
const float weight_decay = 1e-2;

/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nicely commented, this is very useful for examples 👍

* The MNIST data in CSV format has 785 columns.
* The first column is "Label" and rest of the columns contain data.
* The mnist_train.csv has 60000 records and mnist_test.csv has
* 10000 records.
*/
std::vector<std::string> data_files = { "./data/mnist_train.csv",
"./data/mnist_test.csv"};

auto train_iter = MXDataIter("CSVIter")
.SetParam("data_csv", "./data/mnist_train.csv")
.SetParam("data_shape", Shape(num_mnist_features + 1,1))
.SetParam("batch_size", batch_size)
.SetParam("flat", 1)
.SetParam("shuffle",0)
.CreateDataIter();

auto val_iter = MXDataIter("CSVIter")
.SetParam("data_csv", "./data/mnist_test.csv")
.SetParam("data_shape", Shape(num_mnist_features + 1, 1))
.SetParam("batch_size", batch_size)
.SetParam("flat", 1)
.SetParam("shuffle",0)
.CreateDataIter();

auto net = mlp(layers);

Context ctx = Context::cpu(); // Use CPU for training
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you make this configurable?


std::map<std::string, NDArray> args;
args["X"] = NDArray(Shape(batch_size, num_mnist_features), ctx);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why "X"? we use "data" for the input layer

args["label"] = NDArray(Shape(batch_size), ctx);
// Let MXNet infer shapes other parameters such as weights
net.InferArgsMap(ctx, &args, args);

// Initialize all parameters with uniform distribution U(-0.01, 0.01)
auto initializer = Uniform(0.01);
for (auto& arg : args) {
// arg.first is parameter name, and arg.second is the value
initializer(arg.first, &arg.second);
}

// Create sgd optimizer
Optimizer* opt = OptimizerRegistry::Find("sgd");
opt->SetParam("rescale_grad", 1.0/batch_size)
->SetParam("lr", learning_rate)
->SetParam("wd", weight_decay);

// Create executor by binding parameters to the model
auto *exec = net.SimpleBind(ctx, args);
auto arg_names = net.ListArguments();

// Start training
for (int iter = 0; iter < max_epoch; ++iter) {
int samples = 0;
train_iter.Reset();

auto tic = std::chrono::system_clock::now();
while (train_iter.Next()) {
samples += batch_size;
auto data_batch = train_iter.GetDataBatch();

/*
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
* Need to reshape this data so that label column can be extracted from this data.
*/
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size));

// Extract the label data by slicing the first column of the data and copy it to "label" arg.
reshapedData.Slice(0,1).Reshape(Shape(batch_size)).CopyTo(&args["label"]);

// Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "X" arg.
reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["X"]);

// Compute gradients
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move this comment to the next line(forward does not compute gradients)

exec->Forward(true);
exec->Backward();
// Update parameters
for (size_t i = 0; i < arg_names.size(); ++i) {
if (arg_names[i] == "X" || arg_names[i] == "label") continue;
opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]);
}
}
auto toc = std::chrono::system_clock::now();

Accuracy acc;
val_iter.Reset();
while (val_iter.Next()) {
auto data_batch = val_iter.GetDataBatch();

/*
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1))
* Need to reshape this data so that label column can be extracted from this data.
*/
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size));

// Extract the label data by slicing the first column of the data and copy it to "label" arg.
NDArray labelData = reshapedData.Slice(0,1).Reshape(Shape(batch_size));
labelData.CopyTo(&args["label"]);

// Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "X" arg.
reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["X"]);

// Forward pass is enough as no gradient is needed when evaluating
exec->Forward(false);
acc.Update(labelData, exec->outputs[0]);
}
float duration = std::chrono::duration_cast<std::chrono::milliseconds>
(toc - tic).count() / 1000.0;
LG << "Epoch: " << iter << " " << samples/duration << " samples/sec Accuracy: " << acc.Get();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you follow the same format as the Python Examples.
see this #12182 (comment)

}

delete exec;
MXNotifyShutdown();
return 0;
}
59 changes: 59 additions & 0 deletions cpp-package/example/mnist_to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Following file converts the mnist data to CSV format.
# Usage:
# mnist_to_csv.py train-images-idx3-ubyte train-labels-idx1-ubyte mnist_train.csv 60000
# mnist_to_csv.py t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist_test.csv 10000
#

import argparse

def convert_to_csv(args):
    """Convert MNIST IDX image/label files to a single CSV file.

    Each output row is ``label,pix0,pix1,...,pix783`` (785 columns).

    args attributes used:
        imageFile: path to the IDX image file (16-byte header, then
            28*28 bytes per image).
        labelFile: path to the IDX label file (8-byte header, then
            1 byte per label).
        outputFile: path of the CSV file to write.
        num_records: number of records to convert.
    """
    # 'with' guarantees the handles are closed even if a read fails
    # (the original leaked all three handles on exception).
    with open(args.imageFile, "rb") as image_file, \
         open(args.labelFile, "rb") as label_file, \
         open(args.outputFile, "w") as output_file:
        image_file.read(16)  # skip IDX image header (magic, count, rows, cols)
        label_file.read(8)   # skip IDX label header (magic, count)

        # Stream one record at a time instead of buffering all of them
        # (60000 records x 785 ints) in memory before writing.
        for _ in range(args.num_records):
            record = [ord(label_file.read(1))]
            record.extend(ord(image_file.read(1)) for _ in range(28 * 28))
            output_file.write(",".join(str(pix) for pix in record) + "\n")

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("imageFile", type=str, help="image file in mnist format e.g. train-images-idx3-ubyte")
    parser.add_argument("labelFile", type=str, help="label file in mnist format e.g train-labels-idx1-ubyte")
    parser.add_argument("outputFile", type=str, help="Output file in CSV format e.g mnist_train_trial.csv")
    parser.add_argument("num_records", type=int, help="Number of images in the input files.e.g 60000")
    args = parser.parse_args()

    try:
        convert_to_csv(args)
    except Exception as e:
        # Report the error on stderr and exit non-zero so calling scripts
        # (e.g. the shell unit test) can detect a failed conversion —
        # the original printed the error but still exited 0.
        import sys
        sys.exit("Error : Exception {}".format(str(e)))
63 changes: 63 additions & 0 deletions cpp-package/unittests/unit_test_mlp_csv_cpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# This file is a unit test for mlp_csv_cpu.cpp example in 'example' directory.
# The file
# 1. Downloads the MNIST data,
# 2. Converts it into CSV format.
# 3. Runs the mlp_csv_cpu example and ensures that the accuracy is more than expected.
#
# NOTE: the shebang must be the very first line of the file to take effect
# (it previously sat below the license header and was treated as a comment).

set -e # exit on the first error
export EXE_NAME=mlp_csv_cpu

cd "$(dirname "$(readlink -f "$0")")/../example"
export LD_LIBRARY_PATH=$(readlink -f ../../lib):$LD_LIBRARY_PATH

if [ ! -f "../../build/cpp-package/example/${EXE_NAME}" ];
then
    echo "FAIL: ${EXE_NAME} does not exist"
    exit 1
fi

cp "../../build/cpp-package/example/${EXE_NAME}" .

./get_data.sh
python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000
python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000

# Capture stdout and stderr in the log file. The original '2&>' parsed as
# passing a literal "2" argument to the executable before redirecting.
./${EXE_NAME} > "${EXE_NAME}.log" 2>&1

if [ ! -f "${EXE_NAME}.log" ];
then
    echo "FAIL: Log file ${EXE_NAME}.log does not exist."
    exit 1
fi

# Obtain the accuracy achieved by mlp model after training with MNIST data in CSV format.
export Acc_obtained=$(grep -oP '(?<=Accuracy: ).*$' "${EXE_NAME}.log" | tail -1 | tr -d '\n')
export Acc_expected=0.98

# If the obtained accuracy does not meet the expected accuracy, report the test as FAIL
# and exit non-zero so CI registers the failure.
if [ "$(echo "$Acc_obtained $Acc_expected" | awk '{print ($1 >= $2) ? 1 : 0}')" -eq 1 ];
then
    echo "PASS: ${EXE_NAME} obtained $Acc_obtained accuracy."
else
    echo "FAIL: Accuracy = $Acc_obtained is less than expected accuracy $Acc_expected."
    exit 1
fi