-
Notifications
You must be signed in to change notification settings - Fork 6.8k
[MXNET-982] Provide example to illustrate usage of CSVIter in C++ API #12636
Changes from 2 commits
0e3f771
735def8
46b4ced
87ca45c
bdf34f3
02e784a
f922509
a315f74
387f2a4
e8a9b14
7ce4da0
1d07b2e
b8338f0
52dd094
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,256 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
/*! | ||
* Amol Lele [email protected] | ||
*/ | ||
|
||
/* | ||
* Example: mlp_csv_cpu | ||
* Description: | ||
* The following example demonstrates how to use CSVIter. It creates an MLP | ||
* (multi-layer perceptron) model and trains it on MNIST data stored in | ||
* CSV format. | ||
*/ | ||
#include <cctype>
#include <cerrno>
#include <chrono>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "utils.h"
#include "mxnet-cpp/MxNetCpp.h"
|
||
using namespace mxnet::cpp; | ||
|
||
/* | ||
* Implementing the mlp symbol with given hidden units configuration. | ||
*/ | ||
Symbol mlp(const std::vector<int> &hidden_units) | ||
{ | ||
auto data = Symbol::Variable("data"); | ||
auto label = Symbol::Variable("label"); | ||
|
||
std::vector<Symbol> weights(hidden_units.size()); | ||
std::vector<Symbol> biases(hidden_units.size()); | ||
std::vector<Symbol> outputs(hidden_units.size()); | ||
|
||
for (size_t i = 0; i < hidden_units.size(); ++i) { | ||
weights[i] = Symbol::Variable("w" + std::to_string(i)); | ||
biases[i] = Symbol::Variable("b" + std::to_string(i)); | ||
Symbol fc = FullyConnected( | ||
i == 0? data : outputs[i-1], // data | ||
weights[i], | ||
biases[i], | ||
hidden_units[i]); | ||
outputs[i] = i == hidden_units.size()-1 ? fc : Activation(fc, ActivationActType::kRelu); | ||
} | ||
return SoftmaxOutput(outputs.back(), label); | ||
} | ||
|
||
/*
 * Convert a comma separated list of hidden unit counts (e.g. "128,64,10")
 * into integers and append them to hidden_units.
 *
 * Every token must be a positive decimal integer that fits in an int;
 * surrounding whitespace is tolerated. If any token is empty, non-numeric,
 * out of range or not greater than zero, nothing is appended, so a caller
 * that starts with an empty vector can detect a malformed list by checking
 * hidden_units.empty().
 *
 * hidden_units_string is only read; it is left unmodified.
 */
void getLayers(std::string &hidden_units_string, std::vector<int> &hidden_units)
{
  const char delimiter = ',';
  std::vector<int> parsed;  // staged so that a bad token leaves hidden_units untouched
  size_t token_begin = 0;

  while (true) {
    const size_t token_end = hidden_units_string.find(delimiter, token_begin);
    const size_t token_length = (token_end == std::string::npos)
                                    ? std::string::npos
                                    : token_end - token_begin;
    const std::string token = hidden_units_string.substr(token_begin, token_length);

    // strtol (unlike atoi) reports where parsing stopped and whether the
    // value overflowed, which lets malformed tokens be rejected.
    errno = 0;
    char *parse_end = nullptr;
    const long value = std::strtol(token.c_str(), &parse_end, 10);
    const bool has_digits = (parse_end != token.c_str());
    while (std::isspace(static_cast<unsigned char>(*parse_end))) {
      ++parse_end;
    }
    if (!has_digits || *parse_end != '\0' || errno == ERANGE ||
        value <= 0 || value > INT_MAX) {
      return;
    }
    parsed.push_back(static_cast<int>(value));

    if (token_end == std::string::npos) {
      break;
    }
    token_begin = token_end + 1;
  }

  hidden_units.insert(hidden_units.end(), parsed.begin(), parsed.end());
}
|
||
/*
 * Print the command line synopsis of the example to standard output,
 * including the optional --gpu and --help flags accepted by main().
 */
void printUsage()
{
  std::cout << "Usage:" << std::endl;
  std::cout << "mlp_csv_cpu --train mnist_training_set.csv --test mnist_test_set.csv --epochs 10 --batch_size 100 --hidden_units \"128,64,64\"" << std::endl;
  std::cout << "The example uses mnist data in CSV format. The MNIST data in CSV format assumes the column 0 to be label and the rest 784 column to be data." << std::endl;
  std::cout << "Optional flags: --gpu runs the example on the GPU instead of the CPU, --help prints this message." << std::endl;
}
|
||
int main(int argc, char** argv) | ||
{ | ||
const int image_size = 28; | ||
const int num_mnist_features = image_size * image_size; | ||
int batch_size = 100; | ||
int max_epoch = 10; | ||
const float learning_rate = 0.1; | ||
const float weight_decay = 1e-2; | ||
bool isGpu = false; | ||
|
||
std::string training_set; | ||
std::string test_set; | ||
std::string hidden_units_string; | ||
int index = 1; | ||
while (index < argc) { | ||
if (strcmp("--train", argv[index]) == 0) { | ||
index++; | ||
training_set = argv[index]; | ||
} else if (strcmp("--test", argv[index]) == 0) { | ||
index++; | ||
test_set = argv[index]; | ||
} else if (strcmp("--epochs", argv[index]) == 0) { | ||
index++; | ||
max_epoch = atoi(argv[index]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. unvalidated atoi is dangerous, strtol might be better. |
||
} else if (strcmp("--batch_size", argv[index]) == 0) { | ||
index++; | ||
batch_size = atoi(argv[index]); | ||
} else if (strcmp("--hidden_units", argv[index]) == 0) { | ||
index++; | ||
hidden_units_string = argv[index]; | ||
} else if (strcmp("--gpu", argv[index]) == 0) { | ||
isGpu = true; | ||
index ++; | ||
} else if (strcmp("--help", argv[index]) == 0) { | ||
printUsage(); | ||
return 0; | ||
} | ||
index ++; | ||
} | ||
|
||
if (training_set.empty() || test_set.empty() || hidden_units_string.empty()) { | ||
std::cout << "ERROR: The mandatory arguments such as path to training and test data or number of hidden units for mlp are not specified." << std::endl << std::endl; | ||
printUsage(); | ||
return 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should return 1 on errors. |
||
} | ||
|
||
std::vector<int> hidden_units; | ||
getLayers(hidden_units_string, hidden_units); | ||
|
||
if (hidden_units.empty()) { | ||
std::cout << "ERROR: Number of hidden units are not provided in correct format. The numbers need to be separated by ','." << std::endl << std::endl; | ||
printUsage(); | ||
return 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see above. |
||
} | ||
|
||
/* | ||
* The MNIST data in CSV format has 785 columns. | ||
* The first column is "Label" and rest of the columns contain data. | ||
* The mnist_train.csv has 60000 records and mnist_test.csv has | ||
* 10000 records. | ||
*/ | ||
auto train_iter = MXDataIter("CSVIter") | ||
.SetParam("data_csv", training_set) | ||
.SetParam("data_shape", Shape(num_mnist_features + 1,1)) | ||
.SetParam("batch_size", batch_size) | ||
.SetParam("flat", 1) | ||
.SetParam("shuffle",0) | ||
.CreateDataIter(); | ||
|
||
auto val_iter = MXDataIter("CSVIter") | ||
.SetParam("data_csv", test_set) | ||
.SetParam("data_shape", Shape(num_mnist_features + 1, 1)) | ||
.SetParam("batch_size", batch_size) | ||
.SetParam("flat", 1) | ||
.SetParam("shuffle",0) | ||
.CreateDataIter(); | ||
|
||
auto net = mlp(hidden_units); | ||
|
||
Context ctx = Context::cpu(); | ||
if (isGpu) { | ||
ctx = Context::gpu(); | ||
} | ||
|
||
std::map<std::string, NDArray> args; | ||
args["data"] = NDArray(Shape(batch_size, num_mnist_features), ctx); | ||
args["label"] = NDArray(Shape(batch_size), ctx); | ||
// Let MXNet infer shapes other parameters such as weights | ||
net.InferArgsMap(ctx, &args, args); | ||
|
||
// Initialize all parameters with uniform distribution U(-0.01, 0.01) | ||
auto initializer = Uniform(0.01); | ||
for (auto& arg : args) { | ||
// arg.first is parameter name, and arg.second is the value | ||
initializer(arg.first, &arg.second); | ||
} | ||
|
||
// Create sgd optimizer | ||
Optimizer* opt = OptimizerRegistry::Find("sgd"); | ||
opt->SetParam("rescale_grad", 1.0/batch_size) | ||
->SetParam("lr", learning_rate) | ||
->SetParam("wd", weight_decay); | ||
|
||
// Create executor by binding parameters to the model | ||
auto *exec = net.SimpleBind(ctx, args); | ||
auto arg_names = net.ListArguments(); | ||
|
||
// Start training | ||
for (int iter = 0; iter < max_epoch; ++iter) { | ||
int samples = 0; | ||
train_iter.Reset(); | ||
|
||
auto tic = std::chrono::system_clock::now(); | ||
while (train_iter.Next()) { | ||
samples += batch_size; | ||
auto data_batch = train_iter.GetDataBatch(); | ||
|
||
/* | ||
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1)) | ||
* Need to reshape this data so that label column can be extracted from this data. | ||
*/ | ||
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size)); | ||
|
||
// Extract the label data by slicing the first column of the data and copy it to "label" arg. | ||
reshapedData.Slice(0,1).Reshape(Shape(batch_size)).CopyTo(&args["label"]); | ||
|
||
// Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "data" arg. | ||
reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["data"]); | ||
|
||
exec->Forward(true); | ||
|
||
// Compute gradients | ||
exec->Backward(); | ||
// Update parameters | ||
for (size_t i = 0; i < arg_names.size(); ++i) { | ||
if (arg_names[i] == "data" || arg_names[i] == "label") continue; | ||
opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); | ||
} | ||
} | ||
auto toc = std::chrono::system_clock::now(); | ||
|
||
Accuracy acc; | ||
val_iter.Reset(); | ||
while (val_iter.Next()) { | ||
auto data_batch = val_iter.GetDataBatch(); | ||
|
||
/* | ||
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1)) | ||
* Need to reshape this data so that label column can be extracted from this data. | ||
*/ | ||
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size)); | ||
|
||
// Extract the label data by slicing the first column of the data and copy it to "label" arg. | ||
NDArray labelData = reshapedData.Slice(0,1).Reshape(Shape(batch_size)); | ||
labelData.CopyTo(&args["label"]); | ||
|
||
// Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "data" arg. | ||
reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["data"]); | ||
|
||
// Forward pass is enough as no gradient is needed when evaluating | ||
exec->Forward(false); | ||
acc.Update(labelData, exec->outputs[0]); | ||
} | ||
float duration = std::chrono::duration_cast<std::chrono::milliseconds> | ||
(toc - tic).count() / 1000.0; | ||
LG << "Epoch[" << iter << "] " << samples/duration << " samples/sec Accuracy: " << acc.Get(); | ||
} | ||
|
||
delete exec; | ||
MXNotifyShutdown(); | ||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
# This script converts the MNIST data to CSV format. | ||
# Usage: | ||
# mnist_to_csv.py train-images-idx3-ubyte train-labels-idx1-ubyte mnist_train.csv 60000 | ||
# mnist_to_csv.py t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist_test.csv 10000 | ||
# | ||
|
||
import argparse | ||
|
||
def convert_to_csv(args):
    """Convert MNIST image and label files into a single CSV file.

    Each output row holds the label followed by the 28 * 28 pixel values of
    one image, separated by commas.

    Args:
        args: object with the attributes ``imageFile`` (path to the image
            file), ``labelFile`` (path to the label file), ``outputFile``
            (path of the CSV file to write) and ``num_records`` (number of
            records to convert).

    Raises:
        ValueError: if either input file ends before ``num_records`` records
            have been read. Rows converted before the failure have already
            been written to the output file.
    """
    pixels_per_image = 28 * 28

    # Context managers close all three files even if an error interrupts the conversion.
    with open(args.imageFile, "rb") as image_file, \
            open(args.labelFile, "rb") as label_file, \
            open(args.outputFile, "w") as output_file:
        # Skip the headers: 16 bytes in the image file, 8 bytes in the label file.
        image_file.read(16)
        label_file.read(8)

        # Stream one record at a time instead of holding the whole data set in memory.
        for record in range(args.num_records):
            # bytearray yields integer byte values on both Python 2 and Python 3.
            label = bytearray(label_file.read(1))
            pixels = bytearray(image_file.read(pixels_per_image))
            if len(label) != 1 or len(pixels) != pixels_per_image:
                raise ValueError(
                    "Input files ended at record {} but {} records were requested".format(
                        record, args.num_records))
            output_file.write(",".join(str(value) for value in label + pixels) + "\n")
|
||
if __name__ == '__main__':
    import sys

    # Command line: <imageFile> <labelFile> <outputFile> <num_records>
    parser = argparse.ArgumentParser()
    parser.add_argument("imageFile", type=str, help="image file in mnist format e.g. train-images-idx3-ubyte")
    parser.add_argument("labelFile", type=str, help="label file in mnist format e.g train-labels-idx1-ubyte")
    parser.add_argument("outputFile", type=str, help="Output file in CSV format e.g mnist_train_trial.csv")
    parser.add_argument("num_records", type=int, help="Number of images in the input files.e.g 60000")
    args = parser.parse_args()

    try:
        convert_to_csv(args)
    except Exception as e:
        print("Error : Exception {}".format(str(e)))
        # Exit with a non-zero status so that calling scripts can detect the failed conversion.
        sys.exit(1)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
# This file is a unit test for mlp_csv.cpp example in 'example' directory. | ||
# The file | ||
# 1. Downloads the MNIST data, | ||
# 2. Converts it into CSV format. | ||
# 3. Runs the mlp_csv example and ensures that the accuracy is more than expected. | ||
# | ||
|
||
#!/bin/bash
# NOTE(review): a shebang is only honored on the first line of a file; here it
# follows the license header, so the script should be run as `bash <script>`.

set -e # exit on the first error

export EXE_NAME=mlp_csv

# Run from the parent of the directory that contains this script.
cd "$(dirname "$(readlink -f "$0")")/../"
export LD_LIBRARY_PATH="$(readlink -f ../../lib):$LD_LIBRARY_PATH"

if [ ! -f "../../build/cpp-package/example/${EXE_NAME}" ];
then
    echo "FAIL: ${EXE_NAME} does not exist"
    exit 1
fi

cp "../../build/cpp-package/example/${EXE_NAME}" .

./get_data.sh
python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000
python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000

# Capture both stdout and stderr of the example in the log file.
./${EXE_NAME} --train ./data/mnist_data/mnist_train.csv --test ./data/mnist_data/mnist_test.csv --epochs 10 --batch_size 100 --hidden_units "128,64,10" > "${EXE_NAME}.log" 2>&1

if [ ! -f "${EXE_NAME}.log" ];
then
    echo "FAIL: Log file ${EXE_NAME}.log does not exist."
    exit 1
fi

# Obtain the accuracy achieved by mlp model after training with MNIST data in CSV format.
export Acc_obtained=$(grep -oP '.*\K(?<=Accuracy: ).*$' "${EXE_NAME}.log" | tail -1 | tr -d '\n')
export Acc_expected=0.98

# Without an accuracy value the numeric comparison below would be meaningless.
if [ -z "$Acc_obtained" ];
then
    echo "FAIL: No accuracy value found in ${EXE_NAME}.log."
    exit 1
fi

# If the obtained accuracy does not meet the expected accuracy, report the test as FAIL.
# awk exits with status 0 only when obtained >= expected (compared numerically).
if awk -v obtained="$Acc_obtained" -v expected="$Acc_expected" 'BEGIN { exit !(obtained + 0 >= expected + 0) }' ;
then
    echo "PASS: ${EXE_NAME} obtained $Acc_obtained accuracy."
else
    echo "FAIL: Accuracy = $Acc_obtained is less than expected accuracy $Acc_expected."
    exit 1
fi
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we don't add Author tags, please remove
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will remove it. I had seen it in the other mlp examples hence added it.