-
Notifications
You must be signed in to change notification settings - Fork 6.8k
[MXNET-982] Provide example to illustrate usage of CSVIter in C++ API #12636
Changes from 1 commit
0e3f771
735def8
46b4ced
87ca45c
bdf34f3
02e784a
f922509
a315f74
387f2a4
e8a9b14
7ce4da0
1d07b2e
b8338f0
52dd094
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
/*! | ||
* Amol Lele [email protected] | ||
*/ | ||
|
||
/* | ||
* Example: mlp_csv_cpu | ||
* Description: | ||
* The following example demonstrates how to use CSVIter. This example creates | ||
* mlp (multi-layer perceptron) model and trains the MNIST data which is in | ||
* CSV format. | ||
*/ | ||
#include <chrono> | ||
#include "utils.h" | ||
#include "mxnet-cpp/MxNetCpp.h" | ||
|
||
using namespace mxnet::cpp; | ||
|
||
/* | ||
* Implementing the mlp symbol with given layer configuration. | ||
*/ | ||
/*
 * Builds a multi-layer perceptron symbol from the given layer configuration.
 * Each entry of 'layers' is the number of hidden units of one fully
 * connected layer; a ReLU activation follows every layer except the last,
 * whose output is fed into SoftmaxOutput together with the "label" variable.
 */
Symbol mlp(const std::vector<int> &layers)
{
    Symbol input = Symbol::Variable("X");
    Symbol label = Symbol::Variable("label");

    const size_t num_layers = layers.size();
    std::vector<Symbol> weights(num_layers);
    std::vector<Symbol> biases(num_layers);
    std::vector<Symbol> outputs(num_layers);

    for (size_t idx = 0; idx < num_layers; ++idx) {
        weights[idx] = Symbol::Variable("w" + std::to_string(idx));
        biases[idx] = Symbol::Variable("b" + std::to_string(idx));
        // The first layer consumes the input variable, later layers chain
        // onto the previous layer's output.
        const Symbol &below = (idx == 0) ? input : outputs[idx - 1];
        Symbol fc = FullyConnected(below, weights[idx], biases[idx], layers[idx]);
        const bool is_last = (idx + 1 == num_layers);
        outputs[idx] = is_last ? fc : Activation(fc, ActivationActType::kRelu);
    }
    return SoftmaxOutput(outputs.back(), label);
}
|
||
|
||
int main(int argc, char** argv) | ||
{ | ||
const int image_size = 28; | ||
const int num_mnist_features = image_size * image_size; | ||
const std::vector<int> layers{128, 64, 10}; | ||
const int batch_size = 100; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you make all of these configurable, lets make sure we follow the same as Python Examples. |
||
const int max_epoch = 10; | ||
const float learning_rate = 0.1; | ||
const float weight_decay = 1e-2; | ||
|
||
/* | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nicely commented, this is very useful for examples 👍 |
||
* The MNIST data in CSV format has 785 columns. | ||
* The first column is "Label" and rest of the columns contain data. | ||
* The mnist_train.csv has 60000 records and mnist_test.csv has | ||
* 10000 records. | ||
*/ | ||
std::vector<std::string> data_files = { "./data/mnist_train.csv", | ||
"./data/mnist_test.csv"}; | ||
|
||
auto train_iter = MXDataIter("CSVIter") | ||
.SetParam("data_csv", "./data/mnist_train.csv") | ||
.SetParam("data_shape", Shape(num_mnist_features + 1,1)) | ||
.SetParam("batch_size", batch_size) | ||
.SetParam("flat", 1) | ||
.SetParam("shuffle",0) | ||
.CreateDataIter(); | ||
|
||
auto val_iter = MXDataIter("CSVIter") | ||
.SetParam("data_csv", "./data/mnist_test.csv") | ||
.SetParam("data_shape", Shape(num_mnist_features + 1, 1)) | ||
.SetParam("batch_size", batch_size) | ||
.SetParam("flat", 1) | ||
.SetParam("shuffle",0) | ||
.CreateDataIter(); | ||
|
||
auto net = mlp(layers); | ||
|
||
Context ctx = Context::cpu(); // Use CPU for training | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you make this configurable? |
||
|
||
std::map<std::string, NDArray> args; | ||
args["X"] = NDArray(Shape(batch_size, num_mnist_features), ctx); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why "X"? we use "data" for the input layer |
||
args["label"] = NDArray(Shape(batch_size), ctx); | ||
// Let MXNet infer shapes other parameters such as weights | ||
net.InferArgsMap(ctx, &args, args); | ||
|
||
// Initialize all parameters with uniform distribution U(-0.01, 0.01) | ||
auto initializer = Uniform(0.01); | ||
for (auto& arg : args) { | ||
// arg.first is parameter name, and arg.second is the value | ||
initializer(arg.first, &arg.second); | ||
} | ||
|
||
// Create sgd optimizer | ||
Optimizer* opt = OptimizerRegistry::Find("sgd"); | ||
opt->SetParam("rescale_grad", 1.0/batch_size) | ||
->SetParam("lr", learning_rate) | ||
->SetParam("wd", weight_decay); | ||
|
||
// Create executor by binding parameters to the model | ||
auto *exec = net.SimpleBind(ctx, args); | ||
auto arg_names = net.ListArguments(); | ||
|
||
// Start training | ||
for (int iter = 0; iter < max_epoch; ++iter) { | ||
int samples = 0; | ||
train_iter.Reset(); | ||
|
||
auto tic = std::chrono::system_clock::now(); | ||
while (train_iter.Next()) { | ||
samples += batch_size; | ||
auto data_batch = train_iter.GetDataBatch(); | ||
|
||
/* | ||
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1)) | ||
* Need to reshape this data so that label column can be extracted from this data. | ||
*/ | ||
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size)); | ||
|
||
// Extract the label data by slicing the first column of the data and copy it to "label" arg. | ||
reshapedData.Slice(0,1).Reshape(Shape(batch_size)).CopyTo(&args["label"]); | ||
|
||
// Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "X" arg. | ||
reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["X"]); | ||
|
||
// Compute gradients | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move this comment to the next line(forward does not compute gradients) |
||
exec->Forward(true); | ||
exec->Backward(); | ||
// Update parameters | ||
for (size_t i = 0; i < arg_names.size(); ++i) { | ||
if (arg_names[i] == "X" || arg_names[i] == "label") continue; | ||
opt->Update(i, exec->arg_arrays[i], exec->grad_arrays[i]); | ||
} | ||
} | ||
auto toc = std::chrono::system_clock::now(); | ||
|
||
Accuracy acc; | ||
val_iter.Reset(); | ||
while (val_iter.Next()) { | ||
auto data_batch = val_iter.GetDataBatch(); | ||
|
||
/* | ||
* The shape of data_batch.data is (batch_size, (num_mnist_features + 1)) | ||
* Need to reshape this data so that label column can be extracted from this data. | ||
*/ | ||
NDArray reshapedData = data_batch.data.Reshape(Shape((num_mnist_features + 1),batch_size)); | ||
|
||
// Extract the label data by slicing the first column of the data and copy it to "label" arg. | ||
NDArray labelData = reshapedData.Slice(0,1).Reshape(Shape(batch_size)); | ||
labelData.CopyTo(&args["label"]); | ||
|
||
// Extract the feature data by slicing the columns 1 to 785 of the data and copy it to "X" arg. | ||
reshapedData.Slice(1,(num_mnist_features + 1)).Reshape(Shape(batch_size,num_mnist_features)).CopyTo(&args["X"]); | ||
|
||
// Forward pass is enough as no gradient is needed when evaluating | ||
exec->Forward(false); | ||
acc.Update(labelData, exec->outputs[0]); | ||
} | ||
float duration = std::chrono::duration_cast<std::chrono::milliseconds> | ||
(toc - tic).count() / 1000.0; | ||
LG << "Epoch: " << iter << " " << samples/duration << " samples/sec Accuracy: " << acc.Get(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you follow the same format as the Python Examples. |
||
} | ||
|
||
delete exec; | ||
MXNotifyShutdown(); | ||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
# Following file converts the mnist data to CSV format. | ||
# Usage: | ||
# mnist_to_csv.py train-images-idx3-ubyte train-labels-idx1-ubyte mnist_train.csv 60000 | ||
# mnist_to_csv.py t10k-images-idx3-ubyte t10k-labels-idx1-ubyte mnist_test.csv 10000 | ||
# | ||
|
||
import argparse
import sys
|
||
def convert_to_csv(args):
    """Convert MNIST idx image/label files into a single CSV file.

    Each output row is ``label,pixel_0,...,pixel_783`` for one image.

    Parameters (attributes of ``args``):
        imageFile: path to the idx3 image file (16-byte header, 28x28 images).
        labelFile: path to the idx1 label file (8-byte header, 1 byte/label).
        outputFile: path of the CSV file to write.
        num_records: number of records to convert.

    Raises:
        ValueError: if either input file holds fewer than num_records records.
    """
    num_pixels = 28 * 28
    # Context managers guarantee the files are closed even if an error occurs
    # (the previous version leaked the handles on exception).
    with open(args.imageFile, "rb") as image_file, \
         open(args.labelFile, "rb") as label_file, \
         open(args.outputFile, "w") as output_file:
        image_file.read(16)  # skip idx3 image-file header
        label_file.read(8)   # skip idx1 label-file header

        # Stream one record at a time instead of buffering all images in
        # memory, and read each record in one call instead of byte-by-byte.
        for record in range(args.num_records):
            label = label_file.read(1)
            pixels = image_file.read(num_pixels)
            if len(label) != 1 or len(pixels) != num_pixels:
                raise ValueError(
                    "input files contain fewer than {} records "
                    "(failed at record {})".format(args.num_records, record))
            row = [str(label[0])] + [str(pix) for pix in pixels]
            output_file.write(",".join(row) + "\n")
|
||
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("imageFile", type=str,
                        help="image file in mnist format e.g. train-images-idx3-ubyte")
    parser.add_argument("labelFile", type=str,
                        help="label file in mnist format e.g train-labels-idx1-ubyte")
    parser.add_argument("outputFile", type=str,
                        help="Output file in CSV format e.g mnist_train_trial.csv")
    parser.add_argument("num_records", type=int,
                        help="Number of images in the input files.e.g 60000")
    args = parser.parse_args()

    try:
        convert_to_csv(args)
    except Exception as e:
        print("Error : Exception {}".format(str(e)))
        # Exit non-zero so calling scripts (e.g. the CI unit test) can detect
        # a failed conversion instead of silently continuing.
        sys.exit(1)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# This file is a unit test for mlp_csv_cpu.cpp example in 'example' directory.
# The file
# 1. Downloads the MNIST data,
# 2. Converts it into CSV format.
# 3. Runs the mlp_csv_cpu example and ensures that the accuracy is more than expected.
#
# NOTE: the shebang must be the first line of the file for the interpreter
# directive to take effect; it previously appeared after this comment block.

set -e # exit on the first error
export EXE_NAME=mlp_csv_cpu

cd $(dirname $(readlink -f $0))/../example
export LD_LIBRARY_PATH=$(readlink -f ../../lib):$LD_LIBRARY_PATH

if [ ! -f ../../build/cpp-package/example/${EXE_NAME} ];
then
    echo "FAIL: ${EXE_NAME} does not exist"
    # Exit non-zero so CI marks the test as failed rather than passing silently.
    exit 1
fi

cp ../../build/cpp-package/example/${EXE_NAME} .

./get_data.sh
python mnist_to_csv.py ./data/mnist_data/train-images-idx3-ubyte ./data/mnist_data/train-labels-idx1-ubyte ./data/mnist_data/mnist_train.csv 60000
python mnist_to_csv.py ./data/mnist_data/t10k-images-idx3-ubyte ./data/mnist_data/t10k-labels-idx1-ubyte ./data/mnist_data/mnist_test.csv 10000

# Run the example and capture both stdout and stderr in the log file.
# (The previous "2&>" passed a stray "2" argument to the executable.)
./${EXE_NAME} > ${EXE_NAME}.log 2>&1

if [ ! -f ${EXE_NAME}.log ];
then
    echo "FAIL: Log file ${EXE_NAME}.log does not exist."
    exit 1
fi

# Obtain the accuracy achieved by the mlp model after training with MNIST data
# in CSV format: the value after the last "Accuracy: " in the log.
export Acc_obtained=$(grep -oP '(?<=Accuracy: ).*' ${EXE_NAME}.log | tail -1 | tr -d '\n')
export Acc_expected=0.98

# If the obtained accuracy does not meet the expected accuracy, report the test as FAIL.
if [ $(echo "$Acc_obtained $Acc_expected" | awk '{print (($1 >= $2) ? 1 : 0)}') -eq 1 ];
then
    echo "PASS: ${EXE_NAME} obtained $Acc_obtained accuracy."
else
    echo "FAIL: Accuracy = $Acc_obtained is less than expected accuracy $Acc_expected."
    exit 1
fi
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
layers -> hidden_layer_size? "layers" generally refers to convolution, pooling, dense layers, etc., so a more specific name would avoid confusion.