This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

enable TensorRT integration with cpp api #15335

Merged: 1 commit, Jul 4, 2019
14 changes: 14 additions & 0 deletions cpp-package/example/inference/README.md
@@ -75,6 +75,7 @@ imagenet_inference --symbol_file <model symbol file in json format>
--num_inference_batches <number of batches used for inference>
--data_layer_type <default: "float32", choices: ["float32", "int8", "uint8"]>
--gpu <whether to run inference on GPU, default: false>
--enableTRT <whether to run inference with TensorRT, default: false>"
--benchmark <whether to use dummy data to run inference, default: false>
```

@@ -134,6 +135,19 @@ imagenet_inference.cpp:372: Running the forward pass on model to evaluate the performance..
imagenet_inference.cpp:387: benchmark completed!
imagenet_inference.cpp:388: batch size: 1 num batch: 500 throughput: xxxx imgs/s latency:xxxx ms
```
To run this example with TensorRT, you can quickly try the following command to run a benchmark test on Inception-BN:
```
./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --params_file "./model/Inception-BN-0126.params" --batch_size 16 --num_inference_batches 500 --benchmark --enableTRT
```
Sample output will look like this (the example was run on an AWS P3.2xlarge instance):
```
imagenet_inference.cpp:302: Loading the model from ./model/Inception-BN-symbol.json
build_subgraph.cc:686: start to execute partition graph.
imagenet_inference.cpp:317: Loading the model parameters from ./model/Inception-BN-0126.params
imagenet_inference.cpp:424: Running the forward pass on model to evaluate the performance..
imagenet_inference.cpp:439: benchmark completed!
imagenet_inference.cpp:440: batch size: 16 num batch: 500 throughput: 6284.78 imgs/s latency:0.159115 ms
```

## [sentiment_analysis_rnn.cpp](<https://github.com/apache/incubator-mxnet/blob/master/cpp-package/example/inference/sentiment_analysis_rnn.cpp>)
This example demonstrates how you can load a pre-trained RNN model and use it to predict the sentiment expressed in the given movie review with the MXNet C++ API. The example is capable of processing variable-length inputs. It performs the following tasks
104 changes: 78 additions & 26 deletions cpp-package/example/inference/imagenet_inference.cpp
@@ -82,6 +82,7 @@ class Predictor {
const std::string& model_params_file,
const Shape& input_shape,
bool use_gpu,
bool enable_tensorrt,
const std::string& dataset,
const int data_nthreads,
const std::string& data_layer_type,
@@ -98,6 +99,13 @@ class Predictor {
bool AdvanceDataIter(int skipped_batches);
void LoadModel(const std::string& model_json_file);
void LoadParameters(const std::string& model_parameters_file);
void SplitParamMap(const std::map<std::string, NDArray> &paramMap,
std::map<std::string, NDArray> *argParamInTargetContext,
std::map<std::string, NDArray> *auxParamInTargetContext,
Context targetContext);
void ConvertParamMapToTargetContext(const std::map<std::string, NDArray> &paramMap,
std::map<std::string, NDArray> *paramMapInTargetContext,
Context targetContext);
void InitParameters();

inline bool FileExists(const std::string &name) {
@@ -115,6 +123,7 @@ class Predictor {

MXDataIter *val_iter_;
bool use_gpu_;
bool enable_tensorrt_;
std::string dataset_;
int data_nthreads_;
std::string data_layer_type_;
@@ -134,14 +143,15 @@ class Predictor {
* the input shape is required to be in format Shape(1, number_of_channels, height, width)
* The input image will be resized to (height x width) size before running the inference.
* 4. use_gpu: determine if run inference on GPU
* 5. dataset: data file (.rec) to be used for inference
* 6. data_nthreads: number of threads for data loading
* 7. data_layer_type: data type for data layer
* 8. rgb_mean: mean value to be subtracted on R/G/B channel
* 9. rgb_std: standard deviation on R/G/B channel
* 10. shuffle_chunk_seed: shuffling chunk seed
* 11. seed: shuffling seed
* 12. benchmark: use dummy data for inference
* 5. enable_tensorrt: determine whether to enable TensorRT
* 6. dataset: data file (.rec) to be used for inference
* 7. data_nthreads: number of threads for data loading
* 8. data_layer_type: data type for data layer
* 9. rgb_mean: mean value to be subtracted on R/G/B channel
* 10. rgb_std: standard deviation on R/G/B channel
* 11. shuffle_chunk_seed: shuffling chunk seed
* 12. seed: shuffling seed
* 13. benchmark: use dummy data for inference
*
* The constructor will:
* 1. Create ImageRecordIter based on the given dataset file.
@@ -152,6 +162,7 @@ Predictor::Predictor(const std::string& model_json_file,
const std::string& model_params_file,
const Shape& input_shape,
bool use_gpu,
bool enable_tensorrt,
const std::string& dataset,
const int data_nthreads,
const std::string& data_layer_type,
@@ -161,6 +172,7 @@ Predictor::Predictor(const std::string& model_json_file,
int seed, bool benchmark)
: input_shape_(input_shape),
use_gpu_(use_gpu),
enable_tensorrt_(enable_tensorrt),
dataset_(dataset),
data_nthreads_(data_nthreads),
data_layer_type_(data_layer_type),
@@ -182,12 +194,12 @@ Predictor::Predictor(const std::string& model_json_file,
// Load the model
LoadModel(model_json_file);
// Initialize the parameters
// benchmark=false, load parameters from file
// benchmark=true, randomly initialize parameters
if (!benchmark_) {
LoadParameters(model_params_file);
} else {
// benchmark=true && model_params_file.empty(), randomly initialize parameters
// else, load parameters
if (benchmark_ && model_params_file.empty()) {
InitParameters();
} else {
LoadParameters(model_params_file);
}

int dtype = GetDataLayerType();
@@ -289,9 +301,11 @@ void Predictor::LoadModel(const std::string& model_json_file) {
}
LG << "Loading the model from " << model_json_file << std::endl;
net_ = Symbol::Load(model_json_file);
if (enable_tensorrt_) {
net_ = net_.GetBackendSymbol("TensorRT");
}
}


/*
* The following function loads the model parameters.
*/
@@ -303,20 +317,50 @@ void Predictor::LoadParameters(const std::string& model_parameters_file) {
LG << "Loading the model parameters from " << model_parameters_file << std::endl;
std::map<std::string, NDArray> parameters;
NDArray::Load(model_parameters_file, 0, &parameters);
for (const auto &k : parameters) {
if (k.first.substr(0, 4) == "aux:") {
auto name = k.first.substr(4, k.first.size() - 4);
aux_map_[name] = k.second.Copy(global_ctx_);
}
if (k.first.substr(0, 4) == "arg:") {
auto name = k.first.substr(4, k.first.size() - 4);
args_map_[name] = k.second.Copy(global_ctx_);
}
if (enable_tensorrt_) {
std::map<std::string, NDArray> intermediate_args_map;
std::map<std::string, NDArray> intermediate_aux_map;
SplitParamMap(parameters, &intermediate_args_map, &intermediate_aux_map, Context::cpu());
contrib::InitTensorRTParams(net_, &intermediate_args_map, &intermediate_aux_map);
ConvertParamMapToTargetContext(intermediate_args_map, &args_map_, global_ctx_);
ConvertParamMapToTargetContext(intermediate_aux_map, &aux_map_, global_ctx_);
} else {
SplitParamMap(parameters, &args_map_, &aux_map_, global_ctx_);
}
/* WaitAll is needed when we copy data between the GPU and main memory */
NDArray::WaitAll();
}

/*
 * The following function splits the loaded param map into arg and aux
 * param maps in the target context
 */
void Predictor::SplitParamMap(const std::map<std::string, NDArray> &paramMap,
std::map<std::string, NDArray> *argParamInTargetContext,
std::map<std::string, NDArray> *auxParamInTargetContext,
Context targetContext) {
for (const auto& pair : paramMap) {
std::string type = pair.first.substr(0, 4);
std::string name = pair.first.substr(4);
if (type == "arg:") {
(*argParamInTargetContext)[name] = pair.second.Copy(targetContext);
} else if (type == "aux:") {
(*auxParamInTargetContext)[name] = pair.second.Copy(targetContext);
}
}
}

/*
 * The following function copies the param map into the target context
 */
void Predictor::ConvertParamMapToTargetContext(const std::map<std::string, NDArray> &paramMap,
std::map<std::string, NDArray> *paramMapInTargetContext,
Context targetContext) {
for (const auto& pair : paramMap) {
(*paramMapInTargetContext)[pair.first] = pair.second.Copy(targetContext);
}
}

/*
* The following function randomly initializes the parameters when benchmark_ is true.
*/
@@ -517,6 +561,8 @@ void printUsage() {
<< "--data_layer_type <default: \"float32\" "
<< "choices: [\"float32\",\"int8\",\"uint8\"]>" << std::endl
<< "--gpu <whether to run inference on GPU, default: false>" << std::endl
<< "--enableTRT <whether to run inference with TensorRT, "
<< "default: false>" << std::endl
<< "--benchmark <whether to use dummy data to run inference, default: false>"
<< std::endl;
}
@@ -528,6 +574,7 @@ int main(int argc, char** argv) {
std::string input_rgb_mean("0 0 0");
std::string input_rgb_std("1 1 1");
bool use_gpu = false;
bool enable_tensorrt = false;
bool benchmark = false;
int batch_size = 64;
int num_skipped_batches = 0;
@@ -575,6 +622,9 @@ int main(int argc, char** argv) {
data_layer_type = (index < argc ? argv[index]:data_layer_type);
} else if (strcmp("--gpu", argv[index]) == 0) {
use_gpu = true;
} else if (strcmp("--enableTRT", argv[index]) == 0) {
use_gpu = true;
enable_tensorrt = true;
} else if (strcmp("--benchmark", argv[index]) == 0) {
benchmark = true;
} else if (strcmp("--help", argv[index]) == 0) {
@@ -584,7 +634,9 @@ int main(int argc, char** argv) {
index++;
}

if (model_file_json.empty() || (!benchmark && model_file_params.empty())) {
if (model_file_json.empty()
|| (!benchmark && model_file_params.empty())
|| (enable_tensorrt && model_file_params.empty())) {
LG << "ERROR: Model details such as symbol, param files are not specified";
printUsage();
return 1;
@@ -597,8 +649,8 @@ int main(int argc, char** argv) {
std::vector<float> rgb_std = createVectorFromString<float>(input_rgb_std);

// Initialize the predictor object
Predictor predict(model_file_json, model_file_params, input_data_shape, use_gpu, dataset,
data_nthreads, data_layer_type, rgb_mean, rgb_std, shuffle_chunk_seed,
Predictor predict(model_file_json, model_file_params, input_data_shape, use_gpu, enable_tensorrt,
dataset, data_nthreads, data_layer_type, rgb_mean, rgb_std, shuffle_chunk_seed,
seed, benchmark);

if (benchmark) {
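
Taken together, the Predictor changes above boil down to the following sequence. Here is a minimal standalone sketch of that flow (a sketch only: the file paths reuse the README example, and it assumes an MXNet build with the TensorRT subgraph backend and a visible GPU):
```
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  // 1. Load the symbol and ask the subgraph API for the TensorRT-partitioned graph.
  Symbol net = Symbol::Load("./model/Inception-BN-symbol.json");
  net = net.GetBackendSymbol("TensorRT");

  // 2. Load the parameters and split them into arg/aux maps on the CPU first,
  //    because InitTensorRTParams records NDArray handles in the TRT node attributes.
  std::map<std::string, NDArray> params, args, aux;
  NDArray::Load("./model/Inception-BN-0126.params", nullptr, &params);
  for (const auto& kv : params) {
    std::string type = kv.first.substr(0, 4);
    std::string name = kv.first.substr(4);
    if (type == "arg:") {
      args[name] = kv.second.Copy(Context::cpu());
    } else if (type == "aux:") {
      aux[name] = kv.second.Copy(Context::cpu());
    }
  }
  NDArray::WaitAll();

  // 3. Hand the weights required by the TensorRT subgraphs over to the TRT nodes;
  //    those entries are erased from args/aux as a side effect.
  contrib::InitTensorRTParams(net, &args, &aux);

  // 4. Copy the remaining parameters to the GPU and bind/run as usual.
  std::map<std::string, NDArray> gpu_args, gpu_aux;
  for (const auto& kv : args) gpu_args[kv.first] = kv.second.Copy(Context::gpu());
  for (const auto& kv : aux) gpu_aux[kv.first] = kv.second.Copy(Context::gpu());
  NDArray::WaitAll();
  return 0;
}
```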
1 change: 1 addition & 0 deletions cpp-package/include/mxnet-cpp/MxNetCpp.h
@@ -39,5 +39,6 @@
#include "mxnet-cpp/io.hpp"
#include "mxnet-cpp/metric.h"
#include "mxnet-cpp/initializer.h"
#include "mxnet-cpp/contrib.h"

#endif // MXNET_CPP_MXNETCPP_H_
115 changes: 115 additions & 0 deletions cpp-package/include/mxnet-cpp/contrib.h
@@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file contrib.h
* \brief utility function to enable some contrib features
* \author Haohuan Wang
*/
#ifndef MXNET_CPP_CONTRIB_H_
#define MXNET_CPP_CONTRIB_H_

#include <iostream>
#include <string>
#include <map>
#include <vector>
#include "mxnet-cpp/symbol.h"

namespace mxnet {
namespace cpp {
namespace details {

/*!
* split a string with the given delimiter
* @param str string to be parsed
* @param delimiter delimiter
 * @return list of substrings split by the delimiter
*/
inline std::vector<std::string> split(const std::string& str, const std::string& delimiter) {
std::vector<std::string> splitted;
size_t last = 0;
size_t next = 0;
while ((next = str.find(delimiter, last)) != std::string::npos) {
splitted.push_back(str.substr(last, next - last));
    last = next + delimiter.length();  // advance past the whole delimiter, not just one character
}
splitted.push_back(str.substr(last));
return splitted;
}

} // namespace details

namespace contrib {
Review comment (Contributor):
@szha Hey Sheng, have we had contrib namespaces in C++ before? The intent of this code basically aligns with the intent of having a contrib-level Python API. Does namespacing it out like this make sense to you?

// needs to be kept in sync with
// https://github.com/apache/incubator-mxnet/blob/1c874cfc807cee755c38f6486e8e0f4d94416cd8/src/operator/subgraph/tensorrt/tensorrt-inl.h#L190
static const std::string TENSORRT_SUBGRAPH_PARAM_IDENTIFIER = "subgraph_params_names";
// needs to be kept in sync with
// https://github.com/apache/incubator-mxnet/blob/master/src/operator/subgraph/tensorrt/tensorrt.cc#L244
static const std::string TENSORRT_SUBGRAPH_PARAM_PREFIX = "subgraph_param_";
Review comment (Contributor):
We should probably add those constants in src/operator/subgraph/tensorrt/tensorrt-inl.h to keep track.

Reply (Author @haohuanw, Jul 1, 2019):
You mean duplicate them in tensorrt-inl.h as well?

Reply (Contributor):
I was more thinking of putting them in tensorrt-inl.h and including that file here (in case we need it somewhere).

Reply (Author @haohuanw):
Hmm, tensorrt-inl.h is not in the default header folder. If we do that we probably need a new header file?

Reply (Contributor):
I see, ignore my original comment then.

/*!
 * this mimics https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/contrib/tensorrt.py#L37
 * @param symbol symbol that has already been partitioned through the subgraph API
 * @param argParams original arg params; entries needed by TensorRT are removed by this call
 * @param auxParams original aux params; entries needed by TensorRT are removed by this call
 */
inline void InitTensorRTParams(const mxnet::cpp::Symbol& symbol,
std::map<std::string, mxnet::cpp::NDArray> *argParams,
std::map<std::string, mxnet::cpp::NDArray> *auxParams) {
mxnet::cpp::Symbol internals = symbol.GetInternals();
mx_uint numSymbol = internals.GetNumOutputs();
for (mx_uint i = 0; i < numSymbol; ++i) {
std::map<std::string, std::string> attrs = internals[i].ListAttributes();
if (attrs.find(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER) != attrs.end()) {
std::string new_params_names;
std::map<std::string, mxnet::cpp::NDArray> tensorrtParams;
std::vector<std::string> keys = details::split(
attrs[TENSORRT_SUBGRAPH_PARAM_IDENTIFIER], ";");
for (const auto& key : keys) {
if (argParams->find(key) != argParams->end()) {
new_params_names += key + ";";
tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*argParams)[key];
argParams->erase(key);
} else if (auxParams->find(key) != auxParams->end()) {
new_params_names += key + ";";
tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*auxParams)[key];
auxParams->erase(key);
}
}
std::map<std::string, std::string> new_attrs = {};
for (const auto& kv : tensorrtParams) {
// passing the ndarray address into TRT node attributes to get the weight
uint64_t address = reinterpret_cast<uint64_t>(kv.second.GetHandle());
new_attrs[kv.first] = std::to_string(address);
}
if (!new_attrs.empty()) {
internals[i].SetAttributes(new_attrs);
internals[i].SetAttribute(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER,
new_params_names.substr(0, new_params_names.length() - 1));
}
}
}
}

} // namespace contrib
} // namespace cpp
} // namespace mxnet

#endif // MXNET_CPP_CONTRIB_H_
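
As a quick sanity check on the details::split helper defined at the top of this header, here is how it behaves on the semicolon-separated name lists stored under TENSORRT_SUBGRAPH_PARAM_IDENTIFIER (the parameter names below are made up for illustration):
```
#include <cassert>
#include <string>
#include <vector>
#include "mxnet-cpp/contrib.h"

int main() {
  // "conv0_weight;bn0_gamma" splits into {"conv0_weight", "bn0_gamma"}.
  std::vector<std::string> parts =
      mxnet::cpp::details::split("conv0_weight;bn0_gamma", ";");
  assert(parts.size() == 2);
  assert(parts[0] == "conv0_weight");
  assert(parts[1] == "bn0_gamma");

  // An input without the delimiter comes back as a single element.
  assert(mxnet::cpp::details::split("conv0_weight", ";").size() == 1);
  return 0;
}
```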
17 changes: 17 additions & 0 deletions cpp-package/include/mxnet-cpp/symbol.h
@@ -178,6 +178,23 @@ class Symbol {
std::vector<std::string> ListOutputs() const;
/*! \return get the descriptions of auxiliary data for this symbol */
std::vector<std::string> ListAuxiliaryStates() const;
/*! \return get all attributes for this symbol */
std::map<std::string, std::string> ListAttributes() const;
/*!
 * \brief set a key-value attribute on the symbol
 * @param key string representing the key of the attribute
 * @param value string representing the value of the attribute
 */
void SetAttribute(const std::string& key, const std::string& value);
/*!
 * \brief set a series of key-value attributes on the symbol
 * @param attrs string-to-string map representing the key-value attributes
 */
void SetAttributes(const std::map<std::string, std::string>& attrs);
/*! \return get number of outputs for this symbol */
mx_uint GetNumOutputs() const;
/*! \return get the new symbol through subgraph API for this symbol */
mxnet::cpp::Symbol GetBackendSymbol(const std::string& backendName) const;
/*! \return get the name of the symbol */
std::string GetName() const;
/*!
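
A short sketch of how these new Symbol methods compose; it mirrors the traversal InitTensorRTParams performs in contrib.h and just prints the TensorRT subgraph nodes (the DumpTensorRTNodes name is ours, not part of the PR):
```
#include <iostream>
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

// Print every internal node that carries a TensorRT subgraph-parameter list.
void DumpTensorRTNodes(const Symbol& net) {
  Symbol internals = net.GetInternals();
  for (mx_uint i = 0; i < internals.GetNumOutputs(); ++i) {
    std::map<std::string, std::string> attrs = internals[i].ListAttributes();
    auto it = attrs.find(contrib::TENSORRT_SUBGRAPH_PARAM_IDENTIFIER);
    if (it != attrs.end()) {
      std::cout << internals[i].GetName() << " needs: " << it->second << std::endl;
    }
  }
}
```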