enable TensorRT integration with cpp api (#15335)
haohuanw authored and KellenSunderland committed Jul 4, 2019
1 parent 5078853 commit fc54781
Showing 6 changed files with 260 additions and 26 deletions.
14 changes: 14 additions & 0 deletions cpp-package/example/inference/README.md
@@ -75,6 +75,7 @@ imagenet_inference --symbol_file <model symbol file in json format>
--num_inference_batches <number of batches used for inference>
--data_layer_type <default: "float32", choices: ["float32", "int8", "uint8"]>
--gpu <whether to run inference on GPU, default: false>
--enableTRT <whether to run inference with TensorRT, default: false>"
--benchmark <whether to use dummy data to run inference, default: false>
```

@@ -134,6 +135,19 @@ imagenet_inference.cpp:372: Running the forward pass on model to evaluate the performance..
imagenet_inference.cpp:387: benchmark completed!
imagenet_inference.cpp:388: batch size: 1 num batch: 500 throughput: xxxx imgs/s latency:xxxx ms
```
To run this example with TensorRT, try the following command, which benchmarks Inception-BN:
```
./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --params_file "./model/Inception-BN-0126.params" --batch_size 16 --num_inference_batches 500 --benchmark --enableTRT
```
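Note that `--enableTRT` automatically enables GPU execution, and `--params_file` is required even in `--benchmark` mode because the TensorRT subgraphs are initialized from the real weights.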
Sample output will look like this (the example was run on an AWS P3.2xl machine):
```
imagenet_inference.cpp:302: Loading the model from ./model/Inception-BN-symbol.json
build_subgraph.cc:686: start to execute partition graph.
imagenet_inference.cpp:317: Loading the model parameters from ./model/Inception-BN-0126.params
imagenet_inference.cpp:424: Running the forward pass on model to evaluate the performance..
imagenet_inference.cpp:439: benchmark completed!
imagenet_inference.cpp:440: batch size: 16 num batch: 500 throughput: 6284.78 imgs/s latency:0.159115 ms
```

## [sentiment_analysis_rnn.cpp](<https://github.com/apache/incubator-mxnet/blob/master/cpp-package/example/inference/sentiment_analysis_rnn.cpp>)
This example demonstrates how to load a pre-trained RNN model and use it with the MXNet C++ API to predict the sentiment expressed in a given movie review. The example can process variable-length inputs. It performs the following tasks
104 changes: 78 additions & 26 deletions cpp-package/example/inference/imagenet_inference.cpp
@@ -82,6 +82,7 @@ class Predictor {
const std::string& model_params_file,
const Shape& input_shape,
bool use_gpu,
bool enable_tensorrt,
const std::string& dataset,
const int data_nthreads,
const std::string& data_layer_type,
@@ -98,6 +99,13 @@ class Predictor {
bool AdvanceDataIter(int skipped_batches);
void LoadModel(const std::string& model_json_file);
void LoadParameters(const std::string& model_parameters_file);
void SplitParamMap(const std::map<std::string, NDArray> &paramMap,
std::map<std::string, NDArray> *argParamInTargetContext,
std::map<std::string, NDArray> *auxParamInTargetContext,
Context targetContext);
void ConvertParamMapToTargetContext(const std::map<std::string, NDArray> &paramMap,
std::map<std::string, NDArray> *paramMapInTargetContext,
Context targetContext);
void InitParameters();

inline bool FileExists(const std::string &name) {
@@ -115,6 +123,7 @@ class Predictor {

MXDataIter *val_iter_;
bool use_gpu_;
bool enable_tensorrt_;
std::string dataset_;
int data_nthreads_;
std::string data_layer_type_;
@@ -134,14 +143,15 @@ class Predictor {
* the input shape is required to be in format Shape(1, number_of_channels, height, width)
* The input image will be resized to (height x width) size before running the inference.
* 4. use_gpu: determine whether to run inference on GPU
* 5. dataset: data file (.rec) to be used for inference
* 6. data_nthreads: number of threads for data loading
* 7. data_layer_type: data type for data layer
* 8. rgb_mean: mean value to be subtracted on R/G/B channel
* 9. rgb_std: standard deviation on R/G/B channel
* 10. shuffle_chunk_seed: shuffling chunk seed
* 11. seed: shuffling seed
* 12. benchmark: use dummy data for inference
* 5. enable_tensorrt: determine whether to enable TensorRT
* 6. dataset: data file (.rec) to be used for inference
* 7. data_nthreads: number of threads for data loading
* 8. data_layer_type: data type for data layer
* 9. rgb_mean: mean value to be subtracted on R/G/B channel
* 10. rgb_std: standard deviation on R/G/B channel
* 11. shuffle_chunk_seed: shuffling chunk seed
* 12. seed: shuffling seed
* 13. benchmark: use dummy data for inference
*
* The constructor will:
* 1. Create ImageRecordIter based on the given dataset file.
@@ -152,6 +162,7 @@ Predictor::Predictor(const std::string& model_json_file,
const std::string& model_params_file,
const Shape& input_shape,
bool use_gpu,
bool enable_tensorrt,
const std::string& dataset,
const int data_nthreads,
const std::string& data_layer_type,
@@ -161,6 +172,7 @@ Predictor::Predictor(const std::string& model_json_file,
int seed, bool benchmark)
: input_shape_(input_shape),
use_gpu_(use_gpu),
enable_tensorrt_(enable_tensorrt),
dataset_(dataset),
data_nthreads_(data_nthreads),
data_layer_type_(data_layer_type),
@@ -182,12 +194,12 @@ Predictor::Predictor(const std::string& model_json_file,
// Load the model
LoadModel(model_json_file);
// Initialize the parameters
// benchmark=false, load parameters from file
// benchmark=true, randomly initialize parameters
if (!benchmark_) {
LoadParameters(model_params_file);
} else {
// benchmark=true && model_params_file.empty(), randomly initialize parameters
// else, load parameters
if (benchmark_ && model_params_file.empty()) {
InitParameters();
} else {
LoadParameters(model_params_file);
}

int dtype = GetDataLayerType();
@@ -289,9 +301,11 @@ void Predictor::LoadModel(const std::string& model_json_file) {
}
LG << "Loading the model from " << model_json_file << std::endl;
net_ = Symbol::Load(model_json_file);
if (enable_tensorrt_) {
net_ = net_.GetBackendSymbol("TensorRT");
}
}


/*
* The following function loads the model parameters.
*/
@@ -303,20 +317,50 @@ void Predictor::LoadParameters(const std::string& model_parameters_file) {
LG << "Loading the model parameters from " << model_parameters_file << std::endl;
std::map<std::string, NDArray> parameters;
NDArray::Load(model_parameters_file, 0, &parameters);
for (const auto &k : parameters) {
if (k.first.substr(0, 4) == "aux:") {
auto name = k.first.substr(4, k.first.size() - 4);
aux_map_[name] = k.second.Copy(global_ctx_);
}
if (k.first.substr(0, 4) == "arg:") {
auto name = k.first.substr(4, k.first.size() - 4);
args_map_[name] = k.second.Copy(global_ctx_);
}
if (enable_tensorrt_) {
std::map<std::string, NDArray> intermediate_args_map;
std::map<std::string, NDArray> intermediate_aux_map;
SplitParamMap(parameters, &intermediate_args_map, &intermediate_aux_map, Context::cpu());
contrib::InitTensorRTParams(net_, &intermediate_args_map, &intermediate_aux_map);
ConvertParamMapToTargetContext(intermediate_args_map, &args_map_, global_ctx_);
ConvertParamMapToTargetContext(intermediate_aux_map, &aux_map_, global_ctx_);
} else {
SplitParamMap(parameters, &args_map_, &aux_map_, global_ctx_);
}
/* WaitAll is needed when we copy data between the GPU and main memory */
NDArray::WaitAll();
}

/*
* The following function splits the loaded param map into arg and aux
* param maps, copying each parameter into the target context
*/
void Predictor::SplitParamMap(const std::map<std::string, NDArray> &paramMap,
std::map<std::string, NDArray> *argParamInTargetContext,
std::map<std::string, NDArray> *auxParamInTargetContext,
Context targetContext) {
for (const auto& pair : paramMap) {
std::string type = pair.first.substr(0, 4);
std::string name = pair.first.substr(4);
if (type == "arg:") {
(*argParamInTargetContext)[name] = pair.second.Copy(targetContext);
} else if (type == "aux:") {
(*auxParamInTargetContext)[name] = pair.second.Copy(targetContext);
}
}
}

/*
* The following function copies the param map into the target context
*/
void Predictor::ConvertParamMapToTargetContext(const std::map<std::string, NDArray> &paramMap,
std::map<std::string, NDArray> *paramMapInTargetContext,
Context targetContext) {
for (const auto& pair : paramMap) {
(*paramMapInTargetContext)[pair.first] = pair.second.Copy(targetContext);
}
}

/*
* The following function randomly initializes the parameters when benchmark_ is true.
*/
@@ -517,6 +561,8 @@ void printUsage() {
<< "--data_layer_type <default: \"float32\" "
<< "choices: [\"float32\",\"int8\",\"uint8\"]>" << std::endl
<< "--gpu <whether to run inference on GPU, default: false>" << std::endl
<< "--enableTRT <whether to run inference with TensorRT, "
<< "default: false>" << std::endl
<< "--benchmark <whether to use dummy data to run inference, default: false>"
<< std::endl;
}
@@ -528,6 +574,7 @@ int main(int argc, char** argv) {
std::string input_rgb_mean("0 0 0");
std::string input_rgb_std("1 1 1");
bool use_gpu = false;
bool enable_tensorrt = false;
bool benchmark = false;
int batch_size = 64;
int num_skipped_batches = 0;
@@ -575,6 +622,9 @@ int main(int argc, char** argv) {
data_layer_type = (index < argc ? argv[index]:data_layer_type);
} else if (strcmp("--gpu", argv[index]) == 0) {
use_gpu = true;
} else if (strcmp("--enableTRT", argv[index]) == 0) {
use_gpu = true;
enable_tensorrt = true;
} else if (strcmp("--benchmark", argv[index]) == 0) {
benchmark = true;
} else if (strcmp("--help", argv[index]) == 0) {
@@ -584,7 +634,9 @@ int main(int argc, char** argv) {
index++;
}

if (model_file_json.empty() || (!benchmark && model_file_params.empty())) {
if (model_file_json.empty()
|| (!benchmark && model_file_params.empty())
|| (enable_tensorrt && model_file_params.empty())) {
LG << "ERROR: Model details such as symbol, param files are not specified";
printUsage();
return 1;
@@ -597,8 +649,8 @@ int main(int argc, char** argv) {
std::vector<float> rgb_std = createVectorFromString<float>(input_rgb_std);

// Initialize the predictor object
Predictor predict(model_file_json, model_file_params, input_data_shape, use_gpu, dataset,
data_nthreads, data_layer_type, rgb_mean, rgb_std, shuffle_chunk_seed,
Predictor predict(model_file_json, model_file_params, input_data_shape, use_gpu, enable_tensorrt,
dataset, data_nthreads, data_layer_type, rgb_mean, rgb_std, shuffle_chunk_seed,
seed, benchmark);

if (benchmark) {
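Taken together, the changes above enable the following end-to-end flow. Below is a minimal sketch, not part of the commit: it assumes an MXNet build with TensorRT support, reuses the Inception-BN file paths from the README example, assumes the model input is named `data`, and uses `SimpleBind` where the example uses a lower-level `Bind`; error handling is omitted.
```cpp
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  Context ctx = Context::gpu(0);  // TensorRT inference runs on the GPU

  // 1. Load the symbol and partition it with the TensorRT backend.
  Symbol net = Symbol::Load("./model/Inception-BN-symbol.json")
                   .GetBackendSymbol("TensorRT");

  // 2. Load the parameters on CPU and split them into arg/aux maps.
  std::map<std::string, NDArray> params, args, auxs;
  NDArray::Load("./model/Inception-BN-0126.params", nullptr, &params);
  for (const auto& kv : params) {
    std::string name = kv.first.substr(4);
    if (kv.first.substr(0, 4) == "arg:") args[name] = kv.second;
    else if (kv.first.substr(0, 4) == "aux:") auxs[name] = kv.second;
  }

  // 3. Hand the weights consumed by the TensorRT subgraphs over to the
  //    TRT nodes; those entries are erased from args/auxs in the process.
  contrib::InitTensorRTParams(net, &args, &auxs);

  // 4. Copy the remaining params (plus an input placeholder) to the GPU
  //    and bind an executor as usual.
  std::map<std::string, NDArray> gpu_args, gpu_auxs;
  for (const auto& kv : args) gpu_args[kv.first] = kv.second.Copy(ctx);
  for (const auto& kv : auxs) gpu_auxs[kv.first] = kv.second.Copy(ctx);
  gpu_args["data"] = NDArray(Shape(1, 3, 224, 224), ctx, false);
  NDArray::WaitAll();

  Executor* exec = net.SimpleBind(ctx, gpu_args,
                                  std::map<std::string, NDArray>(),
                                  std::map<std::string, OpReqType>(),
                                  gpu_auxs);
  exec->Forward(false);
  exec->outputs[0].WaitToRead();
  delete exec;
  return 0;
}
```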
1 change: 1 addition & 0 deletions cpp-package/include/mxnet-cpp/MxNetCpp.h
@@ -39,5 +39,6 @@
#include "mxnet-cpp/io.hpp"
#include "mxnet-cpp/metric.h"
#include "mxnet-cpp/initializer.h"
#include "mxnet-cpp/contrib.h"

#endif // MXNET_CPP_MXNETCPP_H_
115 changes: 115 additions & 0 deletions cpp-package/include/mxnet-cpp/contrib.h
@@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file contrib.h
* \brief utility function to enable some contrib features
* \author Haohuan Wang
*/
#ifndef MXNET_CPP_CONTRIB_H_
#define MXNET_CPP_CONTRIB_H_

#include <iostream>
#include <string>
#include <map>
#include <vector>
#include "mxnet-cpp/symbol.h"

namespace mxnet {
namespace cpp {
namespace details {

/*!
* split a string with the given delimiter
* @param str string to be parsed
* @param delimiter delimiter
* @return list of the delimited substrings
*/
inline std::vector<std::string> split(const std::string& str, const std::string& delimiter) {
std::vector<std::string> splitted;
size_t last = 0;
size_t next = 0;
while ((next = str.find(delimiter, last)) != std::string::npos) {
splitted.push_back(str.substr(last, next - last));
last = next + delimiter.length();  // advance past the full delimiter
}
splitted.push_back(str.substr(last));
return splitted;
}

} // namespace details

namespace contrib {

// needs to be kept in sync with
// https://github.com/apache/incubator-mxnet/blob/1c874cfc807cee755c38f6486e8e0f4d94416cd8/src/operator/subgraph/tensorrt/tensorrt-inl.h#L190
static const std::string TENSORRT_SUBGRAPH_PARAM_IDENTIFIER = "subgraph_params_names";
// needs to be kept in sync with
// https://github.com/apache/incubator-mxnet/blob/master/src/operator/subgraph/tensorrt/tensorrt.cc#L244
static const std::string TENSORRT_SUBGRAPH_PARAM_PREFIX = "subgraph_param_";
/*!
* this mimics https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/contrib/tensorrt.py#L37
* @param symbol a symbol that has already been partitioned through the subgraph API
* @param argParams original arg params; params needed by TensorRT will be removed after calling this function
* @param auxParams original aux params; params needed by TensorRT will be removed after calling this function
*/
inline void InitTensorRTParams(const mxnet::cpp::Symbol& symbol,
std::map<std::string, mxnet::cpp::NDArray> *argParams,
std::map<std::string, mxnet::cpp::NDArray> *auxParams) {
mxnet::cpp::Symbol internals = symbol.GetInternals();
mx_uint numSymbol = internals.GetNumOutputs();
for (mx_uint i = 0; i < numSymbol; ++i) {
std::map<std::string, std::string> attrs = internals[i].ListAttributes();
if (attrs.find(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER) != attrs.end()) {
std::string new_params_names;
std::map<std::string, mxnet::cpp::NDArray> tensorrtParams;
std::vector<std::string> keys = details::split(
attrs[TENSORRT_SUBGRAPH_PARAM_IDENTIFIER], ";");
for (const auto& key : keys) {
if (argParams->find(key) != argParams->end()) {
new_params_names += key + ";";
tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*argParams)[key];
argParams->erase(key);
} else if (auxParams->find(key) != auxParams->end()) {
new_params_names += key + ";";
tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*auxParams)[key];
auxParams->erase(key);
}
}
std::map<std::string, std::string> new_attrs = {};
for (const auto& kv : tensorrtParams) {
// passing the ndarray address into TRT node attributes to get the weight
uint64_t address = reinterpret_cast<uint64_t>(kv.second.GetHandle());
new_attrs[kv.first] = std::to_string(address);
}
if (!new_attrs.empty()) {
internals[i].SetAttributes(new_attrs);
internals[i].SetAttribute(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER,
new_params_names.substr(0, new_params_names.length() - 1));
}
}
}
}

} // namespace contrib
} // namespace cpp
} // namespace mxnet

#endif // MXNET_CPP_CONTRIB_H_
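For reference, `details::split` tokenizes the `;`-separated value of the `subgraph_params_names` attribute that each TensorRT subgraph node carries. A tiny hypothetical usage sketch (the key names are made up):
```cpp
#include <cassert>
#include "mxnet-cpp/contrib.h"

int main() {
  // A TensorRT subgraph node stores the names of the weights it consumes
  // as a ";"-separated attribute value; split() recovers the individual keys.
  auto keys = mxnet::cpp::details::split("bn0_gamma;bn0_beta;conv0_weight", ";");
  assert(keys.size() == 3);
  assert(keys[0] == "bn0_gamma");
  assert(keys[2] == "conv0_weight");
  return 0;
}
```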
17 changes: 17 additions & 0 deletions cpp-package/include/mxnet-cpp/symbol.h
@@ -178,6 +178,23 @@ class Symbol {
std::vector<std::string> ListOutputs() const;
/*! \return get the descriptions of auxiliary data for this symbol */
std::vector<std::string> ListAuxiliaryStates() const;
/*! \return get all attributes for this symbol */
std::map<std::string, std::string> ListAttributes() const;
/*!
* \brief set a key-value attribute on the symbol
* @param key string representing the key of the attribute
* @param value string representing the value of the attribute
*/
void SetAttribute(const std::string& key, const std::string& value);
/*!
* \brief set a series of key-value attributes on the symbol
* @param attrs string-to-string map representing the key-value attributes
*/
void SetAttributes(const std::map<std::string, std::string>& attrs);
/*! \return get number of outputs for this symbol */
mx_uint GetNumOutputs() const;
/*! \return the new symbol produced by the given backend through the subgraph API */
mxnet::cpp::Symbol GetBackendSymbol(const std::string& backendName) const;
/*! \return get the name of the symbol */
std::string GetName() const;
/*!
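A short hypothetical sketch of how these new `Symbol` accessors compose; this is essentially what `contrib::InitTensorRTParams` does internally (the model path is a placeholder):
```cpp
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  Symbol net = Symbol::Load("./model/Inception-BN-symbol.json");
  // Partition the graph with a backend registered through the subgraph API.
  Symbol trt = net.GetBackendSymbol("TensorRT");

  // Walk the internal nodes and inspect or edit their attributes.
  Symbol internals = trt.GetInternals();
  for (mx_uint i = 0; i < internals.GetNumOutputs(); ++i) {
    std::map<std::string, std::string> attrs = internals[i].ListAttributes();
    if (attrs.count("subgraph_params_names")) {
      internals[i].SetAttribute("my_note", "visited");        // single attribute
      internals[i].SetAttributes({{"a", "1"}, {"b", "2"}});   // several at once
    }
  }
  return 0;
}
```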
