diff --git a/cpp-package/example/inference/README.md b/cpp-package/example/inference/README.md
index 272586da5da9..81cf9d856c23 100644
--- a/cpp-package/example/inference/README.md
+++ b/cpp-package/example/inference/README.md
@@ -75,6 +75,7 @@ imagenet_inference
 --symbol_file <model symbol file in json format>
 --num_inference_batches <number of batches used for inference>
 --data_layer_type <data type for data layer>
 --gpu <whether to run inference on GPU, default: false>
+--enableTRT <whether to run inference with TensorRT, default: false>"
 --benchmark <whether to use dummy data to run inference, default: false>
 ```
@@ -134,6 +135,19 @@ imagenet_inference.cpp:372: Running the forward pass on model to evaluate the pe
 imagenet_inference.cpp:387: benchmark completed!
 imagenet_inference.cpp:388: batch size: 1 num batch: 500 throughput: xxxx imgs/s latency:xxxx ms
 ```
+To run this example with TensorRT, you can quickly try the following command, which runs a benchmark test on the Inception-BN model:
+```
+./imagenet_inference --symbol_file "./model/Inception-BN-symbol.json" --params_file "./model/Inception-BN-0126.params" --batch_size 16 --num_inference_batches 500 --benchmark --enableTRT
+```
+Sample output will look like this (the example is running on an AWS P3.2xl machine):
+```
+imagenet_inference.cpp:302: Loading the model from ./model/Inception-BN-symbol.json
+build_subgraph.cc:686: start to execute partition graph.
+imagenet_inference.cpp:317: Loading the model parameters from ./model/Inception-BN-0126.params
+imagenet_inference.cpp:424: Running the forward pass on model to evaluate the performance..
+imagenet_inference.cpp:439: benchmark completed!
+imagenet_inference.cpp:440: batch size: 16 num batch: 500 throughput: 6284.78 imgs/s latency:0.159115 ms
+```

 ## [sentiment_analysis_rnn.cpp]()
 This example demonstrates how you can load a pre-trained RNN model and use it to predict the sentiment expressed in the given movie review with the MXNet C++ API. The example is capable of processing variable length inputs. It performs the following tasks
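Note: the `--enableTRT` flag above maps directly onto the subgraph backend API this patch adds to the C++ package. A minimal sketch of what happens inside `Predictor::LoadModel` when the flag is set (the model path is illustrative):
```
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  // Load the symbol file, then ask the subgraph API to replace supported
  // operator chains with TensorRT subgraph nodes.
  Symbol net = Symbol::Load("./model/Inception-BN-symbol.json");
  net = net.GetBackendSymbol("TensorRT");
  return 0;
}
```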
diff --git a/cpp-package/example/inference/imagenet_inference.cpp b/cpp-package/example/inference/imagenet_inference.cpp
index 7eaf991ada4e..4f5a3bb8bbe6 100644
--- a/cpp-package/example/inference/imagenet_inference.cpp
+++ b/cpp-package/example/inference/imagenet_inference.cpp
@@ -82,6 +82,7 @@ class Predictor {
             const std::string& model_params_file,
             const Shape& input_shape,
             bool use_gpu,
+            bool enable_tensorrt,
             const std::string& dataset,
             const int data_nthreads,
             const std::string& data_layer_type,
@@ -98,6 +99,13 @@ class Predictor {
   bool AdvanceDataIter(int skipped_batches);
   void LoadModel(const std::string& model_json_file);
   void LoadParameters(const std::string& model_parameters_file);
+  void SplitParamMap(const std::map<std::string, NDArray> &paramMap,
+                     std::map<std::string, NDArray> *argParamInTargetContext,
+                     std::map<std::string, NDArray> *auxParamInTargetContext,
+                     Context targetContext);
+  void ConvertParamMapToTargetContext(const std::map<std::string, NDArray> &paramMap,
+                                      std::map<std::string, NDArray> *paramMapInTargetContext,
+                                      Context targetContext);
   void InitParameters();

   inline bool FileExists(const std::string &name) {
@@ -115,6 +123,7 @@ class Predictor {
   MXDataIter *val_iter_;
   bool use_gpu_;
+  bool enable_tensorrt_;
   std::string dataset_;
   int data_nthreads_;
   std::string data_layer_type_;
@@ -134,14 +143,15 @@ class Predictor {
  * the input shape is required to be in format Shape(1, number_of_channels, height, width)
  * The input image will be resized to (height x width) size before running the inference.
  * 4. use_gpu: determine if run inference on GPU
- * 5. dataset: data file (.rec) to be used for inference
- * 6. data_nthreads: number of threads for data loading
- * 7. data_layer_type: data type for data layer
- * 8. rgb_mean: mean value to be subtracted on R/G/B channel
- * 9. rgb_std: standard deviation on R/G/B channel
- * 10. shuffle_chunk_seed: shuffling chunk seed
- * 11. seed: shuffling seed
- * 12. benchmark: use dummy data for inference
+ * 5. enable_tensorrt: determine if TensorRT should be enabled
+ * 6. dataset: data file (.rec) to be used for inference
+ * 7. data_nthreads: number of threads for data loading
+ * 8. data_layer_type: data type for data layer
+ * 9. rgb_mean: mean value to be subtracted on R/G/B channel
+ * 10. rgb_std: standard deviation on R/G/B channel
+ * 11. shuffle_chunk_seed: shuffling chunk seed
+ * 12. seed: shuffling seed
+ * 13. benchmark: use dummy data for inference
 *
 * The constructor will:
 * 1. Create ImageRecordIter based on the given dataset file.
@@ -152,6 +162,7 @@ Predictor::Predictor(const std::string& model_json_file,
                      const std::string& model_params_file,
                      const Shape& input_shape,
                      bool use_gpu,
+                     bool enable_tensorrt,
                      const std::string& dataset,
                      const int data_nthreads,
                      const std::string& data_layer_type,
@@ -161,6 +172,7 @@ Predictor::Predictor(const std::string& model_json_file,
                      int seed,
                      bool benchmark)
     : input_shape_(input_shape),
       use_gpu_(use_gpu),
+      enable_tensorrt_(enable_tensorrt),
       dataset_(dataset),
       data_nthreads_(data_nthreads),
       data_layer_type_(data_layer_type),
@@ -182,12 +194,12 @@ Predictor::Predictor(const std::string& model_json_file,
   // Load the model
   LoadModel(model_json_file);
   // Initialize the parameters
-  // benchmark=false, load parameters from file
-  // benchmark=true, randomly initialize parameters
-  if (!benchmark_) {
-    LoadParameters(model_params_file);
-  } else {
+  // benchmark=true && model_params_file.empty(), randomly initialize parameters
+  // else, load parameters
+  if (benchmark_ && model_params_file.empty()) {
     InitParameters();
+  } else {
+    LoadParameters(model_params_file);
   }

   int dtype = GetDataLayerType();
@@ -289,9 +301,11 @@ void Predictor::LoadModel(const std::string& model_json_file) {
   }
   LG << "Loading the model from " << model_json_file << std::endl;
   net_ = Symbol::Load(model_json_file);
+  if (enable_tensorrt_) {
+    net_ = net_.GetBackendSymbol("TensorRT");
+  }
 }

-
 /*
  * The following function loads the model parameters.
  */
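For reference, a minimal sketch of constructing the predictor directly with the new `enable_tensorrt` argument. It assumes the `Predictor` class from this example file; the paths and settings are illustrative, and the argument order follows the constructor above. Note that with TensorRT enabled a real params file must be supplied even in benchmark mode (the TensorRT engine is built from actual weights), which is why the initialization logic above changed:
```
#include <vector>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  std::vector<float> rgb_mean = {0.0f, 0.0f, 0.0f};
  std::vector<float> rgb_std = {1.0f, 1.0f, 1.0f};
  // Batch of 16 RGB 224x224 images, matching the README benchmark command.
  Shape input_shape(16, 3, 224, 224);
  Predictor predict("./model/Inception-BN-symbol.json",
                    "./model/Inception-BN-0126.params",
                    input_shape,
                    true,       // use_gpu (TensorRT implies GPU)
                    true,       // enable_tensorrt
                    "",         // dataset: unused when benchmarking
                    1,          // data_nthreads
                    "float32",  // data_layer_type
                    rgb_mean, rgb_std,
                    0,          // shuffle_chunk_seed
                    0,          // seed
                    true);      // benchmark
  return 0;
}
```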
@@ -303,20 +317,50 @@ void Predictor::LoadParameters(const std::string& model_parameters_file) {
   LG << "Loading the model parameters from " << model_parameters_file << std::endl;
   std::map<std::string, NDArray> parameters;
   NDArray::Load(model_parameters_file, 0, &parameters);
-  for (const auto &k : parameters) {
-    if (k.first.substr(0, 4) == "aux:") {
-      auto name = k.first.substr(4, k.first.size() - 4);
-      aux_map_[name] = k.second.Copy(global_ctx_);
-    }
-    if (k.first.substr(0, 4) == "arg:") {
-      auto name = k.first.substr(4, k.first.size() - 4);
-      args_map_[name] = k.second.Copy(global_ctx_);
-    }
+  if (enable_tensorrt_) {
+    std::map<std::string, NDArray> intermediate_args_map;
+    std::map<std::string, NDArray> intermediate_aux_map;
+    SplitParamMap(parameters, &intermediate_args_map, &intermediate_aux_map, Context::cpu());
+    contrib::InitTensorRTParams(net_, &intermediate_args_map, &intermediate_aux_map);
+    ConvertParamMapToTargetContext(intermediate_args_map, &args_map_, global_ctx_);
+    ConvertParamMapToTargetContext(intermediate_aux_map, &aux_map_, global_ctx_);
+  } else {
+    SplitParamMap(parameters, &args_map_, &aux_map_, global_ctx_);
   }
   /*WaitAll is needed when we copy data between GPU and the main memory*/
   NDArray::WaitAll();
 }

+/*
+ * The following function splits the loaded parameter map into arg and aux
+ * parameter maps in the target context.
+ */
+void Predictor::SplitParamMap(const std::map<std::string, NDArray> &paramMap,
+                              std::map<std::string, NDArray> *argParamInTargetContext,
+                              std::map<std::string, NDArray> *auxParamInTargetContext,
+                              Context targetContext) {
+  for (const auto& pair : paramMap) {
+    std::string type = pair.first.substr(0, 4);
+    std::string name = pair.first.substr(4);
+    if (type == "arg:") {
+      (*argParamInTargetContext)[name] = pair.second.Copy(targetContext);
+    } else if (type == "aux:") {
+      (*auxParamInTargetContext)[name] = pair.second.Copy(targetContext);
+    }
+  }
+}
+
+/*
+ * The following function copies the parameter map into the target context.
+ */
+void Predictor::ConvertParamMapToTargetContext(const std::map<std::string, NDArray> &paramMap,
+                                               std::map<std::string, NDArray> *paramMapInTargetContext,
+                                               Context targetContext) {
+  for (const auto& pair : paramMap) {
+    (*paramMapInTargetContext)[pair.first] = pair.second.Copy(targetContext);
+  }
+}
+
 /*
  * The following function randomly initializes the parameters when benchmark_ is true.
  */
@@ -517,6 +561,8 @@ void printUsage() {
             << "--data_layer_type <data type for data layer> " << std::endl
             << "--gpu <whether to run inference on GPU, default: false>" << std::endl
+            << "--enableTRT <whether to run inference with TensorRT, default: false>"
+            << std::endl
             << "--benchmark <whether to use dummy data to run inference, default: false>" << std::endl;
 }

@@ -528,6 +574,7 @@ int main(int argc, char** argv) {
   std::string input_rgb_mean("0 0 0");
   std::string input_rgb_std("1 1 1");
   bool use_gpu = false;
+  bool enable_tensorrt = false;
   bool benchmark = false;
   int batch_size = 64;
   int num_skipped_batches = 0;
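As background for `SplitParamMap`: keys in an MXNet `.params` file are stored as `arg:<name>` or `aux:<name>`, and the helper strips that 4-character prefix before routing each entry to the matching map. A small illustration (the params path is hypothetical):
```
#include <iostream>
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  std::map<std::string, NDArray> parameters;
  NDArray::Load("./model/Inception-BN-0126.params", nullptr, &parameters);
  for (const auto& kv : parameters) {
    // Prints e.g. "arg:conv_1_weight" or "aux:bn_1_moving_mean"
    std::cout << kv.first << std::endl;
  }
  return 0;
}
```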
@@ -575,6 +622,9 @@ int main(int argc, char** argv) {
       data_layer_type = (index < argc ? argv[index] : data_layer_type);
     } else if (strcmp("--gpu", argv[index]) == 0) {
       use_gpu = true;
+    } else if (strcmp("--enableTRT", argv[index]) == 0) {
+      use_gpu = true;
+      enable_tensorrt = true;
     } else if (strcmp("--benchmark", argv[index]) == 0) {
       benchmark = true;
     } else if (strcmp("--help", argv[index]) == 0) {
@@ -584,7 +634,9 @@
     index++;
   }

-  if (model_file_json.empty() || (!benchmark && model_file_params.empty())) {
+  if (model_file_json.empty()
+      || (!benchmark && model_file_params.empty())
+      || (enable_tensorrt && model_file_params.empty())) {
     LG << "ERROR: Model details such as symbol, param files are not specified";
     printUsage();
     return 1;
@@ -597,8 +649,8 @@ int main(int argc, char** argv) {
   std::vector<float> rgb_std = createVectorFromString(input_rgb_std);

   // Initialize the predictor object
-  Predictor predict(model_file_json, model_file_params, input_data_shape, use_gpu, dataset,
-                    data_nthreads, data_layer_type, rgb_mean, rgb_std, shuffle_chunk_seed,
+  Predictor predict(model_file_json, model_file_params, input_data_shape, use_gpu, enable_tensorrt,
+                    dataset, data_nthreads, data_layer_type, rgb_mean, rgb_std, shuffle_chunk_seed,
                     seed, benchmark);

   if (benchmark) {
diff --git a/cpp-package/include/mxnet-cpp/MxNetCpp.h b/cpp-package/include/mxnet-cpp/MxNetCpp.h
index 7ac039dd8816..a513565377fd 100644
--- a/cpp-package/include/mxnet-cpp/MxNetCpp.h
+++ b/cpp-package/include/mxnet-cpp/MxNetCpp.h
@@ -39,5 +39,6 @@
 #include "mxnet-cpp/io.hpp"
 #include "mxnet-cpp/metric.h"
 #include "mxnet-cpp/initializer.h"
+#include "mxnet-cpp/contrib.h"

 #endif  // MXNET_CPP_MXNETCPP_H_
diff --git a/cpp-package/include/mxnet-cpp/contrib.h b/cpp-package/include/mxnet-cpp/contrib.h
new file mode 100644
index 000000000000..890ab2bf0062
--- /dev/null
+++ b/cpp-package/include/mxnet-cpp/contrib.h
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+* Copyright (c) 2019 by Contributors
+* \file contrib.h
+* \brief utility function to enable some contrib features
+* \author Haohuan Wang
+*/
+#ifndef MXNET_CPP_CONTRIB_H_
+#define MXNET_CPP_CONTRIB_H_
+
+#include <cstdint>
+#include <map>
+#include <string>
+#include <vector>
+#include "mxnet-cpp/symbol.h"
+
+namespace mxnet {
+namespace cpp {
+namespace details {
+
+  /*!
+   * split a string with the given delimiter
+   * @param str string to be parsed
+   * @param delimiter delimiter
+   * @return delimited list of strings
+   */
+  inline std::vector<std::string> split(const std::string& str, const std::string& delimiter) {
+    std::vector<std::string> splitted;
+    size_t last = 0;
+    size_t next = 0;
+    while ((next = str.find(delimiter, last)) != std::string::npos) {
+      splitted.push_back(str.substr(last, next - last));
+      // advance past the full delimiter, not just one character
+      last = next + delimiter.length();
+    }
+    splitted.push_back(str.substr(last));
+    return splitted;
+  }
+
+}  // namespace details
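A quick sanity check of `details::split`, which `InitTensorRTParams` below uses to parse the `;`-separated parameter-name list stored on TensorRT subgraph nodes:
```
#include <cassert>
#include <string>
#include <vector>
#include "mxnet-cpp/contrib.h"

int main() {
  // "conv_1_weight;bn_1_gamma;bn_1_beta" -> three parameter names
  std::vector<std::string> keys =
      mxnet::cpp::details::split("conv_1_weight;bn_1_gamma;bn_1_beta", ";");
  assert(keys.size() == 3);
  assert(keys[0] == "conv_1_weight");
  assert(keys[2] == "bn_1_beta");
  return 0;
}
```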
+namespace contrib {
+
+  // needs to be kept in sync with
+  // https://github.com/apache/incubator-mxnet/blob/1c874cfc807cee755c38f6486e8e0f4d94416cd8/src/operator/subgraph/tensorrt/tensorrt-inl.h#L190
+  static const std::string TENSORRT_SUBGRAPH_PARAM_IDENTIFIER = "subgraph_params_names";
+  // needs to be kept in sync with
+  // https://github.com/apache/incubator-mxnet/blob/master/src/operator/subgraph/tensorrt/tensorrt.cc#L244
+  static const std::string TENSORRT_SUBGRAPH_PARAM_PREFIX = "subgraph_param_";
+
+  /*!
+   * this mimics https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/contrib/tensorrt.py#L37
+   * @param symbol symbol that has already been passed through the subgraph API
+   * @param argParams original arg params; params needed by TensorRT are removed after calling this function
+   * @param auxParams original aux params; params needed by TensorRT are removed after calling this function
+   */
+  inline void InitTensorRTParams(const mxnet::cpp::Symbol& symbol,
+                                 std::map<std::string, mxnet::cpp::NDArray> *argParams,
+                                 std::map<std::string, mxnet::cpp::NDArray> *auxParams) {
+    mxnet::cpp::Symbol internals = symbol.GetInternals();
+    mx_uint numSymbol = internals.GetNumOutputs();
+    for (mx_uint i = 0; i < numSymbol; ++i) {
+      std::map<std::string, std::string> attrs = internals[i].ListAttributes();
+      if (attrs.find(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER) != attrs.end()) {
+        std::string new_params_names;
+        std::map<std::string, mxnet::cpp::NDArray> tensorrtParams;
+        std::vector<std::string> keys = details::split(
+            attrs[TENSORRT_SUBGRAPH_PARAM_IDENTIFIER], ";");
+        for (const auto& key : keys) {
+          if (argParams->find(key) != argParams->end()) {
+            new_params_names += key + ";";
+            tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*argParams)[key];
+            argParams->erase(key);
+          } else if (auxParams->find(key) != auxParams->end()) {
+            new_params_names += key + ";";
+            tensorrtParams[TENSORRT_SUBGRAPH_PARAM_PREFIX + key] = (*auxParams)[key];
+            auxParams->erase(key);
+          }
+        }
+        std::map<std::string, std::string> new_attrs = {};
+        for (const auto& kv : tensorrtParams) {
+          // passing the NDArray address into the TRT node attributes to get the weight
+          uint64_t address = reinterpret_cast<uint64_t>(kv.second.GetHandle());
+          new_attrs[kv.first] = std::to_string(address);
+        }
+        if (!new_attrs.empty()) {
+          internals[i].SetAttributes(new_attrs);
+          internals[i].SetAttribute(TENSORRT_SUBGRAPH_PARAM_IDENTIFIER,
+                                    new_params_names.substr(0, new_params_names.length() - 1));
+        }
+      }
+    }
+  }
+
+}  // namespace contrib
+}  // namespace cpp
+}  // namespace mxnet
+
+#endif  // MXNET_CPP_CONTRIB_H_
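Putting the pieces together, a sketch (paths illustrative) of the offline workflow this header enables, mirroring `Predictor::LoadParameters` above and the Python `mxnet.contrib.tensorrt` flow it mimics:
```
#include <map>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  // 1. Load and partition the symbol for the TensorRT backend.
  Symbol net = Symbol::Load("./model/Inception-BN-symbol.json")
                   .GetBackendSymbol("TensorRT");

  // 2. Load params on CPU and split them into arg/aux maps ("arg:"/"aux:" keys).
  std::map<std::string, NDArray> params, args, auxs;
  NDArray::Load("./model/Inception-BN-0126.params", nullptr, &params);
  for (const auto& kv : params) {
    if (kv.first.substr(0, 4) == "arg:")
      args[kv.first.substr(4)] = kv.second.Copy(Context::cpu());
    else if (kv.first.substr(0, 4) == "aux:")
      auxs[kv.first.substr(4)] = kv.second.Copy(Context::cpu());
  }
  NDArray::WaitAll();

  // 3. Hand the TensorRT-owned weights over to the subgraph nodes; the maps
  //    are left holding only the params MXNet itself still needs to bind.
  contrib::InitTensorRTParams(net, &args, &auxs);
  return 0;
}
```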
diff --git a/cpp-package/include/mxnet-cpp/symbol.h b/cpp-package/include/mxnet-cpp/symbol.h
index a25824cad602..d72eeaad1a5a 100644
--- a/cpp-package/include/mxnet-cpp/symbol.h
+++ b/cpp-package/include/mxnet-cpp/symbol.h
@@ -178,6 +178,23 @@ class Symbol {
   std::vector<std::string> ListOutputs() const;
   /*! \return get the descriptions of auxiliary data for this symbol */
   std::vector<std::string> ListAuxiliaryStates() const;
+  /*! \return get all attributes for this symbol */
+  std::map<std::string, std::string> ListAttributes() const;
+  /*!
+   * \brief set a key-value attribute on the symbol
+   * @param key string representing the key for the attribute
+   * @param value string representing the value for the attribute
+   */
+  void SetAttribute(const std::string& key, const std::string& value);
+  /*!
+   * \brief set a series of key-value attributes on the symbol
+   * @param attrs string-to-string map representing the key-value attributes
+   */
+  void SetAttributes(const std::map<std::string, std::string>& attrs);
+  /*! \return get the number of outputs for this symbol */
+  mx_uint GetNumOutputs() const;
+  /*! \return get a new symbol through the subgraph API for this symbol */
+  mxnet::cpp::Symbol GetBackendSymbol(const std::string& backendName) const;
   /*! \return get the name of the symbol */
   std::string GetName() const;
   /*!
diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp
index 2e3fb7a2d5de..811d894e0ffa 100644
--- a/cpp-package/include/mxnet-cpp/symbol.hpp
+++ b/cpp-package/include/mxnet-cpp/symbol.hpp
@@ -172,6 +172,41 @@ inline std::vector<std::string> Symbol::ListAuxiliaryStates() const {
   return ret;
 }

+inline std::map<std::string, std::string> Symbol::ListAttributes() const {
+  mx_uint size;
+  const char** pairs;
+  CHECK_EQ(MXSymbolListAttrShallow(GetHandle(), &size, &pairs), 0);
+  std::map<std::string, std::string> attributes;
+  for (mx_uint i = 0; i < size; ++i) {
+    // pairs is 2 * size with key, value pairs according to
+    // https://github.com/apache/incubator-mxnet/blob/master/include/mxnet/c_api.h#L1428
+    attributes[pairs[2 * i]] = pairs[2 * i + 1];
+  }
+  return attributes;
+}
+
+inline void Symbol::SetAttribute(const std::string &key, const std::string &value) {
+  CHECK_EQ(MXSymbolSetAttr(GetHandle(), key.c_str(), value.c_str()), 0);
+}
+
+inline void Symbol::SetAttributes(const std::map<std::string, std::string> &attrs) {
+  for (const auto& kv : attrs) {
+    SetAttribute(kv.first, kv.second);
+  }
+}
+
+inline mx_uint Symbol::GetNumOutputs() const {
+  mx_uint numOutputs;
+  CHECK_EQ(MXSymbolGetNumOutputs(GetHandle(), &numOutputs), 0);
+  return numOutputs;
+}
+
+inline mxnet::cpp::Symbol Symbol::GetBackendSymbol(const std::string &backendName) const {
+  SymbolHandle symbolHandle;
+  CHECK_EQ(MXGenBackendSubgraph(GetHandle(), backendName.c_str(), &symbolHandle), 0);
+  return mxnet::cpp::Symbol(symbolHandle);
+}
+
 inline std::string Symbol::GetName() const {
   int success;
   const char* out_name;
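A short sketch (model path illustrative) exercising the new `Symbol` helpers end to end: partition a graph for a backend, then walk its internal nodes and dump their attributes, including the TensorRT subgraph attributes set by `InitTensorRTParams`:
```
#include <iostream>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  Symbol net = Symbol::Load("./model/Inception-BN-symbol.json")
                   .GetBackendSymbol("TensorRT");
  Symbol internals = net.GetInternals();
  for (mx_uint i = 0; i < internals.GetNumOutputs(); ++i) {
    // Each internal output is itself a Symbol whose attributes can be listed.
    for (const auto& kv : internals[i].ListAttributes()) {
      std::cout << kv.first << " = " << kv.second << std::endl;
    }
  }
  return 0;
}
```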