Add unit tests for TensorRT integration and fix some bugs #15399

Merged 20 commits on Oct 20, 2019
7 changes: 4 additions & 3 deletions ci/docker/install/tensorrt.sh
@@ -41,9 +41,10 @@ popd
 
 # Install TensorRT
 echo "TensorRT build enabled. Installing TensorRT."
-wget -qO tensorrt.deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0_1-1_amd64.deb
+wget -qO tensorrt.deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb
 dpkg -i tensorrt.deb
+apt-get update
-apt-get install -y --allow-downgrades libnvinfer5=5.1.2-1+cuda10.0
-apt-get install -y --allow-downgrades libnvinfer-dev=5.1.2-1+cuda10.0
+apt-get install -y --allow-downgrades libnvinfer5=5.1.5-1+cuda10.0
+apt-get install -y --allow-downgrades libnvinfer-dev=5.1.5-1+cuda10.0
 apt-mark hold libnvinfer5 libnvinfer-dev
 rm tensorrt.deb
2 changes: 2 additions & 0 deletions python/mxnet/contrib/tensorrt.py
@@ -42,6 +42,8 @@ def init_tensorrt_params(sym, arg_params, aux_params):
     :param aux_params: aux_params
     :return arg_params, aux_params: remaining params that are not in TensorRT nodes
     """
+    arg_params = arg_params.copy()
+    aux_params = aux_params.copy()
     for s in sym.get_internals():
         new_params_names = ""
         tensorrt_params = {}
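These copies matter because the function strips TensorRT-bound weights out of the dicts as it walks the graph; without them, the caller's own arg_params/aux_params would be mutated. A minimal Python sketch of that aliasing hazard (extract_trt_params and the "conv" prefix are illustrative, not MXNet API):

```python
# Hypothetical illustration of the aliasing bug the .copy() calls avoid:
# popping entries from the dict that was passed in would also remove
# them from the caller's dict, since both names point at one object.
def extract_trt_params(arg_params):
    arg_params = arg_params.copy()  # private working copy, as in the fix
    trt_params = {name: arg_params.pop(name)
                  for name in list(arg_params) if name.startswith("conv")}
    return trt_params, arg_params

caller_params = {"conv0_weight": 1.0, "fc0_weight": 2.0}
trt_params, remaining = extract_trt_params(caller_params)
assert "conv0_weight" in caller_params  # caller's dict is left intact
```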
15 changes: 11 additions & 4 deletions src/operator/subgraph/tensorrt/nnvm_to_onnx-inl.h
@@ -84,7 +84,13 @@ void ConvDeconvConvertHelper(NodeProto *node_proto,
                              ConvDeconvType type);
 
 // Forward declarations
-void ConvertConvolution(NodeProto *node_proto,
+void ConvertIdentity(NodeProto* node_proto,
+                     const NodeAttrs &attrs,
+                     const nnvm::IndexedGraph& ig,
+                     const array_view<IndexedGraph::NodeEntry> &inputs);
+
+void ConvertConvolution(
+                        NodeProto *node_proto,
                         const NodeAttrs &attrs,
                         const nnvm::IndexedGraph &ig,
                         const array_view<IndexedGraph::NodeEntry> &inputs);
@@ -139,12 +145,12 @@ void ConvertElementwiseAdd(NodeProto *node_proto,
                            const nnvm::IndexedGraph &ig,
                            const array_view<IndexedGraph::NodeEntry> &inputs);
 
-void ConvertElementwiseSub(NodeProto *node_proto,
+void ConvertElementwiseMul(NodeProto *node_proto,
                            const NodeAttrs &attrs,
                            const nnvm::IndexedGraph &ig,
                            const array_view<IndexedGraph::NodeEntry> &inputs);
 
-void ConvertElementwiseMul(NodeProto *node_proto,
+void ConvertElementwiseSub(NodeProto *node_proto,
                            const NodeAttrs &attrs,
                            const nnvm::IndexedGraph &ig,
                            const array_view<IndexedGraph::NodeEntry> &inputs);
@@ -168,6 +174,7 @@ std::string ConvertNnvmGraphToOnnx(const nnvm::Graph &g,
                                    std::unordered_map<std::string, NDArray>* params_map);
 
 static const std::unordered_map<std::string, ConverterFunction> converter_map = {
+  {"_copy", ConvertIdentity},
   {"Activation", ConvertActivation},
   {"BatchNorm", ConvertBatchNorm},
   {"clip", ConvertClip},
@@ -176,8 +183,8 @@ static const std::unordered_map<std::string, ConverterFunction> converter_map =
   {"Concat", ConvertConcatenate},
   {"Dropout", ConvertDropout},
   {"elemwise_add", ConvertElementwiseAdd},
-  {"elemwise_sub", ConvertElementwiseSub},
   {"elemwise_mul", ConvertElementwiseMul},
+  {"elemwise_sub", ConvertElementwiseSub},
   {"Flatten", ConvertFlatten},
   {"FullyConnected", ConvertFullyConnected},
   {"Pad", ConvertPad},
43 changes: 34 additions & 9 deletions src/operator/subgraph/tensorrt/nnvm_to_onnx.cc
@@ -171,6 +171,12 @@ std::string ConvertNnvmGraphToOnnx(
   return serialized_onnx_graph;
 }
 
+void ConvertIdentity(NodeProto* node_proto, const NodeAttrs& attrs,

Contributor: Any idea if TRT actually optimizes this out? I've seen this in a few prod services :-/

Contributor Author: I believe this should be optimized by ONNX-TRT


+                     const nnvm::IndexedGraph& /*ig*/,
+                     const array_view<IndexedGraph::NodeEntry>& /*inputs*/) {
+  node_proto->set_op_type("Identity");
+}
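For reference, the node this converter emits corresponds to a plain ONNX Identity op; a sketch with the onnx Python helpers (the tensor names "x" and "y" are placeholders):

```python
import onnx
from onnx import helper

# Roughly what ConvertIdentity produces for a _copy node.
node = helper.make_node("Identity", inputs=["x"], outputs=["y"])
print(helper.printable_node(node))
```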

 template <class ConvDeconvParam>
 void ConvDeconvConvertHelper(NodeProto* node_proto, const NodeAttrs& attrs,
                              const nnvm::IndexedGraph& /*ig*/,
@@ -262,10 +268,12 @@ void ConvertPooling(NodeProto* node_proto, const NodeAttrs& attrs,
   const bool global_pool = pooling_param.global_pool;
 
   if (global_pool) {
-    if (pool_type == 0) {
+    if (pool_type == pool_enum::kMaxPooling) {
       node_proto->set_op_type("GlobalMaxPool");
-    } else {
+    } else if (pool_type == pool_enum::kAvgPooling) {
       node_proto->set_op_type("GlobalAveragePool");
+    } else {
+      LOG(FATAL) << "Pool type of node '" << attrs.name << "' unsupported";
+    }
     return;
   }
@@ -298,12 +306,29 @@ void ConvertPooling(NodeProto* node_proto, const NodeAttrs& attrs,
     strides->add_ints(static_cast<int64>(kval));
   }
 
-  if (pool_type == 0) {
+  // ceil_mode
+  AttributeProto* const ceil_mode = node_proto->add_attribute();
+  ceil_mode->set_name("ceil_mode");
+  ceil_mode->set_type(AttributeProto::INT);
+  ceil_mode->set_i(static_cast<int64>(pooling_param.pooling_convention == pool_enum::kFull));
+
+  if (pool_type == pool_enum::kMaxPooling) {
     node_proto->set_op_type("MaxPool");
-  } else {
+  } else if (pool_type == pool_enum::kAvgPooling) {
     node_proto->set_op_type("AveragePool");
-  }  // average pooling
-  // not global pooling
+  } else {
+    LOG(FATAL) << "Pool type of node '" << attrs.name << "' unsupported";
+  }
+
+  // count_include_pad
+  AttributeProto* const count_include_pad = node_proto->add_attribute();
+  count_include_pad->set_name("count_include_pad");
+  count_include_pad->set_type(AttributeProto::INT);
+  if (pooling_param.count_include_pad.has_value()) {
+    count_include_pad->set_i(pooling_param.count_include_pad.value());
+  } else {
+    count_include_pad->set_i(1);
+  }
 }  // end ConvertPooling
 
 void ConvertRelu(NodeProto* node_proto, const NodeAttrs& /*attrs*/,
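Stepping back to the pooling conversion above: the new ceil_mode attribute encodes MXNet's pooling_convention, where 'full' rounds the output size up and 'valid' rounds it down. A small worked sketch of the output-size rule (following the ONNX pooling spec):

```python
import math

def pool_out_size(in_size, kernel, stride, pad, ceil_mode):
    """Output length along one spatial axis of a pooling op."""
    rnd = math.ceil if ceil_mode else math.floor
    return rnd((in_size + 2 * pad - kernel) / stride) + 1

assert pool_out_size(6, 3, 2, 0, ceil_mode=0) == 2  # MXNet 'valid'
assert pool_out_size(6, 3, 2, 0, ceil_mode=1) == 3  # MXNet 'full'
```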
@@ -608,7 +633,7 @@ void ConvertOutput(
 void ConvertClip(NodeProto* node_proto, const NodeAttrs& attrs,
                  const nnvm::IndexedGraph& /*ig*/,
                  const array_view<IndexedGraph::NodeEntry>& /*inputs*/) {
-  const auto param = nnvm::get<ClipParam>(attrs.parsed);
+  const auto& param = nnvm::get<ClipParam>(attrs.parsed);
 
   node_proto->set_op_type("Clip");
 
@@ -628,7 +653,7 @@ void ConvertClip(NodeProto* node_proto, const NodeAttrs& attrs,
 void ConvertPad(NodeProto* node_proto, const NodeAttrs& attrs,
                 const nnvm::IndexedGraph& /*ig*/,
                 const array_view<IndexedGraph::NodeEntry>& /*inputs*/) {
-  const auto param = nnvm::get<PadParam>(attrs.parsed);
+  const auto& param = nnvm::get<PadParam>(attrs.parsed);
 
   node_proto->set_op_type("Pad");
 
@@ -647,7 +672,7 @@ void ConvertPad(NodeProto* node_proto, const NodeAttrs& attrs,
       mode->set_s("reflect");
       break;
     default:
-      throw dmlc::Error("Such mode of padding doesn't exist doesn't exist");
+      throw dmlc::Error("Such mode of padding doesn't exist");
   }
 
   // pads
2 changes: 1 addition & 1 deletion src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc
@@ -79,7 +79,7 @@ std::tuple<unique_ptr<nvinfer1::ICudaEngine>,
   auto trt_logger = std::unique_ptr<TRT_Logger>(new TRT_Logger(verbosity));
   auto trt_builder = nvinfer1::createInferBuilder(*trt_logger);
   auto trt_network = trt_builder->createNetwork();
-  auto trt_parser = InferObject(nvonnxparser::createParser(trt_network, *trt_logger));
+  auto trt_parser = InferObject(nvonnxparser::createParser(*trt_network, *trt_logger));
   ::ONNX_NAMESPACE::ModelProto parsed_model;
   // We check for a valid parse, but the main effect is the side effect
   // of populating parsed_model
119 changes: 108 additions & 11 deletions src/operator/subgraph/tensorrt/tensorrt-inl.h
@@ -34,6 +34,14 @@
 #include <string>
 #include <vector>
 
+#include "../../nn/activation-inl.h"
+#include "../../nn/batch_norm-inl.h"
+#include "../../nn/concat-inl.h"
+#include "../../nn/convolution-inl.h"
+#include "../../nn/deconvolution-inl.h"
+#include "../../nn/dropout-inl.h"
+#include "../../nn/fully_connected-inl.h"
+#include "../../nn/pooling-inl.h"
 #include "../common.h"
 #include "../subgraph_property.h"
 #include "nnvm_to_onnx-inl.h"
@@ -85,18 +93,12 @@ struct TRTEngineParam {
 class TensorrtSelector : public SubgraphSelector {
  public:
   const std::unordered_set<std::string> unconditionalTRTops = {
-    "BatchNorm",
+    "_copy",
     "clip",
-    "Concat",
-    "Convolution",
-    "Deconvolution",
-    "Dropout",
     "elemwise_add",
     "elemwise_sub",
     "elemwise_mul",
     "Flatten",
-    "FullyConnected",
     "mean",
     "Pad",
     "relu",
     "rsqrt",
@@ -112,13 +114,94 @@ class TensorrtSelector : public SubgraphSelector {
 
   bool isTRTCompatible(const nnvm::Node &n) {
     const std::string op_name = n.op()->name;
+    if (op_name == "FullyConnected") {
+      const auto& param = nnvm::get<FullyConnectedParam>(n.attrs.parsed);
+      return !param.no_bias;
+    }
+
     if (op_name == "Pooling") {
-      return (n.attrs.dict.at("pool_type") == "avg" ||
-              n.attrs.dict.at("pool_type") == "max");
+      const auto& param = nnvm::get<PoolingParam>(n.attrs.parsed);
+      if (param.layout.has_value()) {
+        if (param.layout.value() == mshadow::kNHWC) {
+          LOG(INFO) << "Warning: NHWC layout (node: " << n.attrs.name
+                    << ") is not supported by TensorRT";
+          return false;
+        } else if (param.layout.value() == mshadow::kNDHWC) {
+          LOG(INFO) << "Warning: NDHWC layout (node: " << n.attrs.name
+                    << ") is not supported by TensorRT";
+          return false;
+        }
+      }
+      if (param.pooling_convention != pool_enum::kValid && !param.global_pool)
+        return false;
+      if (param.pool_type == pool_enum::kAvgPooling) {
+        if ((!param.global_pool) &&
+            (!param.count_include_pad.has_value() || param.count_include_pad.value()))
+          return false;
+        return true;
+      } else if (param.pool_type == pool_enum::kMaxPooling) {
+        return true;
+      } else {
+        return false;
+      }
     }
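To summarize the new Pooling gate above: pooling is offloaded only for channel-first layouts with the 'valid' convention (or global pooling), and average pooling additionally requires count_include_pad=False. A hedged sketch of a symbol that should pass this check (assuming the MXNet 1.5-era symbol API):

```python
import mxnet as mx

data = mx.sym.Variable("data")
# Average pooling the selector should accept for TensorRT offload.
pool = mx.sym.Pooling(data, kernel=(2, 2), stride=(2, 2),
                      pool_type="avg", pooling_convention="valid",
                      count_include_pad=False)
```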

-    if (unconditionalTRTops.count(op_name)) {
-      return true;
+    if (op_name == "Convolution") {
+      const auto& param = nnvm::get<ConvolutionParam>(n.attrs.parsed);
+      if (!param.layout.has_value())
+        return true;
+      switch (param.layout.value()) {
+        case mshadow::kNCHW:
+        case mshadow::kNCW:
+        case mshadow::kNCDHW:
+          return true;
+        case mshadow::kNHWC:
+          LOG(INFO) << "Warning: NHWC layout (node: " << n.attrs.name
+                    << ") is not supported by TensorRT";
+          return false;
+        case mshadow::kNDHWC:
+          LOG(INFO) << "Warning: NDHWC layout (node: " << n.attrs.name
+                    << ") is not supported by TensorRT";
+          return false;
+        default:
+          LOG(INFO) << "Warning: Layout (node: " << n.attrs.name
+                    << ") is unknown (so unsupported by TensorRT)";
+          return false;
+      }
     }
 
+    if (op_name == "Deconvolution") {
+      const auto& param = nnvm::get<DeconvolutionParam>(n.attrs.parsed);
+      if (!param.layout.has_value())
+        return true;
+      switch (param.layout.value()) {
+        case mshadow::kNCHW:
+        case mshadow::kNCW:
+        case mshadow::kNCDHW:
+          return true;
+        case mshadow::kNHWC:
+          LOG(INFO) << "Warning: NHWC layout (node: " << n.attrs.name
+                    << ") is not supported by TensorRT";
+          return false;
+        case mshadow::kNDHWC:
+          LOG(INFO) << "Warning: NDHWC layout (node: " << n.attrs.name
+                    << ") is not supported by TensorRT";
+          return false;
+        default:
+          LOG(INFO) << "Warning: Layout (node: " << n.attrs.name
+                    << ") is unknown (so unsupported by TensorRT)";
+          return false;
+      }
+    }
+
+    if (op_name == "Concat") {
+      const auto& param = nnvm::get<ConcatParam>(n.attrs.parsed);
+      return (param.dim != 0);
+    }
+
+    if (op_name == "Dropout") {

Contributor: Again, will TensorRT optimize this out? We don't want it at inference time right?

Contributor Author: Dropout has always been treated as an identity function in the MXNet-TensorRT integration, so nothing changes there. As for whether Identity actually performs a copy, I'm not quite sure; here is the onnx-tensorrt conversion: https://github.com/onnx/onnx-tensorrt/blob/0ab159579551cabfa05fd66f338357f116e96835/trt_utils.hpp#L169-L180

Contributor: Ok, non-blocking comment for this PR. I'm just thinking about adding a warning in the future if people are using TRT with operations that don't make sense at inference time (Dropout, Identity, empty Concats or Copies, etc.)

+      const auto& param = nnvm::get<DropoutParam>(n.attrs.parsed);
+      return param.mode == dropout::kTraining && param.axes.ndim() == 0;
+    }
+
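On the reviewer's question, a quick way to check that a training-mode Dropout already behaves as identity at inference (a sketch against the MXNet 1.x symbol API):

```python
import mxnet as mx

data = mx.sym.Variable("data")
net = mx.sym.Dropout(data, p=0.5, mode="training")
exe = net.simple_bind(mx.cpu(), data=(2, 3), grad_req="null")
x = mx.nd.ones((2, 3))
y = exe.forward(is_train=False, data=x)[0]
assert (y.asnumpy() == x.asnumpy()).all()  # no-op when is_train=False
```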
     if (op_name == "Activation") {
@@ -127,6 +210,20 @@ class TensorrtSelector : public SubgraphSelector {
              n.attrs.dict.at("act_type") == "relu" ||
              n.attrs.dict.at("act_type") == "sigmoid";
 
+    if (op_name == "BatchNorm") {
+      const auto& param = nnvm::get<BatchNormParam>(n.attrs.parsed);
+      if (param.axis != 1) {
+        LOG(INFO) << "Warning: only NC(D)(H)W layouts are supported by TensorRT "
+                  << "(node " << n.attrs.name << ")";
+        return false;
+      }
+      return true;
+    }
+
+    if (unconditionalTRTops.count(op_name)) {
+      return true;
+    }
+
     return false;
   }
2 changes: 1 addition & 1 deletion src/operator/subgraph/tensorrt/tensorrt.cc
@@ -131,7 +131,7 @@ inline bool TRTInferType(const nnvm::NodeAttrs& attrs,
     auto it_params = params_map.find(node->attrs.name);
     auto it_inputs = inputs_to_idx.find(node->attrs.name);
     if (it_params != params_map.end()) {
-      types[eid] = it_params->second.dtype();
+      types[eid] = -1;
     } else if (it_inputs != inputs_to_idx.end()) {
      types[eid] = in_types->at(it_inputs->second);
     } else {
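Writing -1 here marks the parameter's dtype as 'unknown' so MXNet's type-inference pass derives it from the surrounding graph (allowing, e.g., FP16 engines) instead of pinning it to the stored weight's dtype. A toy sketch of that sentinel convention (assuming MXNet's usual coding of -1 = unknown, 0 = float32):

```python
UNKNOWN = -1   # MXNet sentinel: dtype not yet inferred
FLOAT32 = 0    # mshadow::kFloat32

def propagate(types):
    """Toy type inference: unknown slots adopt the one known dtype."""
    known = {t for t in types if t != UNKNOWN}
    assert len(known) == 1, "toy example: exactly one known dtype"
    fill = known.pop()
    return [fill if t == UNKNOWN else t for t in types]

assert propagate([UNKNOWN, FLOAT32, UNKNOWN]) == [FLOAT32] * 3
```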
30 changes: 0 additions & 30 deletions tests/python/tensorrt/common.py

This file was deleted.

31 changes: 0 additions & 31 deletions tests/python/tensorrt/lenet5_common.py

This file was deleted.
