-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Add unit tests for TensorRT integration and fix some bugs #15399
Changes from all commits
518f5bf
01f9da0
0ce9e87
36e6fc7
04a5764
063dbad
064938d
96440fe
f7e8e5c
99575b2
8f8d4d8
76bbcc7
61f109c
fddac76
5c8c601
82565ce
ca332a5
2ebc4bd
60cd171
cdd3b2e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
+50 −16 | CMakeLists.txt | |
+9 −9 | FancyActivation.cu | |
+57 −13 | FancyActivation.hpp | |
+3 −5 | ImporterContext.hpp | |
+1 −1 | InstanceNormalization.cpp | |
+48 −4 | InstanceNormalization.hpp | |
+287 −32 | ModelImporter.cpp | |
+6 −2 | ModelImporter.hpp | |
+27 −6 | NvOnnxParser.h | |
+30 −0 | NvOnnxParserTypedefs.h | |
+6 −0 | OnnxAttrs.cpp | |
+59 −39 | README.md | |
+3 −2 | ResizeNearest.cu | |
+47 −4 | ResizeNearest.hpp | |
+66 −13 | ShapedWeights.cpp | |
+2 −0 | ShapedWeights.hpp | |
+4 −2 | Split.cu | |
+47 −4 | Split.hpp | |
+9 −0 | Status.hpp | |
+627 −452 | builtin_op_importers.cpp | |
+0 −43 | builtin_plugins.cpp | |
+119 −0 | common.hpp | |
+9 −0 | contributing.md | |
+167 −0 | getSupportedAPITest.cpp | |
+14 −94 | main.cpp | |
+2 −2 | onnx2trt.hpp | |
+5 −61 | onnx2trt_utils.cpp | |
+161 −6 | onnx2trt_utils.hpp | |
+109 −11 | onnx_backend_test.py | |
+0 −2 | onnx_tensorrt/__init__.py | |
+52 −12 | onnx_tensorrt/backend.py | |
+57 −17 | onnx_tensorrt/tensorrt_engine.py | |
+14 −53 | onnx_trt_backend.cpp | |
+145 −0 | operators.md | |
+78 −55 | plugin.cpp | |
+63 −22 | plugin.hpp |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,14 @@ | |
#include <string> | ||
#include <vector> | ||
|
||
#include "../../nn/activation-inl.h" | ||
#include "../../nn/batch_norm-inl.h" | ||
#include "../../nn/concat-inl.h" | ||
#include "../../nn/convolution-inl.h" | ||
#include "../../nn/deconvolution-inl.h" | ||
#include "../../nn/dropout-inl.h" | ||
#include "../../nn/fully_connected-inl.h" | ||
#include "../../nn/pooling-inl.h" | ||
#include "../common.h" | ||
#include "../subgraph_property.h" | ||
#include "nnvm_to_onnx-inl.h" | ||
|
@@ -85,18 +93,12 @@ struct TRTEngineParam { | |
class TensorrtSelector : public SubgraphSelector { | ||
public: | ||
const std::unordered_set<std::string> unconditionalTRTops = { | ||
"BatchNorm", | ||
"_copy", | ||
"clip", | ||
"Concat", | ||
"Convolution", | ||
"Deconvolution", | ||
"Dropout", | ||
"elemwise_add", | ||
"elemwise_sub", | ||
"elemwise_mul", | ||
"Flatten", | ||
"FullyConnected", | ||
"mean", | ||
"Pad", | ||
"relu", | ||
"rsqrt", | ||
|
@@ -112,13 +114,94 @@ class TensorrtSelector : public SubgraphSelector { | |
|
||
bool isTRTCompatible(const nnvm::Node &n) { | ||
const std::string op_name = n.op()->name; | ||
if (op_name == "FullyConnected") { | ||
const auto& param = nnvm::get<FullyConnectedParam>(n.attrs.parsed); | ||
return !param.no_bias; | ||
} | ||
|
||
if (op_name == "Pooling") { | ||
return (n.attrs.dict.at("pool_type") == "avg" || | ||
n.attrs.dict.at("pool_type") == "max"); | ||
const auto& param = nnvm::get<PoolingParam>(n.attrs.parsed); | ||
if (param.layout.has_value()) { | ||
if (param.layout.value() == mshadow::kNHWC) { | ||
LOG(INFO) << "Warning: NHWC layout (node: " << n.attrs.name | ||
<< ") is not supported by TensorRT"; | ||
return false; | ||
} else if (param.layout.value() == mshadow::kNDHWC) { | ||
LOG(INFO) << "Warning: NDHWC layout (node: " << n.attrs.name | ||
<< ") is not supported by TensorRT"; | ||
return false; | ||
} | ||
} | ||
if (param.pooling_convention != pool_enum::kValid && !param.global_pool) | ||
return false; | ||
if (param.pool_type == pool_enum::kAvgPooling) { | ||
if ((!param.global_pool) && | ||
(!param.count_include_pad.has_value() || param.count_include_pad.value())) | ||
return false; | ||
return true; | ||
} else if (param.pool_type == pool_enum::kMaxPooling) { | ||
return true; | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
if (unconditionalTRTops.count(op_name)) { | ||
return true; | ||
if (op_name == "Convolution") { | ||
const auto& param = nnvm::get<ConvolutionParam>(n.attrs.parsed); | ||
if (!param.layout.has_value()) | ||
return true; | ||
switch (param.layout.value()) { | ||
case mshadow::kNCHW: | ||
case mshadow::kNCW: | ||
case mshadow::kNCDHW: | ||
return true; | ||
case mshadow::kNHWC: | ||
LOG(INFO) << "Warning: NHWC layout (node: " << n.attrs.name | ||
<< ") is not supported by TensorRT"; | ||
return false; | ||
case mshadow::kNDHWC: | ||
LOG(INFO) << "Warning: NDHWC layout (node: " << n.attrs.name | ||
<< ") is not supported by TensorRT"; | ||
return false; | ||
default: | ||
LOG(INFO) << "Warning: Layout (node: " << n.attrs.name | ||
<< ") is unknown (so unsupported by TensorRT)"; | ||
return false; | ||
} | ||
} | ||
|
||
if (op_name == "Deconvolution") { | ||
const auto& param = nnvm::get<DeconvolutionParam>(n.attrs.parsed); | ||
if (!param.layout.has_value()) | ||
return true; | ||
switch (param.layout.value()) { | ||
case mshadow::kNCHW: | ||
case mshadow::kNCW: | ||
case mshadow::kNCDHW: | ||
return true; | ||
case mshadow::kNHWC: | ||
LOG(INFO) << "Warning: NHWC layout (node: " << n.attrs.name | ||
<< ") is not supported by TensorRT"; | ||
return false; | ||
case mshadow::kNDHWC: | ||
LOG(INFO) << "Warning: NDHWC layout (node: " << n.attrs.name | ||
<< ") is not supported by TensorRT"; | ||
return false; | ||
default: | ||
LOG(INFO) << "Warning: Layout (node: " << n.attrs.name | ||
<< ") is unknown (so unsupported by TensorRT)"; | ||
return false; | ||
} | ||
} | ||
|
||
if (op_name == "Concat") { | ||
const auto& param = nnvm::get<ConcatParam>(n.attrs.parsed); | ||
return (param.dim != 0); | ||
} | ||
|
||
if (op_name == "Dropout") { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Again, will TensorRT optimize this out? We don't want it at inference time, right? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Dropout has always been treated as an identity function in the MXNet-TensorRT integration, so I don't see any change here. Regarding whether or not identity actually performs a copy, I'm not quite sure; here is the onnx-tensorrt conversion: https://github.com/onnx/onnx-tensorrt/blob/0ab159579551cabfa05fd66f338357f116e96835/trt_utils.hpp#L169-L180 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok, non-blocking comment for this PR. I'm just thinking about adding a warning in the future if people are using TRT with operations that don't make sense at inference time (Dropout, Identity, empty Concats or Copies, etc.) |
||
const auto& param = nnvm::get<DropoutParam>(n.attrs.parsed); | ||
return param.mode == dropout::kTraining && param.axes.ndim() == 0; | ||
} | ||
|
||
if (op_name == "Activation") { | ||
|
@@ -127,6 +210,20 @@ class TensorrtSelector : public SubgraphSelector { | |
n.attrs.dict.at("act_type") == "sigmoid"; | ||
} | ||
|
||
if (op_name == "BatchNorm") { | ||
const auto& param = nnvm::get<BatchNormParam>(n.attrs.parsed); | ||
if (param.axis != 1) { | ||
LOG(INFO) << "Warning: Only Layout NC(D)(H)W are supported by TensorRT " | ||
<< "(node " << n.attrs.name << ")"; | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
if (unconditionalTRTops.count(op_name)) { | ||
return true; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
|
This file was deleted.
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any idea if TRT actually optimizes this out? I've seen this in a few prod services :-/
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe this should be optimized by ONNX-TRT