From f54b7379d88b17b39ccdca3d6f87e05d7bf93296 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Thu, 3 Dec 2020 19:28:51 +0000 Subject: [PATCH 1/3] [TRT] Support batch norm for all ranks <=5, and all axis --- python/tvm/relay/op/contrib/tensorrt.py | 5 ++ src/runtime/contrib/tensorrt/tensorrt_ops.cc | 49 +++++++++++++++++--- tests/python/contrib/test_tensorrt.py | 6 +++ 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index acd4f4740b2d..c8f655760119 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -341,6 +341,11 @@ def batch_norm_annotate_fn(expr): # pylint: disable=unused-variable if any([x.checked_type.dtype != "float32" for x in args]): logger.info("Only float32 inputs are supported for TensorRT.") return False + if len(args[0].checked_type.shape) == 5 and get_tensorrt_version() < (6, 0, 1): + logger.info("nn.batch_norm: TensorRT 6.0.1 or higher is required for rank 5 inputs.") + if len(args[0].checked_type.shape) > 5: + logger.info("nn.batch_norm: Input rank must be 5 or less.") + return False if int(attrs.axis) not in (1, 3): logger.info("nn.batch_norm: axis is %d but must be 1 or 3.", int(attrs.axis)) return False diff --git a/src/runtime/contrib/tensorrt/tensorrt_ops.cc b/src/runtime/contrib/tensorrt/tensorrt_ops.cc index c3ff1c45f50e..db853d837edf 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_ops.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_ops.cc @@ -386,8 +386,35 @@ class BatchNormOpConverter : public TensorRTOpConverter { const int axis = std::stoi(params->node.GetAttr>("axis")[0]); const bool scale = std::stoi(params->node.GetAttr>("scale")[0]); const bool center = std::stoi(params->node.GetAttr>("center")[0]); - ICHECK(axis == 1 || axis == 3); - const bool need_transpose = axis == 3; + auto input_dims = TrtDimsToVector(input->getDimensions()); + const size_t min_rank = TRT_HAS_IMPLICIT_BATCH(params) ? 3 : 4; + const size_t max_rank = TRT_HAS_IMPLICIT_BATCH(params) ? 4 : 5; + ICHECK_LE(input_dims.size(), max_rank); + const bool need_reshape = input_dims.size() < min_rank; + const bool need_transpose = axis != 1; + + // Reshape if needed + if (need_reshape) { + // Add dims of size 1 until rank is required_rank. + std::vector new_shape(input_dims); + while (new_shape.size() < min_rank) new_shape.insert(new_shape.end(), 1); + input = Reshape(params, input, new_shape); + } + + // Transpose if needed. + const int input_rank_with_batch = + input->getDimensions().nbDims + (TRT_HAS_IMPLICIT_BATCH(params) ? 1 : 0); + ICHECK(input_rank_with_batch == 4 || input_rank_with_batch == 5); + std::vector transpose_order(input_rank_with_batch); + if (need_transpose) { + // Move axis dim to first dim after batch. + for (int i = 0; i < input_rank_with_batch; ++i) { + transpose_order[i] = i; + } + transpose_order[1] = axis; + transpose_order[axis] = 1; + input = Transpose(params, input, transpose_order); + } void* weight_scale_ptr = new float[gamma.count]; nvinfer1::Weights weight_scale{nvinfer1::DataType::kFLOAT, weight_scale_ptr, gamma.count}; @@ -414,15 +441,23 @@ class BatchNormOpConverter : public TensorRTOpConverter { shift_ptr[i] += beta_ptr[i]; } } - if (need_transpose) { - input = Transpose(params, input, {0, 3, 1, 2}); - } - nvinfer1::IScaleLayer* scale_layer = params->network->addScale( + +#if TRT_VERSION_GE(6, 0, 1) + const int channel_dim = TRT_HAS_IMPLICIT_BATCH(params) ? 0 : 1; + nvinfer1::IScaleLayer* scale_layer = params->network->addScaleNd( + *input, nvinfer1::ScaleMode::kCHANNEL, weight_shift, weight_scale, power, channel_dim); +#else + ICHECK_EQ(input->getDimensions().nbDims(), 3); + nvinfer1::IScaleLayer* scale_layer = params->network->addScaleNd( *input, nvinfer1::ScaleMode::kCHANNEL, weight_shift, weight_scale, power); +#endif ICHECK(scale_layer != nullptr); auto output = scale_layer->getOutput(0); if (need_transpose) { - output = Transpose(params, output, {0, 2, 3, 1}); + output = Transpose(params, output, transpose_order); + } + if (need_reshape) { + output = Reshape(params, output, input_dims); } params->outputs.push_back(output); } diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index aadfa1303655..9b62ee2c4087 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -710,6 +710,12 @@ def get_graph(x_shape, param_shape, axis=1, epsilon=1e-5): run_and_verify_func(get_graph((1, 64, 56, 56), (64,))) run_and_verify_func(get_graph((1, 56, 56, 64), (64,), axis=3, epsilon=1.001e-05)) + run_and_verify_func(get_graph((1, 4, 8, 4), (8,), axis=2)) + run_and_verify_func(get_graph((1, 8, 4, 4, 4), (8,), axis=1)) + run_and_verify_func(get_graph((1, 4, 8, 4, 4), (8,), axis=2)) + run_and_verify_func(get_graph((1, 4, 4, 4, 8), (8,), axis=4)) + run_and_verify_func(get_graph((1, 8), (8,), axis=1)) + run_and_verify_func(get_graph((1, 3, 8), (8,), axis=2)) def test_unary(): From f51f808378b0a4c4e4edd1776ad1cf443d82a3fb Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Thu, 3 Dec 2020 19:56:07 +0000 Subject: [PATCH 2/3] Add return false --- python/tvm/relay/op/contrib/tensorrt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index c8f655760119..bda71468d9e2 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -343,6 +343,7 @@ def batch_norm_annotate_fn(expr): # pylint: disable=unused-variable return False if len(args[0].checked_type.shape) == 5 and get_tensorrt_version() < (6, 0, 1): logger.info("nn.batch_norm: TensorRT 6.0.1 or higher is required for rank 5 inputs.") + return False if len(args[0].checked_type.shape) > 5: logger.info("nn.batch_norm: Input rank must be 5 or less.") return False From c1b23a2c752848824f8ca5b539021a46ac5522f3 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Fri, 4 Dec 2020 18:40:55 +0000 Subject: [PATCH 3/3] Fix TRT < 6 build --- src/runtime/contrib/tensorrt/tensorrt_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_ops.cc b/src/runtime/contrib/tensorrt/tensorrt_ops.cc index db853d837edf..1e6867b83cff 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_ops.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_ops.cc @@ -448,7 +448,7 @@ class BatchNormOpConverter : public TensorRTOpConverter { *input, nvinfer1::ScaleMode::kCHANNEL, weight_shift, weight_scale, power, channel_dim); #else ICHECK_EQ(input->getDimensions().nbDims(), 3); - nvinfer1::IScaleLayer* scale_layer = params->network->addScaleNd( + nvinfer1::IScaleLayer* scale_layer = params->network->addScale( *input, nvinfer1::ScaleMode::kCHANNEL, weight_shift, weight_scale, power); #endif ICHECK(scale_layer != nullptr);