From ade9abb1ba133bc18e80f208c8886e9ba5546863 Mon Sep 17 00:00:00 2001 From: wenbin Date: Wed, 13 Oct 2021 14:53:07 +0800 Subject: [PATCH 1/4] pool fix (#36388) * pool fix * comments --- .../inference/tensorrt/convert/pool2d_op.cc | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc index 1898f28c73ad0..effd6cb5cb824 100644 --- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc @@ -112,6 +112,18 @@ class Pool2dOpConverter : public OpConverter { nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]); nvinfer1::ILayer *layer = nullptr; + nvinfer1::DimsHW pre_pad(0, 0); + nvinfer1::DimsHW post_pad(0, 0); + // paddle Non ceil_mode : Output size = (input size - filter size + 2 * + // padding) / stride (stride size) + 1 + // tensorrt EXPLICIT_ROUND_DOWN: O = floor((M - DK) / S) + 1 + // so if M - DK < 0 we need extra padding + if (input_shape.d[input_dims - 2] - ksize[0] + 2 * paddings[0] < 0) { + post_pad.h() = strides[0] - 1; + } + if (input_shape.d[input_dims - 1] - ksize[1] + 2 * paddings[1] < 0) { + post_pad.w() = strides[1] - 1; + } if (op_desc.HasAttr("enable_int8")) { #if IS_TRT_VERSION_GE(5000) @@ -123,6 +135,16 @@ class Pool2dOpConverter : public OpConverter { if (engine_->with_dynamic_shape()) { if (!adaptive && !global_pooling && !ceil_mode) { + if ((post_pad.w() > 0 || post_pad.h() > 0) && + (padding_algorithm != "SAME")) { + auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1, + pre_pad, post_pad); + PADDLE_ENFORCE_NOT_NULL( + pad_layer, platform::errors::Fatal( + "Pad layer in poolOp converter could not be " + "created. The pointer to pad layer is `NULL`.")); + input1 = pad_layer->getOutput(0); + } auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1, nv_pool_type, nv_ksize); pool_layer->setStride(nv_strides); @@ -157,9 +179,8 @@ class Pool2dOpConverter : public OpConverter { if (global_pooling == true) { nv_ksize.d[0] = input_shape.d[input_dims - 2]; nv_ksize.d[1] = input_shape.d[input_dims - 1]; - auto *pool_layer = TRT_ENGINE_ADD_LAYER( - engine_, Pooling, *const_cast(input1), - nv_pool_type, nv_ksize); + auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1, + nv_pool_type, nv_ksize); PADDLE_ENFORCE_NOT_NULL( pool_layer, platform::errors::Fatal( "trt pool layer in converter could not be created.")); @@ -181,28 +202,25 @@ class Pool2dOpConverter : public OpConverter { } if (!adaptive) { - // Under ceil mode, the pre_pad and post_pad are used to - // record the the padding size. In some ceil mode cases, - // we do not need padding, so we initialize the two vars to 0. - - nvinfer1::DimsHW pre_pad(0, 0); - nvinfer1::DimsHW post_pad(0, 0); if (ceil_mode) { // If ceil mode is true, we will pad the appropriate size to the input. DealCeilMode(input_shape, ksize, strides, paddings, &pre_pad, &post_pad, input_dims); - auto *pad_layer = TRT_ENGINE_ADD_LAYER( - engine_, Padding, *const_cast(input1), pre_pad, - post_pad); + } + + if ((post_pad.w() > 0 || post_pad.h() > 0) && + (padding_algorithm != "SAME")) { + auto *pad_layer = + TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1, pre_pad, post_pad); PADDLE_ENFORCE_NOT_NULL( pad_layer, platform::errors::Fatal( "Pad layer in poolOp converter could not be " "created. The pointer to pad layer is `NULL`.")); input1 = pad_layer->getOutput(0); } - auto *pool_layer = TRT_ENGINE_ADD_LAYER( - engine_, Pooling, *const_cast(input1), - nv_pool_type, nv_ksize); + + auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1, + nv_pool_type, nv_ksize); PADDLE_ENFORCE_NOT_NULL( pool_layer, platform::errors::Fatal( "trt pool layer in converter could not be created.")); From 39e5b63ecc3f3ffef5674c924a58bb01a5da7210 Mon Sep 17 00:00:00 2001 From: wenbin Date: Wed, 20 Oct 2021 10:44:22 +0800 Subject: [PATCH 2/4] fix (#36557) * fix * remove const --- .../inference/tensorrt/convert/pool2d_op.cc | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc index effd6cb5cb824..feaafebe7b031 100644 --- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc @@ -112,17 +112,17 @@ class Pool2dOpConverter : public OpConverter { nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]); nvinfer1::ILayer *layer = nullptr; - nvinfer1::DimsHW pre_pad(0, 0); - nvinfer1::DimsHW post_pad(0, 0); + nvinfer1::DimsHW g_pre_pad(0, 0); + nvinfer1::DimsHW g_post_pad(0, 0); // paddle Non ceil_mode : Output size = (input size - filter size + 2 * // padding) / stride (stride size) + 1 // tensorrt EXPLICIT_ROUND_DOWN: O = floor((M - DK) / S) + 1 // so if M - DK < 0 we need extra padding if (input_shape.d[input_dims - 2] - ksize[0] + 2 * paddings[0] < 0) { - post_pad.h() = strides[0] - 1; + g_post_pad.h() = strides[0] - 1; } if (input_shape.d[input_dims - 1] - ksize[1] + 2 * paddings[1] < 0) { - post_pad.w() = strides[1] - 1; + g_post_pad.w() = strides[1] - 1; } if (op_desc.HasAttr("enable_int8")) { @@ -135,10 +135,10 @@ class Pool2dOpConverter : public OpConverter { if (engine_->with_dynamic_shape()) { if (!adaptive && !global_pooling && !ceil_mode) { - if ((post_pad.w() > 0 || post_pad.h() > 0) && + if ((g_post_pad.w() > 0 || g_post_pad.h() > 0) && (padding_algorithm != "SAME")) { auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1, - pre_pad, post_pad); + g_pre_pad, g_post_pad); PADDLE_ENFORCE_NOT_NULL( pad_layer, platform::errors::Fatal( "Pad layer in poolOp converter could not be " @@ -203,22 +203,35 @@ class Pool2dOpConverter : public OpConverter { if (!adaptive) { if (ceil_mode) { + nvinfer1::DimsHW pre_pad(0, 0); + nvinfer1::DimsHW post_pad(0, 0); // If ceil mode is true, we will pad the appropriate size to the input. DealCeilMode(input_shape, ksize, strides, paddings, &pre_pad, &post_pad, input_dims); - } - - if ((post_pad.w() > 0 || post_pad.h() > 0) && - (padding_algorithm != "SAME")) { auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1, pre_pad, post_pad); + PADDLE_ENFORCE_NOT_NULL( pad_layer, platform::errors::Fatal( "Pad layer in poolOp converter could not be " "created. The pointer to pad layer is `NULL`.")); input1 = pad_layer->getOutput(0); } - +#if IS_TRT_VERSION_GE(8000) + // Exclude padding pixels from the average mean is not supported well by + // TRT + // so enable padding for trt8.0 above. + if ((g_post_pad.w() > 0 || g_post_pad.h() > 0) && + (padding_algorithm != "SAME") && !ceil_mode) { + auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1, + g_pre_pad, g_post_pad); + PADDLE_ENFORCE_NOT_NULL( + pad_layer, platform::errors::Fatal( + "Pad layer in poolOp converter could not be " + "created. The pointer to pad layer is `NULL`.")); + input1 = pad_layer->getOutput(0); + } +#endif auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1, nv_pool_type, nv_ksize); PADDLE_ENFORCE_NOT_NULL( From baf43592d42745741543636efc352914d1fa9e38 Mon Sep 17 00:00:00 2001 From: wenbin Date: Fri, 22 Oct 2021 16:58:51 +0800 Subject: [PATCH 3/4] correct slice serialize data (#36588) * slice * add UT --- .../inference/tensorrt/plugin/slice_op_plugin.cu | 9 +++++---- .../ir/inference/test_trt_slice_plugin.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu index cbd6e3a2e4ffe..2b6541c5515ce 100644 --- a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu @@ -65,6 +65,7 @@ SlicePlugin::SlicePlugin(void const *serial_data, size_t serial_length) { DeserializeValue(&serial_data, &serial_length, &starts_); DeserializeValue(&serial_data, &serial_length, &ends_); DeserializeValue(&serial_data, &serial_length, &axes_); + DeserializeValue(&serial_data, &serial_length, &with_fp16_); cudaEventCreate(©_event_); cudaStreamCreate(©_stream_); } @@ -187,17 +188,17 @@ int SlicePlugin::enqueue(int batch_size, const void *const *inputs, } size_t SlicePlugin::getSerializationSize() const TRT_NOEXCEPT { - return getBaseSerializationSize() + SerializedSize(getPluginType()) + - SerializedSize(starts_) + SerializedSize(ends_) + - SerializedSize(axes_); + return getBaseSerializationSize() + SerializedSize(starts_) + + SerializedSize(ends_) + SerializedSize(axes_) + + SerializedSize(with_fp16_); } void SlicePlugin::serialize(void *buffer) const TRT_NOEXCEPT { - SerializeValue(&buffer, getPluginType()); serializeBase(buffer); SerializeValue(&buffer, starts_); SerializeValue(&buffer, ends_); SerializeValue(&buffer, axes_); + SerializeValue(&buffer, with_fp16_); } // Dynamic Plugin below. diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py index 6ea2335c7a1b1..98232838ee08b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_slice_plugin.py @@ -86,5 +86,19 @@ def setUpTensorRTParams(self): self.enable_trt = True +class StaticSlicePluginTRTTestFp16(SlicePluginTRTTest): + def setUpTensorRTParams(self): + self.trt_parameters = SlicePluginTRTTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Half, True, False) + self.enable_trt = True + + +class StaticSlicePluginTRTTestFp32(SlicePluginTRTTest): + def setUpTensorRTParams(self): + self.trt_parameters = SlicePluginTRTTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, True, False) + self.enable_trt = True + + if __name__ == "__main__": unittest.main() From 89ee527c19fd90dfd6443e562cf83a6f4afc524c Mon Sep 17 00:00:00 2001 From: wenbin Date: Sat, 23 Oct 2021 16:45:56 +0800 Subject: [PATCH 4/4] disable padding if dynamic shape (#36648) * disable padding if dynamic shape * add parentheses * correct --- paddle/fluid/inference/tensorrt/convert/pool2d_op.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc index feaafebe7b031..9eed3af335f41 100644 --- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc @@ -135,8 +135,11 @@ class Pool2dOpConverter : public OpConverter { if (engine_->with_dynamic_shape()) { if (!adaptive && !global_pooling && !ceil_mode) { - if ((g_post_pad.w() > 0 || g_post_pad.h() > 0) && - (padding_algorithm != "SAME")) { + // input_shape.d < 0 means we can't get shape info here. + // we may suffer from issue if shape is not met finally. + if ((padding_algorithm != "SAME") && + ((g_post_pad.w() > 0 && input_shape.d[input_dims - 2] > 0) || + (g_post_pad.h() > 0 && input_shape.d[input_dims - 1] > 0))) { auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1, g_pre_pad, g_post_pad); PADDLE_ENFORCE_NOT_NULL( @@ -145,6 +148,7 @@ class Pool2dOpConverter : public OpConverter { "created. The pointer to pad layer is `NULL`.")); input1 = pad_layer->getOutput(0); } + auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1, nv_pool_type, nv_ksize); pool_layer->setStride(nv_strides);