[new api] add func/class API psroi_pool and UT (#35352)

* add func/class API psroi_pool and UT
* add UT in static mode
* Remove redundant type checks in static mode
* More detailed description for test_psroi_pool_op
* fix code format of UT
* fix en-doc
PaddlePaddle · Sep 26, 2021 · e45d64e · e45d64e
1 parent 991ae3b
commit e45d64e
Show file tree

Hide file tree

Showing 6 changed files with 526 additions and 120 deletions.
diff --git a/paddle/fluid/operators/psroi_pool_op.cc b/paddle/fluid/operators/psroi_pool_op.cc
@@ -25,22 +25,26 @@ class PSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "Tensor, "
+             "(Tensor), "
              "the input of PSROIPoolOp. "
              "The format of input tensor is NCHW. Where N is the batch size, "
              "C is the number of input channels, "
              "H is the height of the input feature map, and "
              "W is the width. The data type can be float32 or float64");
     AddInput("ROIs",
-             "LoDTensor, "
+             "(LoDTensor), "
              "ROIs (Regions of Interest) to pool over. "
              "should be a 2-D LoDTensor of shape (num_rois, 4) "
              "given as [(x1, y1, x2, y2), ...]. "
              "where (x1, y1) is the top left coordinates, and "
              "(x2, y2) is the bottom right coordinates. "
              "The roi batch index can be calculated from LoD.");
+    AddInput("RoisNum",
+             "(Tensor), "
+             "The number of RoIs in each image.")
+        .AsDispensable();
     AddOutput("Out",
-              "Tensor, "
+              "(Tensor), "
               "the output of PSROIPoolOp is a 4-D Tensor with shape "
               "(num_rois, output_channels, pooled_h, pooled_w). "
               "The data type is the same as `x` ");
@@ -65,8 +69,6 @@ class PSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
                  "the pooled output width.")
         .SetDefault(1);
     AddComment(R"Doc(
-**PSROIPool Operator,** `rois` **of this op should be a LoDTensor**
-
 Position sensitive region of interest pooling (also known as PSROIPooling) is to perform
 position-sensitive average pooling on regions of interest specified by input, takes as 
 input N position-sensitive score maps and a list of num_rois regions of interest. 
@@ -106,7 +108,14 @@ class PSROIPoolOp : public framework::OperatorWithKernel {
         platform::errors::InvalidArgument(
             "ROIs should be a 2-D LoDTensor of shape (num_rois, 4) "
             "given as [(x1, y1, x2, y2), ...]"));
-
+    if (ctx->HasInput("RoisNum")) {  // RoisNum is a dispensable input
+      auto rois_num_dims = ctx->GetInputDim("RoisNum");
+      PADDLE_ENFORCE_EQ(rois_num_dims.size(), 1,
+                        platform::errors::InvalidArgument(
+                            "Input(RoisNum) should be a 1-D Tensor, "
+                            "but received a %d-D Tensor.",
+                            rois_num_dims.size()));
+    }
     int pooled_height = ctx->Attrs().Get<int>("pooled_height");
     int pooled_width = ctx->Attrs().Get<int>("pooled_width");
     int output_channels = ctx->Attrs().Get<int>("output_channels");
@@ -184,6 +193,7 @@ class PSROIPoolGradMaker : public framework::SingleGradOpMaker<T> {
     op->SetType("psroi_pool_grad");
     op->SetInput("X", this->Input("X"));
     op->SetInput("ROIs", this->Input("ROIs"));
+    op->SetInput("RoisNum", this->Input("RoisNum"));
     op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
     op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
     op->SetAttrMap(this->Attrs());

diff --git a/paddle/fluid/operators/psroi_pool_op.cu b/paddle/fluid/operators/psroi_pool_op.cu
@@ -185,34 +185,67 @@ class GPUPSROIPoolOpKernel : public framework::OpKernel<T> {
 
     int rois_num = rois->dims()[0];
     if (rois_num == 0) return;
-
-    auto rois_lod = rois->lod().back();
-    int rois_batch_size = rois_lod.size() - 1;
-    PADDLE_ENFORCE_EQ(rois_batch_size, batch_size,
-                      platform::errors::InvalidArgument(
-                          "The batch size of input(ROIs) and input(X) must be "
-                          "the same but received batch size of input(ROIs) and "
-                          "input(X) is %d and %d respectively.",
-                          rois_batch_size, batch_size));
-    int rois_num_with_lod = rois_lod[rois_batch_size];
-    PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
-                      platform::errors::InvalidArgument(
-                          "The number of rois from input(ROIs) and its LOD "
-                          "must be the same. Received rois %d of input(ROIs) "
-                          "but the number of rois %d from its LOD is %d",
-                          rois_num, rois_num_with_lod));
-
-    // set rois batch id
+    int rois_batch_size;
     framework::Tensor rois_batch_id_list;
     rois_batch_id_list.Resize({rois_num});
     int* rois_batch_id_data =
         rois_batch_id_list.mutable_data<int>(platform::CPUPlace());
-    for (int n = 0; n < rois_batch_size; ++n) {
-      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-        rois_batch_id_data[i] = n;
+
+    if (ctx.HasInput("RoisNum")) {
+      auto* rois_num_t = ctx.Input<Tensor>("RoisNum");
+      rois_batch_size = rois_num_t->numel();
+      auto* rois_num_data = rois_num_t->data<int>();
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size, batch_size,
+          platform::errors::InvalidArgument(
+              "The batch size of input(ROIs) and input(X) must be "
+              "the same but received batch size of input(ROIs) and "
+              "input(X) is %d and %d respectively.",
+              rois_batch_size, batch_size));
+      std::vector<int> rois_num_list(rois_batch_size);
+      memory::Copy(platform::CPUPlace(), rois_num_list.data(),
+                   BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()),
+                   rois_num_data, sizeof(int) * rois_batch_size, 0);
+      int rois_num_count = 0;
+      for (int i = 0; i < rois_batch_size; ++i) {
+        rois_num_count += rois_num_list[i];
+      }
+      PADDLE_ENFORCE_EQ(
+          rois_num_count, rois_num,
+          platform::errors::InvalidArgument(
+              "the rois_num from input and RoisNum must be the same"));
+      int start = 0;
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (int i = start; i < start + rois_num_list[n]; ++i) {
+          rois_batch_id_data[i] = n;
+        }
+        start += rois_num_list[n];
+      }
+    } else {
+      auto rois_lod = rois->lod().back();
+      rois_batch_size = rois_lod.size() - 1;
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size, batch_size,
+          platform::errors::InvalidArgument(
+              "The batch size of input(ROIs) and input(X) must be "
+              "the same but received batch size of input(ROIs) and "
+              "input(X) is %d and %d respectively.",
+              rois_batch_size, batch_size));
+      int rois_num_with_lod = rois_lod[rois_batch_size];
+      PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
+                        platform::errors::InvalidArgument(
+                            "The number of rois from input(ROIs) and its LOD "
+                            "must be the same. Received rois %d of input(ROIs) "
+                            "but the number of rois from its LOD is %d",
+                            rois_num, rois_num_with_lod));
+
+      // set rois batch id
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          rois_batch_id_data[i] = n;
+        }
       }
     }
-
     framework::Tensor rois_batch_id_list_gpu;
     framework::TensorCopy(rois_batch_id_list, ctx.GetPlace(),
                           ctx.device_context(), &rois_batch_id_list_gpu);
@@ -257,14 +290,30 @@ class GPUPSROIPoolGradOpKernel : public framework::OpKernel<T> {
       rois_batch_id_list.Resize({rois_num});
       int* rois_batch_id_data =
           rois_batch_id_list.mutable_data<int>(platform::CPUPlace());
-      auto rois_lod = rois->lod().back();
-      int rois_batch_size = rois_lod.size() - 1;
-      for (int n = 0; n < rois_batch_size; ++n) {
-        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-          rois_batch_id_data[i] = n;
+      int rois_batch_size;
+      if (ctx.HasInput("RoisNum")) {
+        auto* rois_num_t = ctx.Input<Tensor>("RoisNum");
+        rois_batch_size = rois_num_t->numel();
+        std::vector<int> rois_num_list(rois_batch_size);
+        memory::Copy(platform::CPUPlace(), rois_num_list.data(),
+                     BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()),
+                     rois_num_t->data<int>(), sizeof(int) * rois_batch_size, 0);
+        int start = 0;
+        for (int n = 0; n < rois_batch_size; ++n) {
+          for (int i = start; i < start + rois_num_list[n]; ++i) {
+            rois_batch_id_data[i] = n;
+          }
+          start += rois_num_list[n];
+        }
+      } else {
+        auto rois_lod = rois->lod().back();
+        rois_batch_size = rois_lod.size() - 1;
+        for (int n = 0; n < rois_batch_size; ++n) {
+          for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+            rois_batch_id_data[i] = n;
+          }
         }
       }
-
       framework::Tensor rois_batch_id_list_gpu;
       framework::TensorCopy(rois_batch_id_list, ctx.GetPlace(),
                             ctx.device_context(), &rois_batch_id_list_gpu);

diff --git a/paddle/fluid/operators/psroi_pool_op.h b/paddle/fluid/operators/psroi_pool_op.h
@@ -40,6 +40,13 @@ class CPUPSROIPoolOpKernel : public framework::OpKernel<T> {
     int width = in_dims[3];
     int rois_num = rois->dims()[0];
 
+    PADDLE_ENFORCE_EQ(input_channels,
+                      output_channels * pooled_height * pooled_width,
+                      platform::errors::InvalidArgument(
+                          "the channels of input "
+                          "X should equal the product of "
+                          "output_channels x pooled_height x pooled_width"));
+
     auto in_stride = framework::stride(in_dims);
     auto out_stride = framework::stride(out->dims());
 
@@ -49,32 +56,52 @@ class CPUPSROIPoolOpKernel : public framework::OpKernel<T> {
     rois_batch_id_list.Resize({rois_num});
     int* rois_batch_id_data =
         rois_batch_id_list.mutable_data<int>(ctx.GetPlace());
-
-    auto rois_lod = rois->lod().back();
-    int rois_batch_size = rois_lod.size() - 1;
-    PADDLE_ENFORCE_EQ(
-        rois_batch_size, batch_size,
-        platform::errors::InvalidArgument("the rois_batch_size and input(X) "
-                                          "batch_size should be the same."));
-    int rois_num_with_lod = rois_lod[rois_batch_size];
-    PADDLE_ENFORCE_EQ(rois_num_with_lod, rois_num,
-                      platform::errors::InvalidArgument(
-                          "the rois_num from input and lod must be the same"));
-
-    PADDLE_ENFORCE_EQ(input_channels,
-                      output_channels * pooled_height * pooled_width,
-                      platform::errors::InvalidArgument(
-                          "the channels of input "
-                          "X should equal the product of "
-                          "output_channels x pooled_height x pooled_width"));
-
-    // calculate batch id index for each roi according to LoD
-    for (int n = 0; n < rois_batch_size; ++n) {
-      for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-        rois_batch_id_data[i] = n;
+    int rois_batch_size;
+    if (ctx.HasInput("RoisNum")) {
+      auto* rois_num_t = ctx.Input<framework::Tensor>("RoisNum");
+      rois_batch_size = rois_num_t->numel();
+      auto* rois_num_data = rois_num_t->data<int>();
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size, batch_size,
+          platform::errors::InvalidArgument(
+              "The batch size of rois and the batch size of images "
+              " must be the same. But received the batch size of rois is %d, "
+              "and the batch size of images is %d",
+              rois_batch_size, batch_size));
+      int rois_num_count = 0;
+      for (int i = 0; i < rois_batch_size; ++i) {
+        rois_num_count += rois_num_data[i];
+      }
+      PADDLE_ENFORCE_EQ(
+          rois_num_count, rois_num,
+          platform::errors::InvalidArgument(
+              "the rois_num from input and RoisNum must be the same"));
+      int start = 0;
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (int i = start; i < start + rois_num_data[n]; ++i) {
+          rois_batch_id_data[i] = n;
+        }
+        start += rois_num_data[n];
+      }
+    } else {
+      auto rois_lod = rois->lod().back();
+      rois_batch_size = rois_lod.size() - 1;
+      PADDLE_ENFORCE_EQ(
+          rois_batch_size, batch_size,
+          platform::errors::InvalidArgument("the rois_batch_size and input(X) "
+                                            "batch_size should be the same."));
+      int rois_num_with_lod = rois_lod[rois_batch_size];
+      PADDLE_ENFORCE_EQ(
+          rois_num_with_lod, rois_num,
+          platform::errors::InvalidArgument(
+              "the rois_num from input and lod must be the same"));
+      // calculate batch id index for each roi according to LoD
+      for (int n = 0; n < rois_batch_size; ++n) {
+        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+          rois_batch_id_data[i] = n;
+        }
       }
     }
-
     T* output_data = out->mutable_data<T>(ctx.GetPlace());
     const T* input_rois = rois->data<T>();
 
@@ -93,7 +120,6 @@ class CPUPSROIPoolOpKernel : public framework::OpKernel<T> {
           static_cast<T>(round(offset_input_rois[2]) + 1.) * spatial_scale;
       T roi_end_h =
           static_cast<T>(round(offset_input_rois[3]) + 1.) * spatial_scale;
-
       // Force too small rois to be 1 x 1
       T roi_height = std::max(roi_end_h - roi_start_h, (T)0.1);  // avoid 0
       T roi_width = std::max(roi_end_w - roi_start_w, (T)0.1);
@@ -172,15 +198,28 @@ class CPUPSROIPoolGradOpKernel : public framework::OpKernel<T> {
       rois_batch_id_list.Resize({rois_num});
       int* rois_batch_id_data =
           rois_batch_id_list.mutable_data<int>(ctx.GetPlace());
-      auto rois_lod = rois->lod().back();
-      int rois_batch_size = rois_lod.size() - 1;
-      // calculate batch id index for each roi according to LoD
-      for (int n = 0; n < rois_batch_size; ++n) {
-        for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
-          rois_batch_id_data[i] = n;
+      int rois_batch_size;
+      if (ctx.HasInput("RoisNum")) {
+        auto* rois_num_t = ctx.Input<framework::Tensor>("RoisNum");
+        rois_batch_size = rois_num_t->numel();
+        auto* rois_num_data = rois_num_t->data<int>();
+        int start = 0;
+        for (int n = 0; n < rois_batch_size; ++n) {
+          for (int i = start; i < start + rois_num_data[n]; ++i) {
+            rois_batch_id_data[i] = n;
+          }
+          start += rois_num_data[n];
+        }
+      } else {
+        auto rois_lod = rois->lod().back();
+        rois_batch_size = rois_lod.size() - 1;
+        // calculate batch id index for each roi according to LoD
+        for (int n = 0; n < rois_batch_size; ++n) {
+          for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
+            rois_batch_id_data[i] = n;
+          }
         }
       }
-
       const T* input_rois = rois->data<T>();
       const T* output_grad_data = output_grad->data<T>();
       T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());

diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc
@@ -54,6 +54,7 @@ std::map<std::string, std::set<std::string>> op_ins_map = {
     {"gather", {"X", "Index", "Axis"}},
     {"roi_pool", {"X", "ROIs", "RoisNum"}},
     {"roi_align", {"X", "ROIs", "RoisNum"}},
+    {"psroi_pool", {"X", "ROIs", "RoisNum"}},
     {"collect_fpn_proposals",
      {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}},
     {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}},