Perception: allow corresponding single anchor for each class (ApolloA…

…uto#11265) * Perception: fix PreprocessGPU in PointPillars * Perception: allow corresponding single anchor for each of 3 classes
wuqunyong · May 18, 2020 · 6125475 · 6125475
1 parent 3b65fc8
commit 6125475
Show file tree

Hide file tree

Showing 12 changed files with 186 additions and 250 deletions.
diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/anchor_mask_cuda.cu b/modules/perception/lidar/lib/detection/lidar_point_pillars/anchor_mask_cuda.cu
@@ -61,7 +61,7 @@ __global__ void scan_x(int* g_odata, int* g_idata, int n) {
   }
   if (thid == 0) {
     temp[n - 1] = 0;
-  }                               // clear the last element
+  }                                 // clear the last element
   for (int d = 1; d < n; d *= 2) {  // traverse down tree & build scan
     offset >>= 1;
     __syncthreads();
@@ -108,7 +108,7 @@ __global__ void scan_y(int* g_odata, int* g_idata, int n) {
   }
   if (thid == 0) {
     temp[n - 1] = 0;
-  }                               // clear the last element
+  }                                 // clear the last element
   for (int d = 1; d < n; d *= 2) {  // traverse down tree & build scan
     offset >>= 1;
     __syncthreads();
@@ -174,14 +174,21 @@ __global__ void make_anchor_mask_kernel(
   }
 }
 
-AnchorMaskCuda::AnchorMaskCuda(
-    const int num_inds_for_scan, const int num_anchor_x_inds,
-    const int num_anchor_y_inds, const int num_anchor_r_inds,
-    const float min_x_range, const float min_y_range, const float pillar_x_size,
-    const float pillar_y_size, const int grid_x_size, const int grid_y_size)
+AnchorMaskCuda::AnchorMaskCuda(const int num_inds_for_scan,
+                               const int num_anchor_x_inds,
+                               const int num_anchor_y_inds,
+                               const int num_class,
+                               const int num_anchor_r_inds,
+                               const float min_x_range,
+                               const float min_y_range,
+                               const float pillar_x_size,
+                               const float pillar_y_size,
+                               const int grid_x_size,
+                               const int grid_y_size)
     : num_inds_for_scan_(num_inds_for_scan),
       num_anchor_x_inds_(num_anchor_x_inds),
       num_anchor_y_inds_(num_anchor_y_inds),
+      num_class_(num_class),
       num_anchor_r_inds_(num_anchor_r_inds),
       min_x_range_(min_x_range),
       min_y_range_(min_y_range),
@@ -204,7 +211,8 @@ void AnchorMaskCuda::DoAnchorMaskCuda(
   GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map, dev_cumsum_along_y,
                        num_inds_for_scan_ * num_inds_for_scan_ * sizeof(int),
                        cudaMemcpyDeviceToDevice));
-  make_anchor_mask_kernel<<<num_anchor_x_inds_ * num_anchor_r_inds_,
+  make_anchor_mask_kernel<<<num_anchor_x_inds_ * num_class_ *
+                                num_anchor_r_inds_,
                             num_anchor_y_inds_>>>(
       dev_box_anchors_min_x, dev_box_anchors_min_y, dev_box_anchors_max_x,
       dev_box_anchors_max_y, dev_sparse_pillar_map, dev_anchor_mask,

diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/anchor_mask_cuda.h b/modules/perception/lidar/lib/detection/lidar_point_pillars/anchor_mask_cuda.h
@@ -48,6 +48,7 @@ class AnchorMaskCuda {
   const int num_inds_for_scan_;
   const int num_anchor_x_inds_;
   const int num_anchor_y_inds_;
+  const int num_class_;
   const int num_anchor_r_inds_;
   const float min_x_range_;
   const float min_y_range_;
@@ -62,6 +63,7 @@ class AnchorMaskCuda {
    * @param[in] num_inds_for_scan Number of indexes for scan(cumsum)
    * @param[in] num_anchor_x_inds Number of x-indexes for anchors
    * @param[in] num_anchor_y_inds Number of y-indexes for anchors
+   * @param[in] num_class Number of classes
    * @param[in] num_anchor_r_inds Number of rotation-indexes for anchors
    * @param[in] min_x_range Minimum x value for pointcloud
    * @param[in] min_y_range Minimum y value for pointcloud
@@ -72,29 +74,30 @@ class AnchorMaskCuda {
    * @details Captital variables never change after the compile
    */
   AnchorMaskCuda(const int num_inds_for_scan, const int num_anchor_x_inds,
-                 const int num_anchor_y_inds, const int num_anchor_r_inds,
-                 const float min_x_range, const float min_y_range,
-                 const float pillar_x_size, const float pillar_y_size,
-                 const int grid_x_size, const int grid_y_size);
+                 const int num_anchor_y_inds, const int num_class,
+                 const int num_anchor_r_inds, const float min_x_range,
+                 const float min_y_range, const float pillar_x_size,
+                 const float pillar_y_size, const int grid_x_size,
+                 const int grid_y_size);
 
   /**
    * @brief call cuda code for making anchor mask
-   * @param[in] dev_sparse_pillar_map Grid map representation for pillar
-   * occupancy
-   * @param[in] dev_cumsum_along_x Array for storing cumsum-ed
-   * dev_sparse_pillar_map values
-   * @param[in] dev_cumsum_along_y Array for storing cumsum-ed
-   * dev_cumsum_along_y values
-   * @param[in] dev_box_anchors_min_x Array for storing min x value for each
-   * anchor
-   * @param[in] dev_box_anchors_min_y Array for storing min y value for each
-   * anchor
-   * @param[in] dev_box_anchors_max_x Array for storing max x value for each
-   * anchor
-   * @param[in] dev_box_anchors_max_y Array for storing max y value for each
-   * anchor
-   * @param[in] dev_box_anchors_max_y Array for storing max y value for each
-   * anchor
+   * @param[in] dev_sparse_pillar_map
+   *   Grid map representation for pillar occupancy
+   * @param[in] dev_cumsum_along_x
+   *   Array for storing cumsum-ed dev_sparse_pillar_map values
+   * @param[in] dev_cumsum_along_y
+   *   Array for storing cumsum-ed dev_cumsum_along_y values
+   * @param[in] dev_box_anchors_min_x
+   *   Array for storing min x value for each anchor
+   * @param[in] dev_box_anchors_min_y
+   *   Array for storing min y value for each anchor
+   * @param[in] dev_box_anchors_max_x
+   *   Array for storing max x value for each anchor
+   * @param[in] dev_box_anchors_max_y
+   *   Array for storing max y value for each anchor
+   * @param[in] dev_box_anchors_max_y
+   *   Array for storing max y value for each anchor
    * @param[out] dev_anchor_mask Anchor mask for filtering the network output
    * @details dev_* means device memory. Make a mask for filtering pillar
    * occupancy area

diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.cc b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.cc
@@ -42,20 +42,20 @@ namespace apollo {
 namespace perception {
 namespace lidar {
 
-const int PointPillars::kNumClass = 1;
+const int PointPillars::kNumClass = 3;
 const int PointPillars::kMaxNumPillars = 12000;
 const int PointPillars::kMaxNumPointsPerPillar = 100;
 const int PointPillars::kNumPointFeature = 4;
 const int PointPillars::kPfeOutputSize = kMaxNumPillars * 64;
-const int PointPillars::kGridXSize = 432;
-const int PointPillars::kGridYSize = 496;
+const int PointPillars::kGridXSize = 280;
+const int PointPillars::kGridYSize = 320;
 const int PointPillars::kGridZSize = 1;
 const int PointPillars::kRpnInputSize = 64 * kGridXSize * kGridYSize;
 const int PointPillars::kNumAnchorXInds = kGridXSize * 0.5;
 const int PointPillars::kNumAnchorYInds = kGridYSize * 0.5;
 const int PointPillars::kNumAnchorRInds = 2;
 const int PointPillars::kNumAnchor =
-    kNumAnchorXInds * kNumAnchorYInds * kNumAnchorRInds;
+    kNumAnchorXInds * kNumAnchorYInds * kNumClass * kNumAnchorRInds;
 const int PointPillars::kNumOutputBoxFeature = 7;
 const int PointPillars::kRpnBoxOutputSize = kNumAnchor * kNumOutputBoxFeature;
 const int PointPillars::kRpnClsOutputSize = kNumAnchor * kNumClass;
@@ -67,20 +67,20 @@ const int PointPillars::kNumThreads = 64;
 // common.h
 const int PointPillars::kNumBoxCorners = 4;
 // TODO(chenjiahao): kNumBoxCorners is actually used as kNumPointFeature
-const float PointPillars::kPillarXSize = 0.16f;
-const float PointPillars::kPillarYSize = 0.16f;
+const float PointPillars::kPillarXSize = 0.25f;
+const float PointPillars::kPillarYSize = 0.25f;
 const float PointPillars::kPillarZSize = 4.0f;
 const float PointPillars::kMinXRange = 0.0f;
-const float PointPillars::kMinYRange = -39.68f;
+const float PointPillars::kMinYRange = -40.0f;
 const float PointPillars::kMinZRange = -3.0f;
-const float PointPillars::kMaxXRange = 69.12f;
-const float PointPillars::kMaxYRange = 39.68f;
+const float PointPillars::kMaxXRange = 70.0f;
+const float PointPillars::kMaxYRange = 40.0f;
 const float PointPillars::kMaxZRange = 1.0f;
 const float PointPillars::kSensorHeight = 1.73f;
-// TODO(chenjiahao): get kSensorHeight's value from sensor's height param
-const float PointPillars::kAnchorDxSize = 1.6f;
-const float PointPillars::kAnchorDySize = 3.9f;
-const float PointPillars::kAnchorDzSize = 1.56f;
+// TODO(chenjiahao): kSensorHeight need to get from sensor's height param
+const std::vector<float> PointPillars::kAnchorDxSizes{1.6f, 0.6f, 0.6f};
+const std::vector<float> PointPillars::kAnchorDySizes{3.9f, 1.76f, 0.8f};
+const std::vector<float> PointPillars::kAnchorDzSizes{1.56f, 1.73f, 1.73f};
 
 PointPillars::PointPillars(const bool reproduce_result_mode,
                            const float score_threshold,
@@ -107,18 +107,18 @@ PointPillars::PointPillars(const bool reproduce_result_mode,
 
   anchor_mask_cuda_ptr_.reset(
       new AnchorMaskCuda(kNumIndsForScan, kNumAnchorXInds, kNumAnchorYInds,
-                         kNumAnchorRInds, kMinXRange, kMinYRange, kPillarXSize,
-                         kPillarYSize, kGridXSize, kGridYSize));
+                         kNumClass, kNumAnchorRInds, kMinXRange, kMinYRange,
+                         kPillarXSize, kPillarYSize, kGridXSize, kGridYSize));
 
   scatter_cuda_ptr_.reset(
       new ScatterCuda(kNumThreads, kMaxNumPillars, kGridXSize, kGridYSize));
 
   const float float_min = std::numeric_limits<float>::lowest();
   const float float_max = std::numeric_limits<float>::max();
   postprocess_cuda_ptr_.reset(new PostprocessCuda(
-      float_min, float_max, kNumAnchorXInds, kNumAnchorYInds, kNumAnchorRInds,
-      score_threshold_, kNumThreads, nms_overlap_threshold_, kNumBoxCorners,
-      kNumOutputBoxFeature, kNumClass));
+      float_min, float_max, kNumAnchorXInds, kNumAnchorYInds, kNumClass,
+      kNumAnchorRInds, score_threshold_, kNumThreads,
+      nms_overlap_threshold_, kNumBoxCorners, kNumOutputBoxFeature));
 
   DeviceMemoryMalloc();
   InitTRT();
@@ -334,16 +334,18 @@ void PointPillars::GenerateAnchors(float* anchors_px_, float* anchors_py_,
 
   for (int y = 0; y < kNumAnchorYInds; ++y) {
     for (int x = 0; x < kNumAnchorXInds; ++x) {
-      for (int r = 0; r < kNumAnchorRInds; ++r) {
-        int ind =
-            y * kNumAnchorXInds * kNumAnchorRInds + x * kNumAnchorRInds + r;
-        anchors_px_[ind] = anchor_x_count[x];
-        anchors_py_[ind] = anchor_y_count[y];
-        anchors_ro_[ind] = anchor_r_count[r];
-        anchors_pz_[ind] = -1 * kSensorHeight;
-        anchors_dx_[ind] = kAnchorDxSize;
-        anchors_dy_[ind] = kAnchorDySize;
-        anchors_dz_[ind] = kAnchorDzSize;
+      for (int c = 0; c < kNumClass; ++c) {
+        for (int r = 0; r < kNumAnchorRInds; ++r) {
+          int ind = y * kNumAnchorXInds * kNumClass * kNumAnchorRInds +
+              x * kNumClass * kNumAnchorRInds + c * kNumAnchorRInds + r;
+          anchors_px_[ind] = anchor_x_count[x];
+          anchors_py_[ind] = anchor_y_count[y];
+          anchors_ro_[ind] = anchor_r_count[r];
+          anchors_pz_[ind] = -1 * kSensorHeight;
+          anchors_dx_[ind] = kAnchorDxSizes[c];
+          anchors_dy_[ind] = kAnchorDySizes[c];
+          anchors_dz_[ind] = kAnchorDzSizes[c];
+        }
       }
     }
   }
@@ -381,34 +383,38 @@ void PointPillars::ConvertAnchors2BoxAnchors(float* anchors_px,
                                              float* box_anchors_min_y_,
                                              float* box_anchors_max_x_,
                                              float* box_anchors_max_y_) {
-  // flipping box's dimension at direction that rotates 90 degree
+  // flipping box's dimension
   float flipped_anchors_dx[kNumAnchor];
-  flipped_anchors_dx[0] = 0;
   float flipped_anchors_dy[kNumAnchor];
-  flipped_anchors_dy[0] = 0;
+  memset(flipped_anchors_dx, 0, kNumAnchor * sizeof(float));
+  memset(flipped_anchors_dy, 0, kNumAnchor * sizeof(float));
   for (int x = 0; x < kNumAnchorXInds; ++x) {
     for (int y = 0; y < kNumAnchorYInds; ++y) {
-      int base_ind =
-          x * kNumAnchorYInds * kNumAnchorRInds + y * kNumAnchorRInds;
-      flipped_anchors_dx[base_ind + 0] = kAnchorDxSize;
-      flipped_anchors_dy[base_ind + 0] = kAnchorDySize;
-      flipped_anchors_dx[base_ind + 1] = kAnchorDySize;
-      flipped_anchors_dy[base_ind + 1] = kAnchorDxSize;
+      for (int c = 0; c < kNumClass; ++c) {
+        int base_ind = x * kNumAnchorYInds * kNumClass * kNumAnchorRInds +
+            y * kNumClass * kNumAnchorRInds + c * kNumAnchorRInds;
+        flipped_anchors_dx[base_ind + 0] = kAnchorDxSizes[c];
+        flipped_anchors_dy[base_ind + 0] = kAnchorDySizes[c];
+        flipped_anchors_dx[base_ind + 1] = kAnchorDySizes[c];
+        flipped_anchors_dy[base_ind + 1] = kAnchorDxSizes[c];
+      }
     }
   }
   for (int x = 0; x < kNumAnchorXInds; ++x) {
     for (int y = 0; y < kNumAnchorYInds; ++y) {
-      for (int r = 0; r < kNumAnchorRInds; ++r) {
-        int ind =
-            x * kNumAnchorYInds * kNumAnchorRInds + y * kNumAnchorRInds + r;
-        box_anchors_min_x_[ind] =
-            anchors_px[ind] - flipped_anchors_dx[ind] / 2.0f;
-        box_anchors_min_y_[ind] =
-            anchors_py[ind] - flipped_anchors_dy[ind] / 2.0f;
-        box_anchors_max_x_[ind] =
-            anchors_px[ind] + flipped_anchors_dx[ind] / 2.0f;
-        box_anchors_max_y_[ind] =
-            anchors_py[ind] + flipped_anchors_dy[ind] / 2.0f;
+      for (int c = 0; c < kNumClass; ++c) {
+        for (int r = 0; r < kNumAnchorRInds; ++r) {
+          int ind = x * kNumAnchorYInds * kNumClass * kNumAnchorRInds +
+              y * kNumClass * kNumAnchorRInds + c * kNumAnchorRInds + r;
+          box_anchors_min_x_[ind] =
+              anchors_px[ind] - flipped_anchors_dx[ind] / 2.0f;
+          box_anchors_min_y_[ind] =
+              anchors_py[ind] - flipped_anchors_dy[ind] / 2.0f;
+          box_anchors_max_x_[ind] =
+              anchors_px[ind] + flipped_anchors_dx[ind] / 2.0f;
+          box_anchors_max_y_[ind] =
+              anchors_py[ind] + flipped_anchors_dy[ind] / 2.0f;
+        }
       }
     }
   }
@@ -538,7 +544,6 @@ void PointPillars::PreprocessCPU(const float* in_points_array,
   delete[] sparse_pillar_map;
 }
 
-// TODO(chenjiahao): fix this function
 void PointPillars::PreprocessGPU(const float* in_points_array,
                                  const int in_num_points) {
   float* dev_points;
@@ -548,15 +553,19 @@ void PointPillars::PreprocessGPU(const float* in_points_array,
                        in_num_points * kNumBoxCorners * sizeof(float),
                        cudaMemcpyHostToDevice));
 
-  GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0,
-                       kNumIndsForScan * kNumIndsForScan * sizeof(int)));
-  GPU_CHECK(cudaMemset(dev_pillar_point_feature_, 0,
-                       kMaxNumPillars * kMaxNumPointsPerPillar *
-                           kNumPointFeature * sizeof(float)));
   GPU_CHECK(cudaMemset(dev_x_coors_, 0, kMaxNumPillars * sizeof(int)));
   GPU_CHECK(cudaMemset(dev_y_coors_, 0, kMaxNumPillars * sizeof(int)));
   GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0,
                        kMaxNumPillars * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_point_feature_, 0,
+                       kMaxNumPillars * kMaxNumPointsPerPillar *
+                           kNumPointFeature * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_pillar_coors_, 0,
+                       kMaxNumPillars * 4 * sizeof(float)));
+  GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0,
+                       kNumIndsForScan * kNumIndsForScan * sizeof(int)));
+  host_pillar_count_[0] = 0;
+
   GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, kNumAnchor * sizeof(int)));
 
   preprocess_points_cuda_ptr_->DoPreprocessPointsCuda(

diff --git a/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h b/modules/perception/lidar/lib/detection/lidar_point_pillars/point_pillars.h
@@ -134,9 +134,9 @@ class PointPillars {
   static const float kMaxYRange;
   static const float kMaxZRange;
   static const float kSensorHeight;
-  static const float kAnchorDxSize;
-  static const float kAnchorDySize;
-  static const float kAnchorDzSize;
+  static const std::vector<float> kAnchorDxSizes;
+  static const std::vector<float> kAnchorDySizes;
+  static const std::vector<float> kAnchorDzSizes;
 
   // initialize in initializer list
   const bool reproduce_result_mode_;
@@ -161,13 +161,6 @@ class PointPillars {
   float* box_anchors_max_x_;
   float* box_anchors_max_y_;
 
-  // cuda malloc
-  float* dev_pillar_x_in_coors_;
-  float* dev_pillar_y_in_coors_;
-  float* dev_pillar_z_in_coors_;
-  float* dev_pillar_i_in_coors_;
-  int* dev_pillar_count_histo_;
-
   int* dev_x_coors_;
   int* dev_y_coors_;
   float* dev_num_points_per_pillar_;
@@ -308,7 +301,8 @@ class PointPillars {
    *   Represents maximum y value for a corresponding anchor
    * @details Make box anchors for nms
    */
-  void ConvertAnchors2BoxAnchors(float* anchors_px_, float* anchors_py_,
+  void ConvertAnchors2BoxAnchors(float* anchors_px_,
+                                 float* anchors_py_,
                                  float* box_anchors_min_x_,
                                  float* box_anchors_min_y_,
                                  float* box_anchors_max_x_,
@@ -331,7 +325,8 @@ class PointPillars {
    * @param[in] rpn_onnx_file Region Proposal Network ONNX file path
    * @details Variables could be changed through point_pillars_detection
    */
-  PointPillars(const bool reproduce_result_mode, const float score_threshold,
+  PointPillars(const bool reproduce_result_mode,
+               const float score_threshold,
                const float nms_overlap_threshold,
                const std::string pfe_onnx_file,
                const std::string rpn_onnx_file);
@@ -345,7 +340,8 @@ class PointPillars {
    * @param[out] out_labels Network output object's label
    * @details This is an interface for the algorithm
    */
-  void DoInference(const float* in_points_array, const int in_num_points,
+  void DoInference(const float* in_points_array,
+                   const int in_num_points,
                    std::vector<float>* out_detections,
                    std::vector<int>* out_labels);
 };