Skip to content

Commit

Permalink
Perception: allow corresponding single anchor for each class (ApolloA…
Browse files Browse the repository at this point in the history
…uto#11265)

* Perception: fix PreprocessGPU in PointPillars

* Perception: allow corresponding single anchor for each of 3 classes
  • Loading branch information
jeroldchen authored May 18, 2020
1 parent 3b65fc8 commit 6125475
Show file tree
Hide file tree
Showing 12 changed files with 186 additions and 250 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ __global__ void scan_x(int* g_odata, int* g_idata, int n) {
}
if (thid == 0) {
temp[n - 1] = 0;
} // clear the last element
} // clear the last element
for (int d = 1; d < n; d *= 2) { // traverse down tree & build scan
offset >>= 1;
__syncthreads();
Expand Down Expand Up @@ -108,7 +108,7 @@ __global__ void scan_y(int* g_odata, int* g_idata, int n) {
}
if (thid == 0) {
temp[n - 1] = 0;
} // clear the last element
} // clear the last element
for (int d = 1; d < n; d *= 2) { // traverse down tree & build scan
offset >>= 1;
__syncthreads();
Expand Down Expand Up @@ -174,14 +174,21 @@ __global__ void make_anchor_mask_kernel(
}
}

AnchorMaskCuda::AnchorMaskCuda(
const int num_inds_for_scan, const int num_anchor_x_inds,
const int num_anchor_y_inds, const int num_anchor_r_inds,
const float min_x_range, const float min_y_range, const float pillar_x_size,
const float pillar_y_size, const int grid_x_size, const int grid_y_size)
AnchorMaskCuda::AnchorMaskCuda(const int num_inds_for_scan,
const int num_anchor_x_inds,
const int num_anchor_y_inds,
const int num_class,
const int num_anchor_r_inds,
const float min_x_range,
const float min_y_range,
const float pillar_x_size,
const float pillar_y_size,
const int grid_x_size,
const int grid_y_size)
: num_inds_for_scan_(num_inds_for_scan),
num_anchor_x_inds_(num_anchor_x_inds),
num_anchor_y_inds_(num_anchor_y_inds),
num_class_(num_class),
num_anchor_r_inds_(num_anchor_r_inds),
min_x_range_(min_x_range),
min_y_range_(min_y_range),
Expand All @@ -204,7 +211,8 @@ void AnchorMaskCuda::DoAnchorMaskCuda(
GPU_CHECK(cudaMemcpy(dev_sparse_pillar_map, dev_cumsum_along_y,
num_inds_for_scan_ * num_inds_for_scan_ * sizeof(int),
cudaMemcpyDeviceToDevice));
make_anchor_mask_kernel<<<num_anchor_x_inds_ * num_anchor_r_inds_,
make_anchor_mask_kernel<<<num_anchor_x_inds_ * num_class_ *
num_anchor_r_inds_,
num_anchor_y_inds_>>>(
dev_box_anchors_min_x, dev_box_anchors_min_y, dev_box_anchors_max_x,
dev_box_anchors_max_y, dev_sparse_pillar_map, dev_anchor_mask,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class AnchorMaskCuda {
const int num_inds_for_scan_;
const int num_anchor_x_inds_;
const int num_anchor_y_inds_;
const int num_class_;
const int num_anchor_r_inds_;
const float min_x_range_;
const float min_y_range_;
Expand All @@ -62,6 +63,7 @@ class AnchorMaskCuda {
* @param[in] num_inds_for_scan Number of indexes for scan(cumsum)
* @param[in] num_anchor_x_inds Number of x-indexes for anchors
* @param[in] num_anchor_y_inds Number of y-indexes for anchors
* @param[in] num_class Number of classes
* @param[in] num_anchor_r_inds Number of rotation-indexes for anchors
* @param[in] min_x_range Minimum x value for pointcloud
* @param[in] min_y_range Minimum y value for pointcloud
Expand All @@ -72,29 +74,30 @@ class AnchorMaskCuda {
* @details Captital variables never change after the compile
*/
AnchorMaskCuda(const int num_inds_for_scan, const int num_anchor_x_inds,
const int num_anchor_y_inds, const int num_anchor_r_inds,
const float min_x_range, const float min_y_range,
const float pillar_x_size, const float pillar_y_size,
const int grid_x_size, const int grid_y_size);
const int num_anchor_y_inds, const int num_class,
const int num_anchor_r_inds, const float min_x_range,
const float min_y_range, const float pillar_x_size,
const float pillar_y_size, const int grid_x_size,
const int grid_y_size);

/**
* @brief call cuda code for making anchor mask
* @param[in] dev_sparse_pillar_map Grid map representation for pillar
* occupancy
* @param[in] dev_cumsum_along_x Array for storing cumsum-ed
* dev_sparse_pillar_map values
* @param[in] dev_cumsum_along_y Array for storing cumsum-ed
* dev_cumsum_along_y values
* @param[in] dev_box_anchors_min_x Array for storing min x value for each
* anchor
* @param[in] dev_box_anchors_min_y Array for storing min y value for each
* anchor
* @param[in] dev_box_anchors_max_x Array for storing max x value for each
* anchor
* @param[in] dev_box_anchors_max_y Array for storing max y value for each
* anchor
* @param[in] dev_box_anchors_max_y Array for storing max y value for each
* anchor
* @param[in] dev_sparse_pillar_map
* Grid map representation for pillar occupancy
* @param[in] dev_cumsum_along_x
* Array for storing cumsum-ed dev_sparse_pillar_map values
* @param[in] dev_cumsum_along_y
* Array for storing cumsum-ed dev_cumsum_along_y values
* @param[in] dev_box_anchors_min_x
* Array for storing min x value for each anchor
* @param[in] dev_box_anchors_min_y
* Array for storing min y value for each anchor
* @param[in] dev_box_anchors_max_x
* Array for storing max x value for each anchor
* @param[in] dev_box_anchors_max_y
* Array for storing max y value for each anchor
* @param[in] dev_box_anchors_max_y
* Array for storing max y value for each anchor
* @param[out] dev_anchor_mask Anchor mask for filtering the network output
* @details dev_* means device memory. Make a mask for filtering pillar
* occupancy area
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,20 @@ namespace apollo {
namespace perception {
namespace lidar {

const int PointPillars::kNumClass = 1;
const int PointPillars::kNumClass = 3;
const int PointPillars::kMaxNumPillars = 12000;
const int PointPillars::kMaxNumPointsPerPillar = 100;
const int PointPillars::kNumPointFeature = 4;
const int PointPillars::kPfeOutputSize = kMaxNumPillars * 64;
const int PointPillars::kGridXSize = 432;
const int PointPillars::kGridYSize = 496;
const int PointPillars::kGridXSize = 280;
const int PointPillars::kGridYSize = 320;
const int PointPillars::kGridZSize = 1;
const int PointPillars::kRpnInputSize = 64 * kGridXSize * kGridYSize;
const int PointPillars::kNumAnchorXInds = kGridXSize * 0.5;
const int PointPillars::kNumAnchorYInds = kGridYSize * 0.5;
const int PointPillars::kNumAnchorRInds = 2;
const int PointPillars::kNumAnchor =
kNumAnchorXInds * kNumAnchorYInds * kNumAnchorRInds;
kNumAnchorXInds * kNumAnchorYInds * kNumClass * kNumAnchorRInds;
const int PointPillars::kNumOutputBoxFeature = 7;
const int PointPillars::kRpnBoxOutputSize = kNumAnchor * kNumOutputBoxFeature;
const int PointPillars::kRpnClsOutputSize = kNumAnchor * kNumClass;
Expand All @@ -67,20 +67,20 @@ const int PointPillars::kNumThreads = 64;
// common.h
const int PointPillars::kNumBoxCorners = 4;
// TODO(chenjiahao): kNumBoxCorners is actually used as kNumPointFeature
const float PointPillars::kPillarXSize = 0.16f;
const float PointPillars::kPillarYSize = 0.16f;
const float PointPillars::kPillarXSize = 0.25f;
const float PointPillars::kPillarYSize = 0.25f;
const float PointPillars::kPillarZSize = 4.0f;
const float PointPillars::kMinXRange = 0.0f;
const float PointPillars::kMinYRange = -39.68f;
const float PointPillars::kMinYRange = -40.0f;
const float PointPillars::kMinZRange = -3.0f;
const float PointPillars::kMaxXRange = 69.12f;
const float PointPillars::kMaxYRange = 39.68f;
const float PointPillars::kMaxXRange = 70.0f;
const float PointPillars::kMaxYRange = 40.0f;
const float PointPillars::kMaxZRange = 1.0f;
const float PointPillars::kSensorHeight = 1.73f;
// TODO(chenjiahao): get kSensorHeight's value from sensor's height param
const float PointPillars::kAnchorDxSize = 1.6f;
const float PointPillars::kAnchorDySize = 3.9f;
const float PointPillars::kAnchorDzSize = 1.56f;
// TODO(chenjiahao): kSensorHeight need to get from sensor's height param
const std::vector<float> PointPillars::kAnchorDxSizes{1.6f, 0.6f, 0.6f};
const std::vector<float> PointPillars::kAnchorDySizes{3.9f, 1.76f, 0.8f};
const std::vector<float> PointPillars::kAnchorDzSizes{1.56f, 1.73f, 1.73f};

PointPillars::PointPillars(const bool reproduce_result_mode,
const float score_threshold,
Expand All @@ -107,18 +107,18 @@ PointPillars::PointPillars(const bool reproduce_result_mode,

anchor_mask_cuda_ptr_.reset(
new AnchorMaskCuda(kNumIndsForScan, kNumAnchorXInds, kNumAnchorYInds,
kNumAnchorRInds, kMinXRange, kMinYRange, kPillarXSize,
kPillarYSize, kGridXSize, kGridYSize));
kNumClass, kNumAnchorRInds, kMinXRange, kMinYRange,
kPillarXSize, kPillarYSize, kGridXSize, kGridYSize));

scatter_cuda_ptr_.reset(
new ScatterCuda(kNumThreads, kMaxNumPillars, kGridXSize, kGridYSize));

const float float_min = std::numeric_limits<float>::lowest();
const float float_max = std::numeric_limits<float>::max();
postprocess_cuda_ptr_.reset(new PostprocessCuda(
float_min, float_max, kNumAnchorXInds, kNumAnchorYInds, kNumAnchorRInds,
score_threshold_, kNumThreads, nms_overlap_threshold_, kNumBoxCorners,
kNumOutputBoxFeature, kNumClass));
float_min, float_max, kNumAnchorXInds, kNumAnchorYInds, kNumClass,
kNumAnchorRInds, score_threshold_, kNumThreads,
nms_overlap_threshold_, kNumBoxCorners, kNumOutputBoxFeature));

DeviceMemoryMalloc();
InitTRT();
Expand Down Expand Up @@ -334,16 +334,18 @@ void PointPillars::GenerateAnchors(float* anchors_px_, float* anchors_py_,

for (int y = 0; y < kNumAnchorYInds; ++y) {
for (int x = 0; x < kNumAnchorXInds; ++x) {
for (int r = 0; r < kNumAnchorRInds; ++r) {
int ind =
y * kNumAnchorXInds * kNumAnchorRInds + x * kNumAnchorRInds + r;
anchors_px_[ind] = anchor_x_count[x];
anchors_py_[ind] = anchor_y_count[y];
anchors_ro_[ind] = anchor_r_count[r];
anchors_pz_[ind] = -1 * kSensorHeight;
anchors_dx_[ind] = kAnchorDxSize;
anchors_dy_[ind] = kAnchorDySize;
anchors_dz_[ind] = kAnchorDzSize;
for (int c = 0; c < kNumClass; ++c) {
for (int r = 0; r < kNumAnchorRInds; ++r) {
int ind = y * kNumAnchorXInds * kNumClass * kNumAnchorRInds +
x * kNumClass * kNumAnchorRInds + c * kNumAnchorRInds + r;
anchors_px_[ind] = anchor_x_count[x];
anchors_py_[ind] = anchor_y_count[y];
anchors_ro_[ind] = anchor_r_count[r];
anchors_pz_[ind] = -1 * kSensorHeight;
anchors_dx_[ind] = kAnchorDxSizes[c];
anchors_dy_[ind] = kAnchorDySizes[c];
anchors_dz_[ind] = kAnchorDzSizes[c];
}
}
}
}
Expand Down Expand Up @@ -381,34 +383,38 @@ void PointPillars::ConvertAnchors2BoxAnchors(float* anchors_px,
float* box_anchors_min_y_,
float* box_anchors_max_x_,
float* box_anchors_max_y_) {
// flipping box's dimension at direction that rotates 90 degree
// flipping box's dimension
float flipped_anchors_dx[kNumAnchor];
flipped_anchors_dx[0] = 0;
float flipped_anchors_dy[kNumAnchor];
flipped_anchors_dy[0] = 0;
memset(flipped_anchors_dx, 0, kNumAnchor * sizeof(float));
memset(flipped_anchors_dy, 0, kNumAnchor * sizeof(float));
for (int x = 0; x < kNumAnchorXInds; ++x) {
for (int y = 0; y < kNumAnchorYInds; ++y) {
int base_ind =
x * kNumAnchorYInds * kNumAnchorRInds + y * kNumAnchorRInds;
flipped_anchors_dx[base_ind + 0] = kAnchorDxSize;
flipped_anchors_dy[base_ind + 0] = kAnchorDySize;
flipped_anchors_dx[base_ind + 1] = kAnchorDySize;
flipped_anchors_dy[base_ind + 1] = kAnchorDxSize;
for (int c = 0; c < kNumClass; ++c) {
int base_ind = x * kNumAnchorYInds * kNumClass * kNumAnchorRInds +
y * kNumClass * kNumAnchorRInds + c * kNumAnchorRInds;
flipped_anchors_dx[base_ind + 0] = kAnchorDxSizes[c];
flipped_anchors_dy[base_ind + 0] = kAnchorDySizes[c];
flipped_anchors_dx[base_ind + 1] = kAnchorDySizes[c];
flipped_anchors_dy[base_ind + 1] = kAnchorDxSizes[c];
}
}
}
for (int x = 0; x < kNumAnchorXInds; ++x) {
for (int y = 0; y < kNumAnchorYInds; ++y) {
for (int r = 0; r < kNumAnchorRInds; ++r) {
int ind =
x * kNumAnchorYInds * kNumAnchorRInds + y * kNumAnchorRInds + r;
box_anchors_min_x_[ind] =
anchors_px[ind] - flipped_anchors_dx[ind] / 2.0f;
box_anchors_min_y_[ind] =
anchors_py[ind] - flipped_anchors_dy[ind] / 2.0f;
box_anchors_max_x_[ind] =
anchors_px[ind] + flipped_anchors_dx[ind] / 2.0f;
box_anchors_max_y_[ind] =
anchors_py[ind] + flipped_anchors_dy[ind] / 2.0f;
for (int c = 0; c < kNumClass; ++c) {
for (int r = 0; r < kNumAnchorRInds; ++r) {
int ind = x * kNumAnchorYInds * kNumClass * kNumAnchorRInds +
y * kNumClass * kNumAnchorRInds + c * kNumAnchorRInds + r;
box_anchors_min_x_[ind] =
anchors_px[ind] - flipped_anchors_dx[ind] / 2.0f;
box_anchors_min_y_[ind] =
anchors_py[ind] - flipped_anchors_dy[ind] / 2.0f;
box_anchors_max_x_[ind] =
anchors_px[ind] + flipped_anchors_dx[ind] / 2.0f;
box_anchors_max_y_[ind] =
anchors_py[ind] + flipped_anchors_dy[ind] / 2.0f;
}
}
}
}
Expand Down Expand Up @@ -538,7 +544,6 @@ void PointPillars::PreprocessCPU(const float* in_points_array,
delete[] sparse_pillar_map;
}

// TODO(chenjiahao): fix this function
void PointPillars::PreprocessGPU(const float* in_points_array,
const int in_num_points) {
float* dev_points;
Expand All @@ -548,15 +553,19 @@ void PointPillars::PreprocessGPU(const float* in_points_array,
in_num_points * kNumBoxCorners * sizeof(float),
cudaMemcpyHostToDevice));

GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0,
kNumIndsForScan * kNumIndsForScan * sizeof(int)));
GPU_CHECK(cudaMemset(dev_pillar_point_feature_, 0,
kMaxNumPillars * kMaxNumPointsPerPillar *
kNumPointFeature * sizeof(float)));
GPU_CHECK(cudaMemset(dev_x_coors_, 0, kMaxNumPillars * sizeof(int)));
GPU_CHECK(cudaMemset(dev_y_coors_, 0, kMaxNumPillars * sizeof(int)));
GPU_CHECK(cudaMemset(dev_num_points_per_pillar_, 0,
kMaxNumPillars * sizeof(float)));
GPU_CHECK(cudaMemset(dev_pillar_point_feature_, 0,
kMaxNumPillars * kMaxNumPointsPerPillar *
kNumPointFeature * sizeof(float)));
GPU_CHECK(cudaMemset(dev_pillar_coors_, 0,
kMaxNumPillars * 4 * sizeof(float)));
GPU_CHECK(cudaMemset(dev_sparse_pillar_map_, 0,
kNumIndsForScan * kNumIndsForScan * sizeof(int)));
host_pillar_count_[0] = 0;

GPU_CHECK(cudaMemset(dev_anchor_mask_, 0, kNumAnchor * sizeof(int)));

preprocess_points_cuda_ptr_->DoPreprocessPointsCuda(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ class PointPillars {
static const float kMaxYRange;
static const float kMaxZRange;
static const float kSensorHeight;
static const float kAnchorDxSize;
static const float kAnchorDySize;
static const float kAnchorDzSize;
static const std::vector<float> kAnchorDxSizes;
static const std::vector<float> kAnchorDySizes;
static const std::vector<float> kAnchorDzSizes;

// initialize in initializer list
const bool reproduce_result_mode_;
Expand All @@ -161,13 +161,6 @@ class PointPillars {
float* box_anchors_max_x_;
float* box_anchors_max_y_;

// cuda malloc
float* dev_pillar_x_in_coors_;
float* dev_pillar_y_in_coors_;
float* dev_pillar_z_in_coors_;
float* dev_pillar_i_in_coors_;
int* dev_pillar_count_histo_;

int* dev_x_coors_;
int* dev_y_coors_;
float* dev_num_points_per_pillar_;
Expand Down Expand Up @@ -308,7 +301,8 @@ class PointPillars {
* Represents maximum y value for a corresponding anchor
* @details Make box anchors for nms
*/
void ConvertAnchors2BoxAnchors(float* anchors_px_, float* anchors_py_,
void ConvertAnchors2BoxAnchors(float* anchors_px_,
float* anchors_py_,
float* box_anchors_min_x_,
float* box_anchors_min_y_,
float* box_anchors_max_x_,
Expand All @@ -331,7 +325,8 @@ class PointPillars {
* @param[in] rpn_onnx_file Region Proposal Network ONNX file path
* @details Variables could be changed through point_pillars_detection
*/
PointPillars(const bool reproduce_result_mode, const float score_threshold,
PointPillars(const bool reproduce_result_mode,
const float score_threshold,
const float nms_overlap_threshold,
const std::string pfe_onnx_file,
const std::string rpn_onnx_file);
Expand All @@ -345,7 +340,8 @@ class PointPillars {
* @param[out] out_labels Network output object's label
* @details This is an interface for the algorithm
*/
void DoInference(const float* in_points_array, const int in_num_points,
void DoInference(const float* in_points_array,
const int in_num_points,
std::vector<float>* out_detections,
std::vector<int>* out_labels);
};
Expand Down
Loading

0 comments on commit 6125475

Please sign in to comment.