@@ -50,21 +50,17 @@ __device__ int check_rect_cross(const Point &p1, const Point &p2,
50
50
}
51
51
52
52
__device__ inline int check_in_box2d(const float *box, const Point &p) {
  // Tests whether point p falls inside the x-y footprint of a rotated box.
  // params: box (7) [x, y, z, dx, dy, dz, heading]; only x, y, dx, dy and
  //         heading are read here.
  // returns: 1 if p is inside the footprint (grown by MARGIN on every side),
  //          0 otherwise.
  const float MARGIN = 1e-2;

  // Offset of the point from the box center.
  const float off_x = p.x - box[0];
  const float off_y = p.y - box[1];

  // Rotate the offset by -heading, i.e. in the opposite direction of the box,
  // so the test below can be done against axis-aligned half extents.
  const float angle_cos = cos(-box[6]);
  const float angle_sin = sin(-box[6]);
  const float local_x = off_x * angle_cos + off_y * (-angle_sin);
  const float local_y = off_x * angle_sin + off_y * angle_cos;

  // Compare each local coordinate against half the box size on that axis.
  const bool within_dx = fabs(local_x) < box[3] / 2 + MARGIN;
  const bool within_dy = fabs(local_y) < box[4] / 2 + MARGIN;
  return within_dx && within_dy;
}
69
65
70
66
__device__ inline int intersection (const Point &p1, const Point &p0,
@@ -116,16 +112,19 @@ __device__ inline int point_cmp(const Point &a, const Point &b,
116
112
}
117
113
118
114
__device__ inline float box_overlap (const float *box_a, const float *box_b) {
119
- // params: box_a (5) [x1, y1, x2, y2, angle ]
120
- // params: box_b (5) [x1, y1, x2, y2, angle ]
115
+ // params box_a: [x, y, z, dx, dy, dz, heading ]
116
+ // params box_b: [x, y, z, dx, dy, dz, heading ]
121
117
122
- float a_x1 = box_a[0 ], a_y1 = box_a[1 ], a_x2 = box_a[2 ], a_y2 = box_a[3 ],
123
- a_angle = box_a[4 ];
124
- float b_x1 = box_b[0 ], b_y1 = box_b[1 ], b_x2 = box_b[2 ], b_y2 = box_b[3 ],
125
- b_angle = box_b[4 ];
118
+ float a_angle = box_a[6 ], b_angle = box_b[6 ];
119
+ float a_dx_half = box_a[3 ] / 2 , b_dx_half = box_b[3 ] / 2 ,
120
+ a_dy_half = box_a[4 ] / 2 , b_dy_half = box_b[4 ] / 2 ;
121
+ float a_x1 = box_a[0 ] - a_dx_half, a_y1 = box_a[1 ] - a_dy_half;
122
+ float a_x2 = box_a[0 ] + a_dx_half, a_y2 = box_a[1 ] + a_dy_half;
123
+ float b_x1 = box_b[0 ] - b_dx_half, b_y1 = box_b[1 ] - b_dy_half;
124
+ float b_x2 = box_b[0 ] + b_dx_half, b_y2 = box_b[1 ] + b_dy_half;
126
125
127
- Point center_a ((a_x1 + a_x2) / 2 , (a_y1 + a_y2) / 2 );
128
- Point center_b ((b_x1 + b_x2) / 2 , (b_y1 + b_y2) / 2 );
126
+ Point center_a (box_a[ 0 ], box_a[ 1 ] );
127
+ Point center_b (box_b[ 0 ], box_b[ 1 ] );
129
128
130
129
Point box_a_corners[5 ];
131
130
box_a_corners[0 ].set (a_x1, a_y1);
@@ -209,50 +208,36 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) {
209
208
}
210
209
211
210
__device__ inline float iou_bev(const float *box_a, const float *box_b) {
  // Intersection-over-union of two boxes in the x-y plane.
  // params box_a: [x, y, z, dx, dy, dz, heading]
  // params box_b: [x, y, z, dx, dy, dz, heading]
  // Each footprint area is dx * dy; the shared area comes from box_overlap().
  const float area_a = box_a[3] * box_a[4];
  const float area_b = box_b[3] * box_b[4];
  const float inter_area = box_overlap(box_a, box_b);

  // The union is floored at EPS before dividing (presumably to avoid a
  // division by zero for degenerate boxes -- EPS is defined elsewhere).
  const float union_area = fmaxf(area_a + area_b - inter_area, EPS);
  return inter_area / union_area;
}
219
218
220
- __global__ void iou3d_boxes_overlap_bev_forward_cuda_kernel (
221
- const int num_a, const float *boxes_a, const int num_b,
222
- const float *boxes_b, float *ans_overlap) {
223
- CUDA_2D_KERNEL_LOOP (b_idx, num_b, a_idx, num_a) {
224
- if (a_idx >= num_a || b_idx >= num_b) {
225
- return ;
226
- }
227
- const float *cur_box_a = boxes_a + a_idx * 5 ;
228
- const float *cur_box_b = boxes_b + b_idx * 5 ;
229
- float s_overlap = box_overlap (cur_box_a, cur_box_b);
230
- ans_overlap[a_idx * num_b + b_idx] = s_overlap;
231
- }
232
- }
233
-
234
__global__ void iou3d_boxes_iou3d_forward_cuda_kernel(const int num_a,
                                                      const float *boxes_a,
                                                      const int num_b,
                                                      const float *boxes_b,
                                                      float *ans_iou) {
  // Fills the pairwise IoU matrix between two sets of boxes.
  // params: boxes_a (num_a, 7) [x, y, z, dx, dy, dz, heading]
  // params: boxes_b (num_b, 7) [x, y, z, dx, dy, dz, heading]
  // params: ans_iou (num_a, num_b), written at [a_idx * num_b + b_idx]
  // Note: despite the kernel name, the value stored is the x-y plane IoU
  // returned by iou_bev(); z and dz are not read here.
  CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) {
    if (a_idx >= num_a || b_idx >= num_b) {
      return;
    }

    // Do the index arithmetic in 64 bits: a_idx * num_b no longer fits in a
    // 32-bit int once num_a * num_b exceeds INT_MAX, which would make the
    // store below land at a wrapped (wrong or out-of-bounds) offset.
    const long long row = static_cast<long long>(a_idx);
    const long long col = static_cast<long long>(b_idx);

    const float *cur_box_a = boxes_a + row * 7;
    const float *cur_box_b = boxes_b + col * 7;
    const float cur_iou_bev = iou_bev(cur_box_a, cur_box_b);
    ans_iou[row * num_b + col] = cur_iou_bev;
  }
}
250
235
251
- __global__ void nms_forward_cuda_kernel (const int boxes_num,
252
- const float nms_overlap_thresh,
253
- const float *boxes,
254
- unsigned long long *mask) {
255
- // params: boxes (N, 5 ) [x1, y1, x2, y2, ry ]
236
+ __global__ void iou3d_nms3d_forward_cuda_kernel (const int boxes_num,
237
+ const float nms_overlap_thresh,
238
+ const float *boxes,
239
+ unsigned long long *mask) {
240
+ // params: boxes (N, 7 ) [x, y, z, dx, dy, dz, heading ]
256
241
// params: mask (N, N/THREADS_PER_BLOCK_NMS)
257
242
const int blocks =
258
243
(boxes_num + THREADS_PER_BLOCK_NMS - 1 ) / THREADS_PER_BLOCK_NMS;
@@ -264,25 +249,29 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num,
264
249
const int col_size = fminf (boxes_num - col_start * THREADS_PER_BLOCK_NMS,
265
250
THREADS_PER_BLOCK_NMS);
266
251
267
- __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5 ];
252
+ __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7 ];
268
253
269
254
if (threadIdx .x < col_size) {
270
- block_boxes[threadIdx .x * 5 + 0 ] =
271
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 0 ];
272
- block_boxes[threadIdx .x * 5 + 1 ] =
273
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 1 ];
274
- block_boxes[threadIdx .x * 5 + 2 ] =
275
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 2 ];
276
- block_boxes[threadIdx .x * 5 + 3 ] =
277
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 3 ];
278
- block_boxes[threadIdx .x * 5 + 4 ] =
279
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 4 ];
255
+ block_boxes[threadIdx .x * 7 + 0 ] =
256
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 0 ];
257
+ block_boxes[threadIdx .x * 7 + 1 ] =
258
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 1 ];
259
+ block_boxes[threadIdx .x * 7 + 2 ] =
260
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 2 ];
261
+ block_boxes[threadIdx .x * 7 + 3 ] =
262
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 3 ];
263
+ block_boxes[threadIdx .x * 7 + 4 ] =
264
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 4 ];
265
+ block_boxes[threadIdx .x * 7 + 5 ] =
266
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 5 ];
267
+ block_boxes[threadIdx .x * 7 + 6 ] =
268
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 6 ];
280
269
}
281
270
__syncthreads ();
282
271
283
272
if (threadIdx .x < row_size) {
284
273
const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx .x ;
285
- const float *cur_box = boxes + cur_box_idx * 5 ;
274
+ const float *cur_box = boxes + cur_box_idx * 7 ;
286
275
287
276
int i = 0 ;
288
277
unsigned long long t = 0 ;
@@ -291,7 +280,7 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num,
291
280
start = threadIdx .x + 1 ;
292
281
}
293
282
for (i = start; i < col_size; i++) {
294
- if (iou_bev (cur_box, block_boxes + i * 5 ) > nms_overlap_thresh) {
283
+ if (iou_bev (cur_box, block_boxes + i * 7 ) > nms_overlap_thresh) {
295
284
t |= 1ULL << i;
296
285
}
297
286
}
@@ -303,20 +292,24 @@ __global__ void nms_forward_cuda_kernel(const int boxes_num,
303
292
}
304
293
305
294
__device__ inline float iou_normal(float const *const a, float const *const b) {
  // Axis-aligned IoU of two boxes in the x-y plane.
  // params: a: [x, y, z, dx, dy, dz, heading]
  // params: b: [x, y, z, dx, dy, dz, heading]
  // Only the center (x, y) and size (dx, dy) are read; heading is not used,
  // so both boxes are treated as unrotated rectangles.

  // Half extents of each box along x and y.
  const float a_half_dx = a[3] / 2;
  const float a_half_dy = a[4] / 2;
  const float b_half_dx = b[3] / 2;
  const float b_half_dy = b[4] / 2;

  // Bounds of the intersection rectangle.
  const float x_lo = fmaxf(a[0] - a_half_dx, b[0] - b_half_dx);
  const float x_hi = fminf(a[0] + a_half_dx, b[0] + b_half_dx);
  const float y_lo = fmaxf(a[1] - a_half_dy, b[1] - b_half_dy);
  const float y_hi = fminf(a[1] + a_half_dy, b[1] + b_half_dy);

  // Negative extents mean the rectangles do not overlap on that axis.
  const float overlap_w = fmaxf(x_hi - x_lo, 0.f);
  const float overlap_h = fmaxf(y_hi - y_lo, 0.f);
  const float inter_area = overlap_w * overlap_h;

  const float area_a = a[3] * a[4];
  const float area_b = b[3] * b[4];
  // The union is floored at EPS (defined elsewhere) before dividing.
  return inter_area / fmaxf(area_a + area_b - inter_area, EPS);
}
314
308
315
- __global__ void nms_normal_forward_cuda_kernel (const int boxes_num,
316
- const float nms_overlap_thresh,
317
- const float *boxes,
318
- unsigned long long *mask) {
319
- // params: boxes (N, 5) [x1, y1, x2, y2, ry]
309
+ __global__ void iou3d_nms3d_normal_forward_cuda_kernel (
310
+ const int boxes_num, const float nms_overlap_thresh, const float *boxes,
311
+ unsigned long long *mask) {
312
+ // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
320
313
// params: mask (N, N/THREADS_PER_BLOCK_NMS)
321
314
322
315
const int blocks =
@@ -329,25 +322,29 @@ __global__ void nms_normal_forward_cuda_kernel(const int boxes_num,
329
322
const int col_size = fminf (boxes_num - col_start * THREADS_PER_BLOCK_NMS,
330
323
THREADS_PER_BLOCK_NMS);
331
324
332
- __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5 ];
325
+ __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7 ];
333
326
334
327
if (threadIdx .x < col_size) {
335
- block_boxes[threadIdx .x * 5 + 0 ] =
336
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 0 ];
337
- block_boxes[threadIdx .x * 5 + 1 ] =
338
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 1 ];
339
- block_boxes[threadIdx .x * 5 + 2 ] =
340
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 2 ];
341
- block_boxes[threadIdx .x * 5 + 3 ] =
342
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 3 ];
343
- block_boxes[threadIdx .x * 5 + 4 ] =
344
- boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 5 + 4 ];
328
+ block_boxes[threadIdx .x * 7 + 0 ] =
329
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 0 ];
330
+ block_boxes[threadIdx .x * 7 + 1 ] =
331
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 1 ];
332
+ block_boxes[threadIdx .x * 7 + 2 ] =
333
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 2 ];
334
+ block_boxes[threadIdx .x * 7 + 3 ] =
335
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 3 ];
336
+ block_boxes[threadIdx .x * 7 + 4 ] =
337
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 4 ];
338
+ block_boxes[threadIdx .x * 7 + 5 ] =
339
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 5 ];
340
+ block_boxes[threadIdx .x * 7 + 6 ] =
341
+ boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx .x ) * 7 + 6 ];
345
342
}
346
343
__syncthreads ();
347
344
348
345
if (threadIdx .x < row_size) {
349
346
const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx .x ;
350
- const float *cur_box = boxes + cur_box_idx * 5 ;
347
+ const float *cur_box = boxes + cur_box_idx * 7 ;
351
348
352
349
int i = 0 ;
353
350
unsigned long long t = 0 ;
@@ -356,7 +353,7 @@ __global__ void nms_normal_forward_cuda_kernel(const int boxes_num,
356
353
start = threadIdx .x + 1 ;
357
354
}
358
355
for (i = start; i < col_size; i++) {
359
- if (iou_normal (cur_box, block_boxes + i * 5 ) > nms_overlap_thresh) {
356
+ if (iou_normal (cur_box, block_boxes + i * 7 ) > nms_overlap_thresh) {
360
357
t |= 1ULL << i;
361
358
}
362
359
}
0 commit comments