Skip to content

Commit

Permalink
add background class in box_nms (apache#14058)
Browse files Browse the repository at this point in the history
* add background class in box_nms

* add unittest

* trigger CI
  • Loading branch information
arcadiaphy authored and vdantu committed Mar 31, 2019
1 parent 1aaa946 commit 6a4f1c9
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 46 deletions.
24 changes: 22 additions & 2 deletions src/operator/contrib/bounding_box-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,32 @@ struct nms_impl {

namespace mshadow_op {
// Element-wise binary comparison: returns 1 (in DType) when a < b, else 0.
// Used to build boolean masks over score/class tensors.
struct less_than : public mxnet_op::tunable {
  template<typename DType>
  MSHADOW_XINLINE static DType Map(DType a, DType b) {
    return static_cast<DType>(a < b);
  }
};  // struct less_than

// Element-wise binary comparison: returns 1 (in DType) when a > b, else 0.
struct greater_than : public mxnet_op::tunable {
  template<typename DType>
  MSHADOW_XINLINE static DType Map(DType a, DType b) {
    const bool is_greater = (a > b);
    return static_cast<DType>(is_greater);
  }
};  // struct greater_than

// Element-wise binary comparison: returns 1 (in DType) when a != b, else 0.
struct not_equal : public mxnet_op::tunable {
  template<typename DType>
  MSHADOW_XINLINE static DType Map(DType a, DType b) {
    const bool differs = (a != b);
    return static_cast<DType>(differs);
  }
};  // struct not_equal

// Element-wise logical AND: returns 1 (in DType) when both operands are
// nonzero, else 0. Each operand is interpreted as a boolean.
struct bool_and : public mxnet_op::tunable {
  template<typename DType>
  MSHADOW_XINLINE static DType Map(DType a, DType b) {
    const bool lhs = static_cast<bool>(a);
    const bool rhs = static_cast<bool>(b);
    return static_cast<DType>(lhs && rhs);
  }
};  // struct bool_and
} // namespace mshadow_op

} // namespace op
Expand Down
26 changes: 10 additions & 16 deletions src/operator/contrib/bounding_box-inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,26 +38,20 @@ namespace mxnet {
namespace op {

template<typename DType>
struct valid_value {
  // Predicate for thrust::copy_if: an element is kept when its stencil
  // (flag) value is nonzero.
  __host__ __device__ bool operator()(const DType x) {
    return static_cast<bool>(x);
  }
};

/*!
 * \brief Compact `value` into `out`, keeping only the entries whose
 *        corresponding `flag` element is nonzero (stencil-based copy_if).
 * \param out   destination tensor; must have room for every kept element
 * \param value source values
 * \param flag  stencil tensor; value[i] is copied when flag[i] is nonzero
 * \return number of elements copied to `out`
 */
template<typename DType, typename FType>
int CopyIf(mshadow::Tensor<gpu, 1, DType> out,
           mshadow::Tensor<gpu, 1, DType> value,
           mshadow::Tensor<gpu, 1, FType> flag) {
  valid_value<FType> pred;
  DType *end_out = thrust::copy_if(thrust::device, value.dptr_, value.dptr_ + value.MSize(),
                                   flag.dptr_, out.dptr_, pred);
  // Distance from the output base gives the count of kept elements.
  return end_out - out.dptr_;
}

// compute line intersect along either height or width
Expand Down
49 changes: 32 additions & 17 deletions src/operator/contrib/bounding_box-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct BoxNMSParam : public dmlc::Parameter<BoxNMSParam> {
int coord_start;
int score_index;
int id_index;
int background_id;
bool force_suppress;
int in_format;
int out_format;
Expand All @@ -67,6 +68,8 @@ struct BoxNMSParam : public dmlc::Parameter<BoxNMSParam> {
.describe("Index of the scores/confidence of boxes.");
DMLC_DECLARE_FIELD(id_index).set_default(-1)
.describe("Optional, index of the class categories, -1 to disable.");
DMLC_DECLARE_FIELD(background_id).set_default(-1)
.describe("Optional, id of the background class which will be ignored in nms.");
DMLC_DECLARE_FIELD(force_suppress).set_default(false)
.describe("Optional, if set false and id_index is provided, nms will only apply"
" to boxes belongs to the same category");
Expand Down Expand Up @@ -103,7 +106,7 @@ inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs,
<< ishape << " provided";
int width_elem = ishape[indim - 1];
int expected = 5;
if (param.id_index > 0) {
if (param.id_index >= 0) {
expected += 1;
}
CHECK_GE(width_elem, expected)
Expand Down Expand Up @@ -145,24 +148,20 @@ inline uint32_t BoxNMSNumVisibleOutputs(const NodeAttrs& attrs) {
return static_cast<uint32_t>(1);
}

template<typename DType>
int FilterScores(mshadow::Tensor<cpu, 1, DType> out_scores,
mshadow::Tensor<cpu, 1, int32_t> out_sorted_index,
mshadow::Tensor<cpu, 1, DType> scores,
mshadow::Tensor<cpu, 1, int32_t> sorted_index,
float valid_thresh) {
template<typename DType, typename FType>
int CopyIf(mshadow::Tensor<cpu, 1, DType> out,
mshadow::Tensor<cpu, 1, DType> value,
mshadow::Tensor<cpu, 1, FType> flag) {
index_t j = 0;
for (index_t i = 0; i < scores.size(0); i++) {
if (scores[i] > valid_thresh) {
out_scores[j] = scores[i];
out_sorted_index[j] = sorted_index[i];
for (index_t i = 0; i < flag.size(0); i++) {
if (static_cast<bool>(flag[i])) {
out[j] = value[i];
j++;
}
}
return j;
}


struct corner_to_center {
template<typename DType>
MSHADOW_XINLINE static void Map(int i, DType *data, int stride) {
Expand Down Expand Up @@ -351,6 +350,7 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs,
int num_batch = indim <= 2? 1 : in_shape.ProdShape(0, indim - 2);
int num_elem = in_shape[indim - 2];
int width_elem = in_shape[indim - 1];
bool class_exist = param.id_index >= 0;
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
Tensor<xpu, 3, DType> data = inputs[box_nms_enum::kData]
.get_with_shape<xpu, 3, DType>(Shape3(num_batch, num_elem, width_elem), s);
Expand All @@ -366,7 +366,7 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs,

// index
index_t int32_size = sort_index_shape.Size() * 3 + batch_start_shape.Size();
index_t dtype_size = sort_index_shape.Size() * 2;
index_t dtype_size = sort_index_shape.Size() * 3;
if (req[0] == kWriteInplace) {
dtype_size += buffer_shape.Size();
}
Expand All @@ -385,6 +385,7 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs,
Tensor<xpu, 1, DType> scores(workspace.dptr_ + int32_offset,
sort_index_shape, s);
Tensor<xpu, 1, DType> areas(scores.dptr_ + scores.MSize(), sort_index_shape, s);
Tensor<xpu, 1, DType> classes(areas.dptr_ + areas.MSize(), sort_index_shape, s);
Tensor<xpu, 3, DType> buffer = data;
if (req[0] == kWriteInplace) {
// make copy
Expand All @@ -405,16 +406,30 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs,
return;
}

// use batch_id and areas as temporary storage
// use classes, areas and scores as temporary storage
Tensor<xpu, 1, DType> all_scores = areas;
// Tensor<xpu, 1, DType> all_sorted_index = areas;
all_scores = reshape(slice<2>(buffer, score_index, score_index + 1), all_scores.shape_);
all_sorted_index = range<int32_t>(0, num_batch * num_elem);
Tensor<xpu, 1, DType> all_classes = classes;
if (class_exist) {
all_classes = reshape(slice<2>(buffer, id_index, id_index + 1), classes.shape_);
}

// filter scores but keep original sorted_index value
// move valid score and index to the front, return valid size
int num_valid = mxnet::op::FilterScores(scores, sorted_index, all_scores, all_sorted_index,
param.valid_thresh);
Tensor<xpu, 1, DType> valid_box = scores;
if (class_exist) {
valid_box = F<mshadow_op::bool_and>(
F<mshadow_op::greater_than>(all_scores, ScalarExp<DType>(param.valid_thresh)),
F<mshadow_op::not_equal>(all_classes, ScalarExp<DType>(param.background_id)));
} else {
valid_box = F<mshadow_op::greater_than>(all_scores, ScalarExp<DType>(param.valid_thresh));
}
classes = F<mshadow_op::identity>(valid_box);
valid_box = classes;
int num_valid = mxnet::op::CopyIf(scores, all_scores, valid_box);
mxnet::op::CopyIf(sorted_index, all_sorted_index, valid_box);

// if everything is filtered, output -1
if (num_valid == 0) {
record = -1;
Expand Down
8 changes: 6 additions & 2 deletions src/operator/contrib/bounding_box.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ NNVM_REGISTER_OP(_contrib_box_nms)
.describe(R"code(Apply non-maximum suppression to input.
The output will be sorted in descending order according to `score`. Boxes with
overlaps larger than `overlap_thresh` and smaller scores will be removed and
filled with -1, the corresponding position will be recorded for backward propogation.
overlaps larger than `overlap_thresh`, smaller scores and background boxes
will be removed and filled with -1, the corresponding position will be recorded
for backward propogation.
During back-propagation, the gradient will be copied to the original
position according to the input index. For positions that have been suppressed,
Expand All @@ -60,6 +61,9 @@ additional elements are allowed.
- `id_index`: optional, use -1 to ignore, useful if `force_suppress=False`, which means
we will skip highly overlapped boxes if one is `apple` while the other is `car`.
- `background_id`: optional, default=-1, class id for background boxes, useful
when `id_index >= 0` which means boxes with background id will be filtered before nms.
- `coord_start`: required, default=2, the starting index of the 4 coordinates.
Two formats are supported:
Expand Down
41 changes: 32 additions & 9 deletions tests/python/unittest/test_contrib_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,23 @@
import unittest

def test_box_nms_op():
def test_box_nms_forward(data, expected, thresh=0.5, valid=0, topk=-1, coord=2, score=1, cid=0,
force=False, in_format='corner', out_format='corner'):
def test_box_nms_forward(data, expected, thresh=0.5, valid=0, topk=-1, coord=2, score=1, cid=0, bid=-1,
force=False, in_format='corner', out_format='corner'):
for dtype in ['float16', 'float32', 'float64']:
data = mx.nd.array(data, dtype=dtype)
out = mx.contrib.nd.box_nms(data, overlap_thresh=thresh, valid_thresh=valid, topk=topk,
coord_start=coord, score_index=score, id_index=cid,
force_suppress=force, in_format=in_format, out_format=out_format)
coord_start=coord, score_index=score, id_index=cid, background_id=bid,
force_suppress=force, in_format=in_format, out_format=out_format)
assert_almost_equal(out.asnumpy(), expected.astype(dtype), rtol=1e-3, atol=1e-3)

def test_box_nms_backward(data, grad, expected, thresh=0.5, valid=0, topk=-1, coord=2, score=1,
cid=0, force=False, in_format='corner', out_format='corner'):
cid=0, bid=-1, force=False, in_format='corner', out_format='corner'):
in_var = mx.sym.Variable('data')
arr_data = mx.nd.array(data)
arr_grad = mx.nd.empty(arr_data.shape)
op = mx.contrib.sym.box_nms(in_var, overlap_thresh=thresh, valid_thresh=valid, topk=topk,
coord_start=coord, score_index=score, id_index=cid,
force_suppress=force, in_format=in_format, out_format=out_format)
coord_start=coord, score_index=score, id_index=cid, background_id=bid,
force_suppress=force, in_format=in_format, out_format=out_format)
exe = op.bind(ctx=default_context(), args=[arr_data], args_grad=[arr_grad])
exe.forward(is_train=True)
exe.backward(mx.nd.array(grad))
Expand Down Expand Up @@ -91,8 +91,8 @@ def swap_position(data, expected, coord=2, score=1, cid=0, new_col=0):
[0, 0.3, 0.1, 0.1, 0.14, 0.14], [2, 0.6, 0.5, 0.5, 0.7, 0.8]]

# case1
force=True
thresh=0.5
force = True
thresh = 0.5
expected = [[2, 0.6, 0.5, 0.5, 0.7, 0.8], [0, 0.5, 0.1, 0.1, 0.2, 0.2],
[0, 0.3, 0.1, 0.1, 0.14, 0.14], [-1, -1, -1, -1, -1, -1]]
grad = np.random.rand(4, 6)
Expand Down Expand Up @@ -176,6 +176,29 @@ def swap_position(data, expected, coord=2, score=1, cid=0, new_col=0):
test_box_nms_forward(np.array(boxes8), np.array(expected8), force=force, thresh=thresh, valid=valid, topk=topk)
test_box_nms_backward(np.array(boxes8), grad8, expected_in_grad8, force=force, thresh=thresh, valid=valid, topk=topk)

# case9: background id filter out
# default background id -1
boxes9 = [[0, 0.5, 0.1, 0.1, 0.2, 0.2], [0, 0.4, 0.1, 0.1, 0.2, 0.2],
[1, 0.3, 0.1, 0.1, 0.14, 0.14], [-1, 0.6, 0.5, 0.5, 0.7, 0.8]]
expected9 = [[0, 0.5, 0.1, 0.1, 0.2, 0.2], [1, 0.3, 0.1, 0.1, 0.14, 0.14],
[-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]
force = True
thresh = 0.5
grad9 = np.random.rand(4, 6)
expected_in_grad9 = grad9[(0, 2, 1, 3), :]
expected_in_grad9[(1, 3), :] = 0
test_box_nms_forward(np.array(boxes9), np.array(expected9), force=force, thresh=thresh)
test_box_nms_backward(np.array(boxes9), grad9, expected_in_grad9, force=force, thresh=thresh)
# set background id
background_id = 0
expected9 = [[-1, 0.6, 0.5, 0.5, 0.7, 0.8], [1, 0.3, 0.1, 0.1, 0.14, 0.14],
[-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]
grad9 = np.random.rand(4, 6)
expected_in_grad9 = grad9[(2, 3, 1, 0), :]
expected_in_grad9[(0, 1), :] = 0
test_box_nms_forward(np.array(boxes9), np.array(expected9), force=force, thresh=thresh, bid=background_id)
test_box_nms_backward(np.array(boxes9), grad9, expected_in_grad9, force=force, thresh=thresh, bid=background_id)

def test_box_iou_op():
def numpy_box_iou(a, b, fmt='corner'):
def area(left, top, right, bottom):
Expand Down

0 comments on commit 6a4f1c9

Please sign in to comment.