From 07109f39eb2f352d7543d924e14d3c80ac077d3c Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Sat, 20 Oct 2018 07:45:11 +0000
Subject: [PATCH 1/5] initial commit

---
 src/operator/tensor/indexing_op.cc           | 95 ++++++++++++++++++++
 src/operator/tensor/indexing_op.h            | 65 ++++++++++++++
 tests/python/unittest/test_sparse_ndarray.py | 13 +++
 3 files changed, 173 insertions(+)

diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index b663ef0179df..544d2846952f 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -87,6 +87,100 @@ void SparseEmbeddingOpForwardRspImpl(const OpContext& ctx,
   }
 }
 
+struct CsrTakeDataKernel {
+  /*!
+   * \brief Map function for general case of take grad
+   * \param tid         global thread id
+   * \param out_idx     ptr to out idx
+   * \param out_data    ptr to out data
+   * \param out_indptr  ptr to out indptr
+   * \param src_data    ptr to original csr data
+   * \param src_idx     ptr to original csr idx
+   * \param idx_ptr     ptr to indices
+   */
+  template<typename IType, typename DType, typename RType>
+  MSHADOW_XINLINE static void Map(int tid, RType* out_idx, DType* out_data,
+                                  const RType* out_indptr, const RType* src_idx,
+                                  const DType* src_data, const RType* src_indptr,
+                                  const IType* idx_ptr) {
+    nnvm::dim_t idx = static_cast<nnvm::dim_t>(idx_ptr[tid]);
+    int row_nnz = src_indptr[idx + 1] - src_indptr[idx];
+    for (int i = 0; i < row_nnz; i++) {
+      out_data[out_indptr[tid] + i] = src_data[src_indptr[tid] + i];
+      out_idx[out_indptr[tid] + i] = src_idx[src_indptr[tid] + i];
+    }
+  }
+};
+
+struct CsrTakeRowCountKernel {
+  /*!
+   * \brief Map function for general case of take grad
+   * \param tid         global thread id
+   * \param out_indptr  ptr to out indptr
+   * \param src_indptr  ptr to original csr indptr
+   * \param idx_ptr     ptr to indices
+   */
+  template<typename IType, typename RType>
+  MSHADOW_XINLINE static void Map(int tid, RType* out_indptr,
+                                  const RType* src_indptr, const IType* idx_ptr) {
+    if (tid == 0) out_indptr[0] = 0;
+    nnvm::dim_t idx = static_cast<nnvm::dim_t>(idx_ptr[tid - 1]);
+    out_indptr[tid - 1] = src_indptr[idx];
+  }
+};
+
+template<>
+void TakeOpForwardCsrImpl<cpu>(const TakeParam& params,
+                               const OpContext& ctx,
+                               const TBlob& idx,
+                               const NDArray& arr,
+                               OpReqType req,
+                               const NDArray& out) {
+  using namespace csr;
+  using namespace mxnet_op;
+  using nnvm::dim_t;
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  if (req == kNullOp) return;
+  if (!arr.storage_initialized()) {
+    FillZerosCsrImpl(s, out);
+    return;
+  }
+  CHECK_EQ(idx.shape_.ndim(), 1U)
+    << "Take with CSR array expects its indices to be one-dimensional. "
+    << idx.shape_.ndim() << " dimensional input is given instead";
+  CHECK_EQ(req, kWriteTo) << "req = " << req << " is not supported for take(csr)";
+  auto axis = params.axis;
+  CHECK_EQ(axis, 0) << "axis = " << axis << " is not supported for take(csr)";
+  const dim_t num_rows = out.shape()[0];
+  out.CheckAndAllocAuxData(kIndPtr, {Shape1(num_rows + 1)});
+
+  MSHADOW_TYPE_SWITCH(idx.type_flag_, IType, {
+    MSHADOW_SGL_DBL_TYPE_SWITCH(arr.dtype(), DType, {
+      MSHADOW_IDX_TYPE_SWITCH(out.aux_type(kIdx), RType, {
+        RType* out_indptr = out.aux_data(kIndPtr).dptr<RType>();
+        const RType* src_indptr = arr.aux_data(kIndPtr).dptr<RType>();
+        const IType* idx_ptr = idx.dptr<IType>();
+
+        Kernel<CsrTakeRowCountKernel, cpu>::Launch(s, num_rows + 1,
+            out_indptr, src_indptr, idx_ptr);
+        for (dim_t i = 0; i < num_rows; i++) {
+          out_indptr[i + 1] += out_indptr[i];
+        }
+        // total number of non-zero rows
+        const dim_t nnz = out_indptr[num_rows];
+        CHECK_GT(nnz, 0) << "Invalid nnz for take(csr)" << nnz;
+        out.CheckAndAllocAuxData(kIdx, {Shape1(nnz)});
+        out.CheckAndAllocData(Shape1(nnz));
+        RType* out_idx = out.aux_data(kIdx).dptr<RType>();
+        DType* out_data = out.data().dptr<DType>();
+        const RType* src_idx = arr.aux_data(kIdx).dptr<RType>();
+        const DType* src_data = arr.data().dptr<DType>();
+        Kernel<CsrTakeDataKernel, cpu>::Launch(s, num_rows, out_idx,
+            out_data, out_indptr, src_idx, src_data, src_indptr, idx_ptr);
+      });
+    });
+  });
+}
 
 template<>
 inline void SparseEmbeddingOpBackwardRspImpl<cpu>(const bool deterministic,
@@ -441,6 +535,7 @@ Examples::
 })
 .set_attr<nnvm::FInferShape>("FInferShape", TakeOpShape)
 .set_attr<nnvm::FInferType>("FInferType", TakeOpType)
+.set_attr<FInferStorageType>("FInferStorageType", TakeOpForwardStorageType)
 .set_attr<FResourceRequest>("FResourceRequest",
   [](const NodeAttrs& attrs) {
     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 1daf0a2cb18a..eb86bb7c9929 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -755,6 +755,71 @@ inline bool TakeOpType(const nnvm::NodeAttrs& attrs,
   return (*in_attrs)[0] != -1;
 }
 
+// storage type inference function for take
+inline bool TakeOpForwardStorageType(const nnvm::NodeAttrs& attrs,
+                                     const int dev_mask,
+                                     DispatchMode* dispatch_mode,
+                                     std::vector<int>* in_attrs,
+                                     std::vector<int>* out_attrs) {
+  CHECK_EQ(in_attrs->size(), 2U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  const int& idx_stype = in_attrs->at(take_::kIdx);
+  const int& arr_stype = in_attrs->at(take_::kArr);
+  int& out_stype = out_attrs->at(take_::kOut);
+  bool dispatched = false;
+  const TakeParam& param = nnvm::get<TakeParam>(attrs.parsed);
+  if (!dispatched && idx_stype == kDefaultStorage && arr_stype == kDefaultStorage) {
+    // dns, dns -> dns
+    dispatched = storage_type_assign(&out_stype, kDefaultStorage,
+                                     dispatch_mode, DispatchMode::kFCompute);
+  }
+  if (!dispatched && idx_stype == kDefaultStorage && arr_stype == kCSRStorage &&
+      param.axis == 0) {
+    // take(dns, csr, axis=0) -> csr
+    dispatched = storage_type_assign(&out_stype, kCSRStorage,
+                                     dispatch_mode, DispatchMode::kFComputeEx);
+  }
+  if (!dispatched) {
+    dispatched = dispatch_fallback(out_attrs, dispatch_mode);
+  }
+  return dispatched;
+}
+
+
+template<typename xpu>
+void TakeOpForwardCsrImpl(const TakeParam& params,
+                          const OpContext& ctx,
+                          const TBlob& idx,
+                          const NDArray& arr,
+                          OpReqType req,
+                          const NDArray& output);
+
+
+template<typename xpu>
+void TakeOpForwardEx(const nnvm::NodeAttrs& attrs,
+                     const OpContext& ctx,
+                     const std::vector<NDArray>& inputs,
+                     const std::vector<OpReqType>& req,
+                     const std::vector<NDArray>& outputs) {
+  CHECK_EQ(req[take_::kOut], kWriteTo);
+  CHECK_EQ(inputs.size(), 2U);
+  CHECK_EQ(outputs.size(), 1U);
+  const NDArray& idx = inputs[take_::kIdx];
+  const NDArray& arr = inputs[take_::kArr];
+  const NDArray& out = outputs[take_::kOut];
+  const auto idx_stype = idx.storage_type();
+  const auto arr_stype = arr.storage_type();
+  const auto out_stype = out.storage_type();
+  const auto params = nnvm::get<TakeParam>(attrs.parsed);
+  if (idx_stype == kDefaultStorage && arr_stype == kCSRStorage &&
+      out_stype == kDefaultStorage) {
+    // dns, csr -> csr
+    TakeOpForwardCsrImpl<xpu>(params, ctx, idx.data(), arr, req[0], out);
+  } else {
+    LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
+  }
+}
+
 template<typename xpu>
 void TakeOpForward(const nnvm::NodeAttrs& attrs,
                    const OpContext& ctx,
diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py
index 875dea7313ae..b0af9caab9b7 100644
--- a/tests/python/unittest/test_sparse_ndarray.py
+++ b/tests/python/unittest/test_sparse_ndarray.py
@@ -1013,6 +1013,19 @@ def check_sparse_fc(batch_size, dim_in, dim_out, stype):
     # test FC with row_sparse weight w/ density=1, csr data (fallback)
     check_sparse_fc(5, 10, 8, 'csr')
 
+@with_seed()
+def test_sparse_take():
+    def check_sparse_take(density):
+        data_shape = rand_shape_2d()
+        idx_shape = (np.random.randint(low=1, high=10),)
+        data = rand_ndarray(data_shape, 'csr', density=density)
+        idx = mx.nd.array(np.random.randint(low=0, high=data_shape[0], size=idx_shape))
+        result = mx.nd.take(data, idx)
+        assert_almost_equal(result.asnumpy(), data.asnumpy()[idx.asnumpy().astype('int32')])
+    densities = [0, 0.5, 1]
+    for d in densities:
+        check_sparse_take(d)
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
From b18dc94ce8867e561f64f4e3b5609694171fb5c4 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Sat, 20 Oct 2018 18:43:23 +0000
Subject: [PATCH 2/5] add test cases for mode

---
 src/operator/tensor/indexing_op.cc           | 51 +++++++++++++++++---
 src/operator/tensor/indexing_op.h            |  2 +-
 tests/python/unittest/test_sparse_ndarray.py | 15 ++++--
 3 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index 544d2846952f..f10278948363 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -87,6 +87,7 @@ void SparseEmbeddingOpForwardRspImpl(const OpContext& ctx,
   }
 }
 
+template<bool clip = true>
 struct CsrTakeDataKernel {
   /*!
    * \brief Map function for general case of take grad
@@ -97,13 +98,23 @@ struct CsrTakeDataKernel {
    * \param src_data    ptr to original csr data
    * \param src_idx     ptr to original csr idx
    * \param idx_ptr     ptr to indices
+   * \param num_rows    maximum number of rows in src array
    */
   template<typename IType, typename DType, typename RType>
   MSHADOW_XINLINE static void Map(int tid, RType* out_idx, DType* out_data,
                                   const RType* out_indptr, const RType* src_idx,
                                   const DType* src_data, const RType* src_indptr,
-                                  const IType* idx_ptr) {
+                                  const IType* idx_ptr, const nnvm::dim_t num_rows) {
     nnvm::dim_t idx = static_cast<nnvm::dim_t>(idx_ptr[tid]);
+    // clip mode
+    if (clip) {
+      if (idx < 0) idx = 0;
+      if (idx >= num_rows) idx = num_rows - 1;
+    } else {
+      // wrap mode
+      idx = idx % num_rows;
+      idx += (idx < 0) ? num_rows : 0;
+    }
     int row_nnz = src_indptr[idx + 1] - src_indptr[idx];
     for (int i = 0; i < row_nnz; i++) {
       out_data[out_indptr[tid] + i] = src_data[src_indptr[tid] + i];
@@ -112,6 +123,7 @@ struct CsrTakeDataKernel {
   }
 };
 
+template<bool clip = true>
 struct CsrTakeRowCountKernel {
   /*!
    * \brief Map function for general case of take grad
@@ -119,12 +131,23 @@ struct CsrTakeRowCountKernel {
    * \param out_indptr  ptr to out indptr
    * \param src_indptr  ptr to original csr indptr
    * \param idx_ptr     ptr to indices
+   * \param num_rows    maximum number of rows in src array
    */
   template<typename IType, typename RType>
   MSHADOW_XINLINE static void Map(int tid, RType* out_indptr,
-                                  const RType* src_indptr, const IType* idx_ptr) {
+                                  const RType* src_indptr, const IType* idx_ptr,
+                                  const nnvm::dim_t num_rows) {
     if (tid == 0) out_indptr[0] = 0;
     nnvm::dim_t idx = static_cast<nnvm::dim_t>(idx_ptr[tid - 1]);
+    // clip mode
+    if (clip) {
+      if (idx < 0) idx = 0;
+      if (idx >= num_rows) idx = num_rows - 1;
+    } else {
+      // wrap mode
+      idx = idx % num_rows;
+      idx += (idx < 0) ? num_rows : 0;
+    }
     out_indptr[tid - 1] = src_indptr[idx];
   }
 };
@@ -146,12 +169,15 @@ void TakeOpForwardCsrImpl(const TakeParam& params,
     return;
   }
   CHECK_EQ(idx.shape_.ndim(), 1U)
-    << "Take with CSR array expects its indices to be one-dimensional. "
+    << "Take with CSR array only supports one-dimensional indices. "
     << idx.shape_.ndim() << " dimensional input is given instead";
   CHECK_EQ(req, kWriteTo) << "req = " << req << " is not supported for take(csr)";
   auto axis = params.axis;
   CHECK_EQ(axis, 0) << "axis = " << axis << " is not supported for take(csr)";
+  CHECK(params.mode == take_::kClip || params.mode == take_::kWrap)
+    << "mode = " << params.mode << " is not supported";
   const dim_t num_rows = out.shape()[0];
+  const dim_t max_num_rows = arr.shape()[0];
   out.CheckAndAllocAuxData(kIndPtr, {Shape1(num_rows + 1)});
 
   MSHADOW_TYPE_SWITCH(idx.type_flag_, IType, {
@@ -161,8 +187,14 @@ void TakeOpForwardCsrImpl(const TakeParam& params,
         const RType* src_indptr = arr.aux_data(kIndPtr).dptr<RType>();
         const IType* idx_ptr = idx.dptr<IType>();
 
-        Kernel<CsrTakeRowCountKernel, cpu>::Launch(s, num_rows + 1,
-            out_indptr, src_indptr, idx_ptr);
+        bool clip = params.mode == take_::kClip;
+        if (clip) {
+          Kernel<CsrTakeRowCountKernel<true>, cpu>::Launch(s, num_rows + 1,
+              out_indptr, src_indptr, idx_ptr, max_num_rows);
+        } else {
+          Kernel<CsrTakeRowCountKernel<false>, cpu>::Launch(s, num_rows + 1,
+              out_indptr, src_indptr, idx_ptr, max_num_rows);
+        }
         for (dim_t i = 0; i < num_rows; i++) {
           out_indptr[i + 1] += out_indptr[i];
         }
@@ -175,8 +207,13 @@ void TakeOpForwardCsrImpl(const TakeParam& params,
         DType* out_data = out.data().dptr<DType>();
         const RType* src_idx = arr.aux_data(kIdx).dptr<RType>();
         const DType* src_data = arr.data().dptr<DType>();
-        Kernel<CsrTakeDataKernel, cpu>::Launch(s, num_rows, out_idx,
-            out_data, out_indptr, src_idx, src_data, src_indptr, idx_ptr);
+        if (clip) {
+          Kernel<CsrTakeDataKernel<true>, cpu>::Launch(s, num_rows, out_idx,
+              out_data, out_indptr, src_idx, src_data, src_indptr, idx_ptr, max_num_rows);
+        } else {
+          Kernel<CsrTakeDataKernel<false>, cpu>::Launch(s, num_rows, out_idx,
+              out_data, out_indptr, src_idx, src_data, src_indptr, idx_ptr, max_num_rows);
+        }
       });
     });
   });
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index eb86bb7c9929..1bebaf035a97 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -774,7 +774,7 @@ inline bool TakeOpForwardStorageType(const nnvm::NodeAttrs& attrs,
                                      dispatch_mode, DispatchMode::kFCompute);
   }
   if (!dispatched && idx_stype == kDefaultStorage && arr_stype == kCSRStorage &&
-      param.axis == 0) {
+      param.axis == 0 && (param.mode == take_::kWrap || param.mode == take_::kClip)) {
     // take(dns, csr, axis=0) -> csr
     dispatched = storage_type_assign(&out_stype, kCSRStorage,
                                      dispatch_mode, DispatchMode::kFComputeEx);
diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py
index b0af9caab9b7..8dd250cf98ee 100644
--- a/tests/python/unittest/test_sparse_ndarray.py
+++ b/tests/python/unittest/test_sparse_ndarray.py
@@ -1015,16 +1015,21 @@ def check_sparse_fc(batch_size, dim_in, dim_out, stype):
 
 @with_seed()
 def test_sparse_take():
-    def check_sparse_take(density):
+    def check_sparse_take(density, mode):
         data_shape = rand_shape_2d()
         idx_shape = (np.random.randint(low=1, high=10),)
         data = rand_ndarray(data_shape, 'csr', density=density)
-        idx = mx.nd.array(np.random.randint(low=0, high=data_shape[0], size=idx_shape))
-        result = mx.nd.take(data, idx)
-        assert_almost_equal(result.asnumpy(), data.asnumpy()[idx.asnumpy().astype('int32')])
+        idx = mx.nd.array(np.random.randint(low=-5, high=15, size=idx_shape))
+        result = mx.nd.take(data, idx, mode=mode)
+        data_np = data.asnumpy()
+        idx_np = idx.asnumpy().astype('int32')
+        expected_result = np.take(data_np, idx_np, mode=mode, axis=0)
+        assert_almost_equal(result.asnumpy(), expected_result)
     densities = [0, 0.5, 1]
+    modes = ['clip', 'wrap']
     for d in densities:
-        check_sparse_take(d)
+        for m in modes:
+            check_sparse_take(d, m)
 
 if __name__ == '__main__':
     import nose
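The clip/wrap handling above follows numpy.take's ``mode`` semantics, which the updated unit test also uses as its reference. A short numpy sketch of the two index transforms (values illustrative)::

    import numpy as np

    x = np.array([[1., 0.],
                  [0., 2.],
                  [3., 0.]])
    # clip: -1 -> 0, 5 -> 2 (clamped to the first/last row)
    np.take(x, [-1, 5], axis=0, mode='clip')
    # wrap: -1 -> 2, 5 -> 2 (reduced modulo the 3 rows)
    np.take(x, [-1, 5], axis=0, mode='wrap')

One detail worth noting in the kernels: C++'s ``%`` can return a negative remainder for a negative index, so the extra ``idx += (idx < 0) ? num_rows : 0;`` after the modulo shifts such results back into range.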
From c120dc975f25691b6c662a696fb2f437c39bd969 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Sat, 20 Oct 2018 19:41:00 +0000
Subject: [PATCH 3/5] fix bug

---
 src/operator/tensor/indexing_op.cc | 18 ++++++++++++++----
 src/operator/tensor/indexing_op.h  |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index f10278948363..c7d7688eca6d 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -117,8 +117,8 @@ struct CsrTakeDataKernel {
     }
     int row_nnz = src_indptr[idx + 1] - src_indptr[idx];
     for (int i = 0; i < row_nnz; i++) {
-      out_data[out_indptr[tid] + i] = src_data[src_indptr[tid] + i];
-      out_idx[out_indptr[tid] + i] = src_idx[src_indptr[tid] + i];
+      out_data[out_indptr[tid] + i] = src_data[src_indptr[idx] + i];
+      out_idx[out_indptr[tid] + i] = src_idx[src_indptr[idx] + i];
     }
   }
 };
@@ -148,7 +148,7 @@ struct CsrTakeRowCountKernel {
       idx = idx % num_rows;
       idx += (idx < 0) ? num_rows : 0;
     }
-    out_indptr[tid - 1] = src_indptr[idx];
+    out_indptr[tid] = src_indptr[idx + 1] - src_indptr[idx];
   }
 };
 
@@ -200,7 +200,10 @@ void TakeOpForwardCsrImpl(const TakeParam& params,
         }
         // total number of non-zero rows
        const dim_t nnz = out_indptr[num_rows];
-        CHECK_GT(nnz, 0) << "Invalid nnz for take(csr)" << nnz;
+        if (nnz == 0) {
+          FillZerosCsrImpl(s, out);
+          return;
+        }
         out.CheckAndAllocAuxData(kIdx, {Shape1(nnz)});
         out.CheckAndAllocData(Shape1(nnz));
         RType* out_idx = out.aux_data(kIdx).dptr<RType>();
@@ -531,6 +534,7 @@ dimension of data (by default outer-most one as axis=0) indexed by indices, and
 in an output tensor of rank q + (r - 1).
 
 Examples::
+
   x = [4.  5.  6.]
 
   // Trivial case, take the second element along the first axis.
@@ -562,6 +566,11 @@ Examples::
       [[ 3.,  4.],
        [ 5.,  6.]]]
 
+The storage type of ``take`` output depends upon the input storage type:
+
+   - take(default, default) = default
+   - take(csr, default, axis=0) = csr
+
 )code" ADD_FILELINE)
 .set_num_inputs(2)
 .set_num_outputs(1)
@@ -578,6 +587,7 @@ Examples::
     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
   })
 .set_attr<FCompute>("FCompute", TakeOpForward<cpu>)
+.set_attr<FComputeEx>("FComputeEx", TakeOpForwardEx<cpu>)
 .set_attr<nnvm::FGradient>("FGradient",
   [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
     return MakeNonlossGradNode("_backward_take", n, ograds,
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 1bebaf035a97..5282a7ea9a61 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -812,7 +812,7 @@ void TakeOpForwardEx(const nnvm::NodeAttrs& attrs,
   const auto out_stype = out.storage_type();
   const auto params = nnvm::get<TakeParam>(attrs.parsed);
   if (idx_stype == kDefaultStorage && arr_stype == kCSRStorage &&
-      out_stype == kDefaultStorage) {
+      out_stype == kCSRStorage) {
     // dns, csr -> csr
     TakeOpForwardCsrImpl<xpu>(params, ctx, idx.data(), arr, req[0], out);
   } else {
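With the indptr fix in place, the two kernels plus the serial prefix sum implement a standard CSR row gather. A pure-Python sketch of the same algorithm (``take_csr_rows`` is a hypothetical helper; indices are assumed already in range, since clip/wrap is applied beforehand)::

    def take_csr_rows(src_indptr, src_idx, src_data, idx):
        # CsrTakeRowCountKernel: per-output-row nnz counts
        out_indptr = [0] * (len(idx) + 1)
        for tid, i in enumerate(idx):
            out_indptr[tid + 1] = src_indptr[i + 1] - src_indptr[i]
        # serial prefix sum, as in TakeOpForwardCsrImpl
        for tid in range(len(idx)):
            out_indptr[tid + 1] += out_indptr[tid]
        # CsrTakeDataKernel: copy each selected row's column indices and data
        out_idx, out_data = [], []
        for i in idx:
            start, stop = src_indptr[i], src_indptr[i + 1]
            out_idx.extend(src_idx[start:stop])
            out_data.extend(src_data[start:stop])
        return out_indptr, out_idx, out_data

For example, ``take_csr_rows([0, 2, 2, 3], [0, 1, 1], [1., 2., 3.], [2, 0])`` returns ``([0, 1, 3], [1, 0, 1], [3.0, 1.0, 2.0])``: the single non-zero of row 2 followed by the two non-zeros of row 0.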
From 0353c363ec3b7668bb25fa1267ce5ed59e310d0c Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Sat, 20 Oct 2018 19:42:47 +0000
Subject: [PATCH 4/5] add comment

---
 src/operator/tensor/indexing_op.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index c7d7688eca6d..0134be5e7b32 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -195,6 +195,7 @@ void TakeOpForwardCsrImpl(const TakeParam& params,
           Kernel<CsrTakeRowCountKernel<false>, cpu>::Launch(s, num_rows + 1,
               out_indptr, src_indptr, idx_ptr, max_num_rows);
         }
+        // calculate prefix sum with single thread
         for (dim_t i = 0; i < num_rows; i++) {
           out_indptr[i + 1] += out_indptr[i];
         }

From 6634e6dc597c17544396dd80821ba08cf3203c51 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Sat, 20 Oct 2018 19:44:56 +0000
Subject: [PATCH 5/5] more comments

---
 src/operator/tensor/indexing_op.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index 0134be5e7b32..98e2536f4c9b 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -186,7 +186,7 @@ void TakeOpForwardCsrImpl(const TakeParam& params,
         RType* out_indptr = out.aux_data(kIndPtr).dptr<RType>();
         const RType* src_indptr = arr.aux_data(kIndPtr).dptr<RType>();
         const IType* idx_ptr = idx.dptr<IType>();
-
+        // gather per row nnz information for output
         bool clip = params.mode == take_::kClip;
         if (clip) {
           Kernel<CsrTakeRowCountKernel<true>, cpu>::Launch(s, num_rows + 1,
@@ -211,6 +211,7 @@ void TakeOpForwardCsrImpl(const TakeParam& params,
         DType* out_data = out.data().dptr<DType>();
         const RType* src_idx = arr.aux_data(kIdx).dptr<RType>();
         const DType* src_data = arr.data().dptr<DType>();
+        // copy indices and data for output
         if (clip) {
           Kernel<CsrTakeDataKernel<true>, cpu>::Launch(s, num_rows, out_idx,
               out_data, out_indptr, src_idx, src_data, src_indptr, idx_ptr, max_num_rows);
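Taken together, the series makes the following end-to-end usage work (a sketch in the spirit of the updated unit test, assuming all five patches are applied; the concrete numbers are illustrative)::

    import mxnet as mx
    import numpy as np

    data = mx.nd.sparse.csr_matrix(np.arange(12.).reshape((4, 3)))
    idx = [-2, 1, 9]   # deliberately out of range
    for mode in ('clip', 'wrap'):
        out = mx.nd.take(data, mx.nd.array(idx), mode=mode)
        expected = np.take(data.asnumpy(), idx, axis=0, mode=mode)
        assert out.stype == 'csr'   # the FComputeEx path keeps the result sparse
        np.testing.assert_allclose(out.asnumpy(), expected)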