diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index ee3d6e53db59..114e9a810934 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -7111,6 +7111,7 @@ def insert(arr, obj, values, axis=None):
     """
     return _mx_nd_np.insert(arr, obj, values, axis=axis)
 
+
 @set_module('mxnet.numpy')
 def nonzero(a):
     """
diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py
index 19af9c2853be..5e45bd3cfe75 100644
--- a/python/mxnet/symbol/numpy/_symbol.py
+++ b/python/mxnet/symbol/numpy/_symbol.py
@@ -2494,14 +2494,14 @@ def insert(arr, obj, values, axis=None):
 
     Parameters
    ----------
-    arr : ndarray
+    arr : _Symbol
        Input array.
-    obj : int, slice or ndarray of ints
+    obj : int, slice or _Symbol of ints
        Object that defines the index or indices before which `values` is inserted.
        Support for multiple insertions when `obj` is a single scalar or a sequence
        with one element (only support int32 and int64 element).
-    values : ndarray
+    values : _Symbol
        Values to insert into `arr`.
        The type of `values` should equal to the type of `arr`.
        `values` should be shaped so that ``arr[...,obj,...] = values``
@@ -2512,7 +2512,7 @@ def insert(arr, obj, values, axis=None):
 
     Returns
    -------
-    out : ndarray
+    out : _Symbol
        A copy of `arr` with `values` inserted.  Note that `insert`
        does not occur in-place: a new array is returned. If
        `axis` is None, `out` is a flattened array.
@@ -2522,49 +2522,6 @@ def insert(arr, obj, values, axis=None):
     Note that for higher dimensional inserts `obj=0` behaves very different
     from `obj=[0]` just like `arr[:,0,:] = values` is different from
     `arr[:,[0],:] = values`.
-
-    Examples
-    --------
-    >>> a = np.array([[1, 1], [2, 2], [3, 3]])
-    >>> a
-    array([[1., 1.],
-           [2., 2.],
-           [3., 3.]])
-    >>> np.insert(a, 1, np.array(5))
-    array([1., 5., 1., 2., 2., 3., 3.])
-    >>> np.insert(a, 1, np.array(5), axis=1)
-    array([[1., 5., 1.],
-           [2., 5., 2.],
-           [3., 5., 3.]])
-
-    Difference between sequence and scalars:
-
-    >>> np.insert(a, np.array([1], dtype=np.int32), np.array([[1],[2],[3]]), axis=1)
-    array([[1., 1., 1.],
-           [2., 2., 2.],
-           [3., 3., 3.]])
-    >>> np.insert(a, 1, np.array([1, 2, 3]), axis=1)
-    array([[1., 1., 1.],
-           [2., 2., 2.],
-           [3., 3., 3.]])
-
-    >>> b = a.flatten()
-    >>> b
-    array([1., 1., 2., 2., 3., 3.])
-    >>> np.insert(b, np.array([2, 2], dtype=np.int64), np.array([5, 6]))
-    array([1., 1., 5., 6., 2., 2., 3., 3.])
-
-    >>> np.insert(b, slice(2, 4), np.array([5, 6]))
-    array([1., 1., 5., 2., 6., 2., 3., 3.])
-
-    >>> np.insert(b, np.array([2, 2], dtype=np.int32), np.array([7.13, False]))
-    array([1.  , 1.  , 7.13, 0.  , 2.  , 2.  , 3.  , 3.  ])
-
-    >>> x = np.arange(8).reshape(2, 4)
-    >>> idx = np.array([1, 3], dtype=np.int32)
-    >>> np.insert(x, idx, np.array([999]), axis=1)
-    array([[  0., 999.,   1.,   2., 999.,   3.],
-           [  4., 999.,   5.,   6., 999.,   7.]])
     """
     if not isinstance(arr, ndarray): # pylint: disable= undefined-variable
         raise TypeError("'arr' can not support type {}".format(str(type(arr))))
diff --git a/src/operator/numpy/np_insert_op-inl.h b/src/operator/numpy/np_insert_op-inl.h
index 1e54cd760975..4fd17deca84c 100644
--- a/src/operator/numpy/np_insert_op-inl.h
+++ b/src/operator/numpy/np_insert_op-inl.h
@@ -61,12 +61,16 @@ struct NumpyInsertParam : public dmlc::Parameter<NumpyInsertParam> {
 };
 
 namespace insert_ {
-enum InsertOpInputs {kArr, kValues, kObj};
-enum InsertOpOutputs {kOut};
+  // insert 'values' to 'arr' according to 'obj'
+  enum InsertOpInputs {kArr, kValues, kObj};
+  enum InsertOpOutputs {kOut};
 }  // namespace insert_
 
 template<int req>
-struct InsertZeroNdimForward {
+struct InsertZeroNdimForward {
+  /*!
+   * \brief when axis is not None but arr.ndim == 0, output = values
+   */
   template<typename DType>
   MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data) {
     KERNEL_ASSIGN(out_data[i], req, in_data[i]);
@@ -75,6 +79,19 @@ struct InsertSingleIndexForward {
+  /*!
+   * \brief insert when obj is a 'scalar' or a 'slice' with only one element.
+   * \tparam xpu - cpu or gpu.
+   * \param out_data - output: insert 'value' to 'arr' according to 'index'.
+   * \param in_arr - input: 'arr', original array.
+   * \param index - input: the only element in 'obj', indicating the insert position.
+   * \param in_val - input: 'value', inserted into 'arr' according to 'index'.
+   * \param numnew - extra dim size in 'out_data' compared with 'arr' along 'axis'.
+   * \param axis - insert 'value' into 'arr' along 'axis'.
+   * \param ndim - 'in_arr', 'in_val' and 'out_data' all have the same ndim before this is called.
+   * \param moveaxis - If 'obj' is a scalar, moveaxis is true;
+                       if 'obj' is a slice with one element, moveaxis is false.
+ */ template MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_val, const DType* in_arr, @@ -85,60 +102,66 @@ struct InsertSingleIndexForward { const mshadow::Shape<10> old_val_stride, const mshadow::Shape<10> arr_stride, const mshadow::Shape<10> out_stride, - const int arr_ndim, const int val_ndim, - const int out_ndim, const int axis, + const int ndim, const int axis, bool moveaxis) { - const int64_t out_head = i / out_stride[axis]; - const int64_t out_mid = out_head % outshape[axis]; + // i is the global flattened index in the output mshadow::Shape<10> out_idx; // i -> position in output's shape - for (int j = 0; j < out_ndim; ++j) { + for (int j = 0; j < ndim; ++j) { const int64_t head = i / out_stride[j]; const int64_t mid = head % outshape[j]; out_idx[j] = mid; } int64_t dest_idx; - if (out_mid >= index && out_mid < index + numnew) { - int idx_val = out_mid - index; + if (out_idx[axis] >= index && out_idx[axis] < index + numnew) { // from 'value' + int idx_val = out_idx[axis] - index; mshadow::Shape<10> val_idx(out_idx); // i -> position in values's shape val_idx[axis] = idx_val; - for (int j = out_ndim - 1, k = val_ndim - 1; j >= 0 || k >= 0; --j, --k) { - if (j >= 0 && k >= 0) { - if (valshape[k] == 1) { - val_idx[k] = 0; - } - } else if (j >= 0) { - val_idx[j] = 1; - } else { - break; + for (int j = ndim - 1; j >= 0; --j) { + if (valshape[j] == 1) { // broadcast + val_idx[j] = 0; } } dest_idx = 0; - if (moveaxis) { - for (int _i = 0; _i < axis; ++_i) { - dest_idx += old_val_stride[_i + 1] * val_idx[_i]; + if (moveaxis) { // moveaxis(values, 0, axis) + for (int j = 0; j < axis; ++j) { + dest_idx += old_val_stride[j + 1] * val_idx[j]; } dest_idx += old_val_stride[0] * val_idx[axis]; - for (int _i = axis + 1; _i < val_ndim ; ++_i) { - dest_idx += old_val_stride[_i] *val_idx[_i]; + for (int j = axis + 1; j < ndim ; ++j) { + dest_idx += old_val_stride[j] *val_idx[j]; } } else { - for (int _i =0; _i < val_ndim; ++_i) { - dest_idx += val_stride[_i] * val_idx[_i]; + for (int j =0; j < ndim; ++j) { + dest_idx += val_stride[j] * val_idx[j]; } } KERNEL_ASSIGN(out_data[i], req, in_val[dest_idx]); - } else { - int idx_arr = (out_mid < index) ? out_mid : out_mid - numnew; + } else { // from 'arr' + int idx_arr = (out_idx[axis] < index) ? + out_idx[axis] : out_idx[axis] - numnew; mshadow::Shape<10> arr_idx(out_idx); // i -> position in arr's shape arr_idx[axis] = idx_arr; dest_idx = 0; - for (int _i =0; _i < arr_ndim; ++_i) { - dest_idx += arr_stride[_i] * arr_idx[_i]; + for (int j =0; j < ndim; ++j) { + dest_idx += arr_stride[j] * arr_idx[j]; } KERNEL_ASSIGN(out_data[i], req, in_arr[dest_idx]); } } - + /*! + * \brief insert when obj is 'tensor' with only one element. + * \tparam xpu - cpu or gpu. + * \param out_data - output: insert 'value' to 'arr' according to 'index'. + * \param in_arr - input: 'arr', original array. + * \param in_obj - input: It indicats insert position, ndim may equals to 0. + * \param in_val - input: 'value', insert to 'arr' according to 'index'. + * \param N - arr.shape_[axis] + * \param numnew - extra dim size in 'out_data' compared with 'arr' in 'axis'. + * \param axis - insert 'value' to 'arr' in 'axis'. + * \param ndim - both 'in_arr', 'in_val' and 'out_data' have same ndim before call this. + * \param moveaxis - If 'obj' is a tensor with ndim == 0, regard it as a scaler and moveaxis is true; + If 'obj' is a tensor with ndim > 0 but has only one element, moveaxis is false. 
+ */ template MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_val, const DType* in_arr, @@ -149,13 +172,11 @@ struct InsertSingleIndexForward { const mshadow::Shape<10> old_val_stride, const mshadow::Shape<10> arr_stride, const mshadow::Shape<10> out_stride, - const int arr_ndim, const int val_ndim, - const int out_ndim, const int axis, + const int ndim, const int axis, bool moveaxis) { - const int64_t out_head = i / out_stride[axis]; - const int64_t out_mid = out_head % outshape[axis]; + // i is the global flattened index in the output mshadow::Shape<10> out_idx; // i -> position in output's shape - for (int j = 0; j < out_ndim; ++j) { + for (int j = 0; j < ndim; ++j) { const int64_t head = i / out_stride[j]; const int64_t mid = head % outshape[j]; out_idx[j] = mid; @@ -163,53 +184,79 @@ struct InsertSingleIndexForward { int64_t dest_idx; IType index = in_obj[0]; if (static_cast(index) < 0) { - index += static_cast(N); + index += static_cast(N); } - if (out_mid >= index && out_mid < index + numnew) { - int idx_val = out_mid - index; - mshadow::Shape<10> val_idx(out_idx); + if (out_idx[axis] >= index && out_idx[axis] < index + numnew) { // from 'value' + int idx_val = out_idx[axis] - index; + mshadow::Shape<10> val_idx(out_idx); // i -> position in values's shape val_idx[axis] = idx_val; - for (int j = out_ndim - 1, k = val_ndim - 1; j >= 0 || k >= 0; --j, --k) { - if (j >= 0 && k >= 0) { - if (valshape[k] == 1) { - val_idx[k] = 0; - } - } else if (j >= 0) { - val_idx[j] = 1; - } else { - break; + for (int j = ndim - 1; j >= 0; --j) { + if (valshape[j] == 1) { // broadcast + val_idx[j] = 0; } } dest_idx = 0; - if (moveaxis) { - for (int _i = 0; _i < axis; ++_i) { - dest_idx += old_val_stride[_i + 1] * val_idx[_i]; + if (moveaxis) { // moveaxis(values, 0, axis) + for (int j = 0; j < axis; ++j) { + dest_idx += old_val_stride[j + 1] * val_idx[j]; } dest_idx += old_val_stride[0] * val_idx[axis]; - for (int _i = axis + 1; _i < val_ndim ; ++_i) { - dest_idx += old_val_stride[_i] *val_idx[_i]; + for (int j = axis + 1; j < ndim ; ++j) { + dest_idx += old_val_stride[j] *val_idx[j]; } } else { - for (int _i =0; _i < val_ndim; ++_i) { - dest_idx += val_stride[_i] * val_idx[_i]; + for (int j =0; j < ndim; ++j) { + dest_idx += val_stride[j] * val_idx[j]; } } KERNEL_ASSIGN(out_data[i], req, in_val[dest_idx]); - } else { - int idx_arr = (out_mid < index) ? out_mid : out_mid - numnew; + } else { // from 'arr' + int idx_arr = (out_idx[axis] < index) ? out_idx[axis] : out_idx[axis] - numnew; mshadow::Shape<10> arr_idx(out_idx); // i -> position in arr's shape arr_idx[axis] = idx_arr; dest_idx = 0; - for (int _i =0; _i < arr_ndim; ++_i) { - dest_idx += arr_stride[_i] * arr_idx[_i]; + for (int j =0; j < ndim; ++j) { + dest_idx += arr_stride[j] * arr_idx[j]; } KERNEL_ASSIGN(out_data[i], req, in_arr[dest_idx]); } } }; +template +inline mshadow::Shape GetStride(const mxnet::TShape& shape) { + mshadow::Shapestride; + size_t tmp = 1; + for (int i = shape.ndim() - 1; i >= 0; --i) { + stride[i] = tmp; + tmp *= shape[i]; + } + return stride; +} + +template +inline mshadow::Shape GetKernelShape(const mxnet::TShape& shape) { + mshadow::Shapek_shape; + for (int i = 0 ; i < shape.ndim() ; ++i) { + k_shape[i] = shape[i]; + } + return k_shape; +} + template struct InsertSeqForward { + /*! + * \brief insert when obj is 'tensor' or 'slice' with more than one element. + * \tparam xpu - cpu or gpu. + * \param out_data - output: insert 'value' to 'arr' according to 'index'. 
+ * \param in_arr - input: 'arr', original array. + * \param in_obj - input: It indicats insert position, ndim may equals to 0. + * \param in_val - input: 'value', insert to 'arr' according to 'index'. + * \param is_insert - if is_insert[out_idx[axis]] is true, it's from 'values', else from 'arr'. + * \param origin_idx - indicate the original position in 'arr' or 'values' in 'axis'. + * \param axis - insert 'value' to 'arr' in 'axis'. + * \param ndim - both 'in_arr', 'in_val' and 'out_data' have same ndim before call this. + */ template MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_val, const DType* in_arr, @@ -220,45 +267,37 @@ struct InsertSeqForward { const mshadow::Shape<10> val_stride, const mshadow::Shape<10> arr_stride, const mshadow::Shape<10> out_stride, - const int arr_ndim, const int val_ndim, - const int out_ndim, const int axis) { - const int64_t out_head = i / out_stride[axis]; - const int64_t out_mid = out_head % outshape[axis]; + const int ndim, const int axis) { + // i is the global flattened index in the output mshadow::Shape<10> out_idx; // i -> position in output's shape - for (int j = 0; j < out_ndim; ++j) { + for (int j = 0; j < ndim; ++j) { const int64_t head = i / out_stride[j]; const int64_t mid = head % outshape[j]; out_idx[j] = mid; } int64_t dest_idx; - if (is_insert[out_mid]) { - int idx_val = origin_idx[out_mid]; + if (is_insert[out_idx[axis]]) { // from 'values' + int idx_val = origin_idx[out_idx[axis]]; mshadow::Shape<10> insert_idx(out_idx); // i -> position in insert's shape insert_idx[axis] = idx_val; mshadow::Shape<10> val_idx(insert_idx); // i -> position in values's shape - for (int j = out_ndim - 1, k = val_ndim - 1; j >= 0 || k >= 0; --j, --k) { - if (j >= 0 && k >= 0) { - if (valshape[k] == 1) { - val_idx[k] = 0; - } - } else if (j >= 0) { + for (int j = ndim - 1; j >= 0; --j) { // broadcast + if (valshape[j] == 1) { val_idx[j] = 0; - } else { - break; } } dest_idx = 0; - for (int _i =0; _i < val_ndim; ++_i) { - dest_idx += val_stride[_i] * val_idx[_i]; + for (int j =0; j < ndim; ++j) { + dest_idx += val_stride[j] * val_idx[j]; } KERNEL_ASSIGN(out_data[i], req, in_val[dest_idx]); - } else { - int idx_arr = origin_idx[out_mid]; + } else { // from 'arr' + int idx_arr = origin_idx[out_idx[axis]]; mshadow::Shape<10> arr_idx(out_idx); // i -> position in arr's shape arr_idx[axis] = idx_arr; dest_idx = 0; - for (int _i =0; _i < arr_ndim; ++_i) { - dest_idx += arr_stride[_i] * arr_idx[_i]; + for (int j =0; j < ndim; ++j) { + dest_idx += arr_stride[j] * arr_idx[j]; } out_data[i] = in_arr[dest_idx]; KERNEL_ASSIGN(out_data[i], req, in_arr[dest_idx]); @@ -338,247 +377,210 @@ struct SetOriginArrIdx { template void NumpyInsertCompute(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - using namespace mshadow; - using namespace mxnet_op; - - const NumpyInsertParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(inputs.size(), - (param.step.has_value() || param.int_ind.has_value()) ? 2U : 3U); - CHECK_EQ(outputs.size(), 1U); - CHECK_EQ(req.size(), 1U); - mshadow::Stream *s = ctx.get_stream(); - int ndim = inputs[insert_::kArr].shape_.ndim(); - int axis = param.axis.has_value() ? 
param.axis.value() : 0; - TBlob arr, values; - if (!param.axis.has_value()) { - arr = inputs[insert_::kArr].reshape(Shape1(inputs[insert_::kArr].shape_.Size())); - ndim = 1; - } else if (ndim == 0) { - arr = inputs[insert_::kArr]; - CHECK_EQ(inputs[insert_::kValues].shape_.ndim(), 0) - << "'arr' is a 0-d array, 'values' can not assign to it. " - << "alueError: assignment to 0-d array."; - MSHADOW_TYPE_SWITCH(outputs[insert_::kOut].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[insert_::kOut], req_type, { - Kernel, xpu>::Launch( - s, outputs[insert_::kOut].shape_.Size(), - outputs[insert_::kOut].dptr(), inputs[insert_::kValues].dptr()); - }); - }); - return; + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mxnet_op; + + const NumpyInsertParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(inputs.size(), + (param.step.has_value() || param.int_ind.has_value()) ? 2U : 3U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + int ndim = inputs[insert_::kArr].shape_.ndim(); + int axis = param.axis.has_value() ? param.axis.value() : 0; + TBlob arr; + TBlob values = inputs[insert_::kValues]; + if (!param.axis.has_value()) { + arr = inputs[insert_::kArr].reshape(Shape1(inputs[insert_::kArr].shape_.Size())); + ndim = 1; + } else if (ndim == 0) { + arr = inputs[insert_::kArr]; + CHECK_EQ(inputs[insert_::kValues].shape_.ndim(), 0) + << "'arr' is a 0-d array, 'values' can not assign to it. " + << "alueError: assignment to 0-d array."; + MSHADOW_TYPE_SWITCH(outputs[insert_::kOut].type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[insert_::kOut], req_type, { + Kernel, xpu>::Launch( // output = value + s, outputs[insert_::kOut].shape_.Size(), + outputs[insert_::kOut].dptr(), inputs[insert_::kValues].dptr()); + }); + }); + return; + } else { + arr = inputs[insert_::kArr]; + CHECK(axis >= -1 * arr.shape_.ndim() && axis < arr.shape_.ndim()) + << "Axis should be in the range of [-r, r-1] where r is the rank of input tensor"; + axis += (axis < 0) ? arr.shape_.ndim() : 0; + } + + int N = arr.shape_[axis]; + //mxnet::TShape newshape(arr.shape_); // output's shape + size_t indices_len = 0; // indices amount + int start = 0, stop = 0, step = 0; // arguments when 'obj' is 'slice' + + // get and check indices from slice or sequence of ints + if (inputs.size() == 3U) { // indices from 'tensor' + indices_len = inputs[insert_::kObj].shape_.Size(); + } else if (param.step.has_value()) { // indices from 'slice' + step = param.step.value(); + CHECK_NE(step, 0) << "'step' can not equal to 0."; + if (param.stop.has_value()) { + stop = param.stop.value(); + stop += (stop < 0) ? N : 0; + stop = (stop < 0) ? ((step < 0) ? -1 : 0) : stop; + stop = (stop >= N) ? ((step < 0) ? N - 1 : N) : stop; } else { - arr = inputs[insert_::kArr]; - CHECK(axis >= -1 * arr.shape_.ndim() && axis < arr.shape_.ndim()) - << "Axis should be in the range of [-r, r-1] where r is the rank of input tensor"; - axis += (axis < 0) ? arr.shape_.ndim() : 0; + stop = (step > 0) ? 
N : -1; } - - int N = arr.shape_[axis]; - mxnet::TShape newshape(arr.shape_); - size_t indices_len = 0; - int start = 0, stop = 0, step = 0; - - // get and check indices from slice or sequence of ints - if (inputs.size() == 3U) { - indices_len = inputs[insert_::kObj].shape_.Size(); - } else if (param.step.has_value()) { - step = param.step.value(); - CHECK_NE(step, 0) << "'step' can not equal to 0."; - if (param.stop.has_value()) { - stop = param.stop.value(); - stop += (stop < 0) ? N : 0; - stop = (stop < 0) ? ((step < 0) ? -1 : 0) : stop; - stop = (stop >= N) ? ((step < 0) ? N - 1 : N) : stop; - } else { - stop = (step > 0) ? N : -1; - } - if (param.start.has_value()) { - start = param.start.value(); - start += (start < 0) ? N : 0; - start = (start < 0) ? ((step < 0) ? -1 : 0) : start; - start = (start >= N) ? ((step < 0) ? N - 1 : N) : start; - } else { - start = (step > 0) ? 0 : N - 1; - } - int seq_cnt = 0; - if (step > 0 && stop >= start) { - seq_cnt = (stop - start + step - 1) / step; - } else if (step < 0 && stop <= start) { - seq_cnt = (stop - start + step + 1) / step; - } - indices_len = static_cast(seq_cnt); + if (param.start.has_value()) { + start = param.start.value(); + start += (start < 0) ? N : 0; + start = (start < 0) ? ((step < 0) ? -1 : 0) : start; + start = (start >= N) ? ((step < 0) ? N - 1 : N) : start; + } else { + start = (step > 0) ? 0 : N - 1; } - - int numnew, index = 0; - mxnet::TShape val_newshape(arr.shape_.ndim(), -1); - for (int i = inputs[insert_::kValues].shape_.ndim() - 1, j = arr.shape_.ndim() - 1; - i >= 0 || j >= 0; --i, --j) { - if (i >= 0 && j >= 0) { - val_newshape[j] = inputs[insert_::kValues].shape_[i]; - } else if (i >= 0) { - CHECK_EQ(inputs[insert_::kValues].shape_[i], 1) << "index exceed limits."; - } else { - val_newshape[j] = 1; - } + int seq_cnt = 0; + if (step > 0 && stop >= start) { + seq_cnt = (stop - start + step - 1) / step; + } else if (step < 0 && stop <= start) { + seq_cnt = (stop - start + step + 1) / step; } - values = inputs[insert_::kValues].reshape(val_newshape); + indices_len = static_cast(seq_cnt); + } - mxnet::TShape old_valshape(values.shape_); - if (param.int_ind.has_value() || - (inputs.size() == 3U && inputs[insert_::kObj].shape_.ndim() == 0)) { - if (param.int_ind.has_value()) { - index = param.int_ind.value(); - CHECK(index >= -1 * N && index <= N) - << "Index should be in the range of [-r, r-1] where r is the dim size in 'axis'"; - if (index < 0) { - index += N; - } - } - numnew = values.shape_[0]; - - // If 'obj' is a int, then, values = moveaxis(values, 0, axis) - mxnet::TShape axes(values.ndim(), -1); - mxnet::TShape val_newshape(values.ndim(), -1); - int axes_id = 0; - for (int i = 1; i <= axis; ++i) { - axes[axes_id++] = i; - } - axes[axes_id++] = 0; - for (int i = axis + 1; i < values.ndim(); ++i) { - axes[axes_id++] = i; - } - for (int i = 0; i < values.ndim(); ++i) { - val_newshape[i] = values.shape_[axes[i]]; - } - values.shape_.assign(val_newshape.begin(), val_newshape.end()); - newshape[axis] += numnew; - } else if (indices_len == 1) { - numnew = values.shape_[axis]; - newshape[axis] += numnew; - if (param.step.has_value()) { - index = start; - CHECK(index >= -1 * N && index <= N) - << "Index should be in the range of [-r, r-1] where r is the dim size in 'axis'"; - if (index < 0) { - index += N; - } - } + int numnew = 0; // output.shape[axis] - arr.shape[axis] + int index = 0; // modified index + mxnet::TShape val_newshape(arr.shape_.ndim(), -1); + // modify values's ndim to arr's ndim, for broadcast easily 
later + // e.g. value shape: (2,) arr shape: (3, 2) => value shape: (1, 2) + for (int i = values.shape_.ndim() - 1, j = arr.shape_.ndim() - 1; + i >= 0 || j >= 0; --i, --j) { + if (i >= 0 && j >= 0) { + val_newshape[j] = values.shape_[i]; + } else if (i >= 0) { + CHECK_EQ(values.shape_[i], 1) << "index exceed limits."; } else { - numnew = static_cast(indices_len); - newshape[axis] += numnew; + val_newshape[j] = 1; } + } + values.shape_.assign(val_newshape.begin(), val_newshape.end()); - const mxnet::TShape& outshape = outputs[insert_::kOut].shape_; - mshadow::Shape<10> arr_strides; - int stride = 1; - for (int i = arr.shape_.ndim() - 1; i >= 0; --i) { - arr_strides[i] = stride; - stride *= arr.shape_[i]; - } - mshadow::Shape<10> val_strides; - stride = 1; - for (int i = values.shape_.ndim() - 1; i >= 0; --i) { - val_strides[i] = stride; - stride *= values.shape_[i]; + // get numnew + mxnet::TShape old_valshape(values.shape_); + if (param.int_ind.has_value() || + (inputs.size() == 3U && inputs[insert_::kObj].shape_.ndim() == 0)) { // scaler + if (param.int_ind.has_value()) { + index = param.int_ind.value(); + CHECK(index >= -1 * N && index <= N) + << "Index should be in the range of [-r, r-1] where r is the dim size in 'axis'"; + if (index < 0) { + index += N; + } } - mshadow::Shape<10> old_val_strides; - stride = 1; - for (int i = old_valshape.ndim() - 1; i >= 0; --i) { - old_val_strides[i] = stride; - stride *= old_valshape[i]; + + // values = moveaxis(values, 0, axis), will change values's shape + numnew = values.shape_[0]; + mxnet::TShape axes(values.ndim(), -1); // moved axes + mxnet::TShape val_newshape(values.ndim(), -1); + int axes_id = 0; + for (int i = 1; i <= axis; ++i) { + axes[axes_id++] = i; } - mshadow::Shape<10> out_strides; - stride = 1; - for (int i = outshape.ndim() - 1; i >= 0; --i) { - out_strides[i] = stride; - stride *= outshape[i]; + axes[axes_id++] = 0; + for (int i = axis + 1; i < values.ndim(); ++i) { + axes[axes_id++] = i; } - mshadow::Shape<10> k_outshape; - for (int i = 0 ; i < outshape.ndim() ; ++i) { - k_outshape[i] = outshape[i]; + for (int i = 0; i < values.ndim(); ++i) { + val_newshape[i] = values.shape_[axes[i]]; } - mshadow::Shape<10> k_valshape; - for (int i = 0 ; i < values.shape_.ndim() ; ++i) { - k_valshape[i] = values.shape_[i]; + values.shape_.assign(val_newshape.begin(), val_newshape.end()); + } else if (indices_len == 1) { // tensor with only one element + numnew = values.shape_[axis]; + if (param.step.has_value()) { + index = start; + CHECK(index >= -1 * N && index <= N) + << "Index should be in the range of [-r, r-1] where r is the dim size in 'axis'"; + if (index < 0) { + index += N; + } } + } else { + numnew = static_cast(indices_len); + } - if (param.int_ind.has_value()) { - MSHADOW_TYPE_SWITCH(outputs[insert_::kOut].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[insert_::kOut], req_type, { + const mxnet::TShape& outshape = outputs[insert_::kOut].shape_; + mshadow::Shape<10> arr_strides = GetStride<10>(arr.shape_); + mshadow::Shape<10> val_strides = GetStride<10>(values.shape_); + mshadow::Shape<10> old_val_strides = GetStride<10>(old_valshape); + mshadow::Shape<10> out_strides = GetStride<10>(outshape); + mshadow::Shape<10> k_outshape = GetKernelShape<10>(outshape); + for (int i = 0 ; i < outshape.ndim() ; ++i) { + k_outshape[i] = outshape[i]; + } + mshadow::Shape<10> k_valshape = GetKernelShape<10>(values.shape_); + for (int i = 0 ; i < values.shape_.ndim() ; ++i) { + k_valshape[i] = values.shape_[i]; + } + 
MXNET_ASSIGN_REQ_SWITCH(req[insert_::kOut], req_type, { + MSHADOW_TYPE_SWITCH(outputs[insert_::kOut].type_flag_, DType, { + MSHADOW_TYPE_SWITCH((inputs.size() == 3U) ? + inputs[insert_::kObj].type_flag_ : + mshadow::DataType::kFlag, IType, { + if (param.int_ind.has_value()) { Kernel, xpu>::Launch(s, outshape.Size(), - outputs[insert_::kOut].dptr(), - values.dptr(), arr.dptr(), - k_outshape, k_valshape, index, numnew, - val_strides, old_val_strides, arr_strides, - out_strides, arr.shape_.ndim(), - values.shape_.ndim(), outshape.ndim(), - axis, true); - }); - }); - } else if (inputs.size() == 3U && inputs[insert_::kObj].shape_.ndim() == 0) { - MSHADOW_TYPE_SWITCH(outputs[insert_::kOut].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[insert_::kOut], req_type, { - MSHADOW_TYPE_SWITCH(inputs[insert_::kObj].type_flag_, IType, { - Kernel, xpu>::Launch(s, outshape.Size(), - outputs[insert_::kOut].dptr(), - values.dptr(), arr.dptr(), - k_outshape, k_valshape, N, - inputs[insert_::kObj].dptr(), numnew, - val_strides, old_val_strides, arr_strides, - out_strides, arr.shape_.ndim(), - values.shape_.ndim(), outshape.ndim(), - axis, true); - }); - }); - }); - } else if (indices_len == 1) { - MSHADOW_TYPE_SWITCH(outputs[insert_::kOut].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[insert_::kOut], req_type, { + outputs[insert_::kOut].dptr(), + values.dptr(), arr.dptr(), + k_outshape, k_valshape, index, numnew, + val_strides, old_val_strides, arr_strides, + out_strides, outshape.ndim(), + axis, true); + } else if (inputs.size() == 3U && inputs[insert_::kObj].shape_.ndim() == 0) { + Kernel, xpu>::Launch(s, outshape.Size(), + outputs[insert_::kOut].dptr(), + values.dptr(), arr.dptr(), + k_outshape, k_valshape, N, + inputs[insert_::kObj].dptr(), numnew, + val_strides, old_val_strides, arr_strides, + out_strides, outshape.ndim(), + axis, true); + } else if (indices_len == 1) { if (param.step.has_value()) { Kernel, xpu>::Launch(s, outshape.Size(), outputs[insert_::kOut].dptr(), values.dptr(), arr.dptr(), k_outshape, k_valshape, start, numnew, val_strides, old_val_strides, arr_strides, out_strides, - arr.shape_.ndim(), values.shape_.ndim(), outshape.ndim(), axis, false); } else { - MSHADOW_TYPE_SWITCH(inputs[insert_::kObj].type_flag_, IType, { - Kernel, xpu>::Launch(s, outshape.Size(), - outputs[insert_::kOut].dptr(), - values.dptr(), arr.dptr(), - k_outshape, k_valshape, - N, inputs[insert_::kObj].dptr(), numnew, - val_strides, old_val_strides, - arr_strides, out_strides, - arr.shape_.ndim(), values.shape_.ndim(), - outshape.ndim(), axis, false); - }); + Kernel, xpu>::Launch(s, outshape.Size(), + outputs[insert_::kOut].dptr(), + values.dptr(), arr.dptr(), + k_outshape, k_valshape, + N, inputs[insert_::kObj].dptr(), numnew, + val_strides, old_val_strides, + arr_strides, out_strides, + outshape.ndim(), axis, false); } - }); - }); - } else { - // broadcast check - for (int i = outshape.ndim() - 1; i >= 0; --i) { + } else { + // broadcast check + for (int i = outshape.ndim() - 1; i >= 0; --i) { int sz = outshape[i]; if (i == axis) { - sz = numnew; + sz = numnew; } CHECK((values.shape_[i] == 1) || (values.shape_[i] == sz)); - } - size_t temp_storage_bytes, temp_mem_size; - MSHADOW_TYPE_SWITCH((inputs.size() == 3U) ? 
- inputs[insert_::kObj].type_flag_ : - mshadow::DataType::kFlag, IType, { + } + size_t temp_storage_bytes, temp_mem_size; temp_storage_bytes = SortByKeyWorkspaceSize(indices_len, false, true); temp_mem_size = indices_len * sizeof(IType) * 2 + - indices_len * sizeof(int) + - newshape[axis] * sizeof(int) * 2 + - temp_storage_bytes; + indices_len * sizeof(int) + + outshape[axis] * sizeof(int) * 2 + + temp_storage_bytes; Tensor temp_mem = ctx.requested[0].get_space_typed(Shape1(temp_mem_size), s); IType* indices_ptr = reinterpret_cast(temp_mem.dptr_); @@ -588,9 +590,9 @@ void NumpyInsertCompute(const nnvm::NodeAttrs& attrs, int* is_insert = reinterpret_cast(temp_mem.dptr_ + indices_len * sizeof(IType) * 2 + indices_len * sizeof(int)); int* origin_idx = reinterpret_cast(temp_mem.dptr_ + indices_len * sizeof(IType) * 2 - + indices_len * sizeof(int) + newshape[axis] * sizeof(int)); + + indices_len * sizeof(int) + outshape[axis] * sizeof(int)); Tensor temp_storage(temp_mem.dptr_ + indices_len * sizeof(IType) * 2 - + indices_len * sizeof(int) + newshape[axis] * sizeof(int) * 2, + + indices_len * sizeof(int) + outshape[axis] * sizeof(int) * 2, Shape1(temp_storage_bytes), s); Tensor indices(indices_ptr, Shape1(indices_len), s); Tensor sorted_indices(sorted_indices_ptr, Shape1(indices_len), s); @@ -598,38 +600,31 @@ void NumpyInsertCompute(const nnvm::NodeAttrs& attrs, int num_bits = common::ilog2ui(static_cast(indices_len) - 1); if (param.step.has_value()) { - Kernel::Launch(s, indices_len, - indices_ptr, N, - start, step); + Kernel::Launch(s, indices_len, indices_ptr, N, start, step); } else { - Kernel::Launch(s, indices_len, - indices_ptr, N, - inputs[insert_::kObj].dptr()); + Kernel::Launch(s, indices_len, indices_ptr, N, + inputs[insert_::kObj].dptr()); } - Kernel::Launch(s, indices_len, order_ptr); mxnet::op::SortByKey(indices, order, true, &temp_storage, 0, num_bits, &sorted_indices); Kernel::Launch(s, indices_len, indices_ptr, order_ptr); - Kernel::Launch(s, newshape[axis], is_insert); + Kernel::Launch(s, outshape[axis], is_insert); Kernel::Launch(s, indices_len, indices_ptr, is_insert); Kernel::Launch(s, indices_len, indices_ptr, origin_idx); - Kernel::Launch(s, newshape[axis], is_insert, origin_idx); - - MSHADOW_TYPE_SWITCH(outputs[insert_::kOut].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[insert_::kOut], req_type, { - Kernel, xpu>::Launch(s, outshape.Size(), - outputs[insert_::kOut].dptr(), - values.dptr(), arr.dptr(), - k_outshape, k_valshape, is_insert, origin_idx, - val_strides, arr_strides, out_strides, - arr.shape_.ndim(), values.shape_.ndim(), - outshape.ndim(), axis); - }); - }); - }); - } + Kernel::Launch(s, outshape[axis], is_insert, origin_idx); + + Kernel, xpu>::Launch(s, outshape.Size(), + outputs[insert_::kOut].dptr(), + values.dptr(), arr.dptr(), + k_outshape, k_valshape, is_insert, origin_idx, + val_strides, arr_strides, out_strides, + outshape.ndim(), axis); + } + }); + }); + }); } } // namespace op diff --git a/src/operator/numpy/np_insert_op.cc b/src/operator/numpy/np_insert_op.cc index 1f4151883a3e..5fa23a510826 100644 --- a/src/operator/numpy/np_insert_op.cc +++ b/src/operator/numpy/np_insert_op.cc @@ -42,7 +42,7 @@ bool NumpyInsertType(const nnvm::NodeAttrs& attrs, CHECK_NE((*in_type)[2], -1) << "Index type must be set for insert operator\n"; CHECK(((*in_type)[2] == mshadow::DataType::kFlag) || ((*in_type)[2] == mshadow::DataType::kFlag)) - << "Index type only support int32 or int64.\n"; + << "Index type only support int32 or int64.\n"; } 
   TYPE_ASSIGN_CHECK(*out_type, 0, (*in_type)[0]);
   TYPE_ASSIGN_CHECK(*out_type, 0, (*in_type)[1]);
@@ -62,7 +62,7 @@ bool NumpyInsertShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape &objShape = (*in_shape)[insert_::kObj];
   if (in_shape->size() == 3U) {
     CHECK_LE(objShape.ndim(), 1)
-      << "index array argument obj to insert must be one dimensional or scale.\n";
+        << "index array argument obj to insert must be one dimensional or scalar.\n";
   }
 
   out_shape->clear();
@@ -117,7 +117,9 @@ bool NumpyInsertShape(const nnvm::NodeAttrs& attrs,
 
   mxnet::TShape newshape(arrshape);
   mxnet::TShape val_newshape(arrshape.ndim(), -1);
-  int numnew;
+  int numnew = 0;  // number of new columns inserted into 'arr' along 'axis'
+  // modify values's ndim to arr's ndim so it can be broadcast easily later
+  // e.g. value shape: (2,) arr shape: (3, 2) => value shape: (1, 2)
   for (int i = valshape.ndim() - 1, j = arrshape.ndim() - 1; i >= 0 || j >= 0; --i, --j) {
     if (i >= 0 && j >= 0) {
       val_newshape[j] = valshape[i];
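
The Examples section dropped from the `_symbol.py` docstring above (imperative examples presumably do not fit the deferred `_Symbol` frontend) still describes the behaviour the new kernels implement. For reference, a few of those cases run against the imperative `mxnet.numpy` frontend, with the expected outputs copied from the removed docstring text:

```python
from mxnet import np, npx
npx.set_np()  # enable the NumPy-compatible frontend

a = np.array([[1, 1], [2, 2], [3, 3]])
np.insert(a, 1, np.array(5))           # axis=None flattens: [1., 5., 1., 2., 2., 3., 3.]
np.insert(a, 1, np.array(5), axis=1)   # [[1., 5., 1.], [2., 5., 2.], [3., 5., 3.]]

b = a.flatten()
np.insert(b, np.array([2, 2], dtype=np.int64), np.array([5, 6]))  # [1., 1., 5., 6., 2., 2., 3., 3.]
np.insert(b, slice(2, 4), np.array([5, 6]))                       # [1., 1., 5., 2., 6., 2., 3., 3.]
```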
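On the kernel side, `GetStride`, `GetKernelShape` and the `out_idx` loops all rely on the same row-major index arithmetic: decompose the flat output index `i` digit by digit with the output strides, remap the `axis` coordinate to either `values` or `arr`, then rebuild a flat source offset with that tensor's strides, sending size-1 (broadcast) axes to 0. A minimal Python sketch of that arithmetic follows; the helper names are mine, not from the MXNet sources:

```python
def row_major_strides(shape):
    """What GetStride computes: stride[i] = prod(shape[i+1:])."""
    strides, acc = [0] * len(shape), 1
    for i in range(len(shape) - 1, -1, -1):
        strides[i] = acc
        acc *= shape[i]
    return strides

def decompose(i, shape, strides):
    """out_idx[j] = (i // stride[j]) % shape[j], as in the kernels' Map()."""
    return [(i // strides[j]) % shape[j] for j in range(len(shape))]

def flat_offset(idx, shape, strides):
    """Rebuild a flat offset; size-1 (broadcast) axes contribute 0."""
    return sum(strides[j] * (0 if shape[j] == 1 else idx[j]) for j in range(len(shape)))

# Round trip on a 3x3 output (e.g. a (3, 2) 'arr' with one column inserted).
out_shape = [3, 3]
out_strides = row_major_strides(out_shape)
for i in range(9):
    assert flat_offset(decompose(i, out_shape, out_strides), out_shape, out_strides) == i
```

The kernels do the same with fixed-size `mshadow::Shape<10>` buffers and an explicit `ndim` argument so the loop bounds are known inside the device kernel.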