diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
index ac97c20a8a5a..d00fbec9ffe9 100644
--- a/python/mxnet/ndarray/numpy/_op.py
+++ b/python/mxnet/ndarray/numpy/_op.py
@@ -3605,8 +3605,8 @@ def median(a, axis=None, out=None, keepdims=False):
     -------
     median : ndarray
         A new array holding the result. If the input contains integers
-        or floats smaller than ``float64``, then the output data-type is
-        ``np.float64``. Otherwise, the data-type of the output is the
+        or floats smaller than ``float32``, then the output data-type is
+        ``np.float32``. Otherwise, the data-type of the output is the
         same as that of the input. If `out` is specified, that array is
         returned instead.

diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index d66650884066..9a3d5d21915b 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -5368,8 +5368,8 @@ def median(a, axis=None, out=None, keepdims=False):
     -------
     median : ndarray
         A new array holding the result. If the input contains integers
-        or floats smaller than ``float64``, then the output data-type is
-        ``np.float64``. Otherwise, the data-type of the output is the
+        or floats smaller than ``float32``, then the output data-type is
+        ``np.float32``. Otherwise, the data-type of the output is the
         same as that of the input. If `out` is specified, that array is
         returned instead.

diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py
index c15760b4dc58..3585c150ae54 100644
--- a/python/mxnet/symbol/numpy/_symbol.py
+++ b/python/mxnet/symbol/numpy/_symbol.py
@@ -3457,8 +3457,8 @@ def median(a, axis=None, out=None, keepdims=False):
     -------
     median : _Symbol
         A new array holding the result. If the input contains integers
-        or floats smaller than ``float64``, then the output data-type is
-        ``np.float64``. Otherwise, the data-type of the output is the
+        or floats smaller than ``float32``, then the output data-type is
+        ``np.float32``. Otherwise, the data-type of the output is the
         same as that of the input. If `out` is specified, that array is
         returned instead.
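Reviewer note on the three docstring hunks above: the documented dtype contract changes from float64-by-default to float32-by-default, matching the operator-level type inference updated at the bottom of this patch. A minimal sanity check of the documented behavior, assuming the `mxnet.numpy` front end (illustrative snippet, not part of the patch):

```python
from mxnet import np, npx
npx.set_np()  # enable NumPy-compatible array semantics

# Integer input: per the revised docstring, the result dtype is float32.
a = np.array([[1, 2, 3], [4, 5, 6]], dtype='int32')
print(np.median(a))        # expected: 3.5
print(np.median(a).dtype)  # expected: float32
```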
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index 702b6504a7fc..edc2f2adf4c2 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -274,7 +274,7 @@ inline void SetDependency(const nnvm::NodeAttrs& attrs,
        LOG(FATAL) << "resource type not yet supported";
      }
    }
-  CHECK_LE(ntmp, 2) << "Only support 1 temp space request";
+  CHECK_LE(ntmp, 1) << "Only support 1 temp space request";
 }

 // append extra resource requests for storage fallback
diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h
index 063fe17c1c7c..ac1d1611dcff 100644
--- a/src/operator/numpy/np_broadcast_reduce_op.h
+++ b/src/operator/numpy/np_broadcast_reduce_op.h
@@ -25,30 +25,16 @@
 #ifndef MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_
 #define MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_

+#include
 #include
 #include
 #include
-#include
 #include "../nn/moments-inl.h"
 #include "../tensor/broadcast_reduce_op.h"
 #include "../tensor/elemwise_binary_broadcast_op.h"
 #include "../tensor/ordering_op-inl.h"
 #include "../tensor/matrix_op-inl.h"
-#include
-#include
-#include
-#include
-#include
-#include "../mshadow_op.h"
-#include "../elemwise_op_common.h"
-#include "../tensor/sort_op.h"
-#include "../tensor/indexing_op.h"
-#include "/home/ubuntu/incubator-mxnet/3rdparty/mshadow/mshadow/extension/transpose.h"
-#include "/home/ubuntu/incubator-mxnet/3rdparty/mshadow/mshadow/extension/reshape.h"
-
-
 namespace mxnet {
 namespace op {
@@ -994,7 +980,6 @@ void NumpyMedianForward(const nnvm::NodeAttrs& attrs,
   }

   MSHADOW_TYPE_SWITCH(a.type_flag_, DType, {
-    using namespace mshadow;
     using namespace mshadow::expr;
     Tensor<xpu, 1, char> workspace;
     Tensor<xpu, 1, char> temp_workspace;
@@ -1037,12 +1022,10 @@ void NumpyMedianForward(const nnvm::NodeAttrs& attrs,
       trans_ptr = reinterpret_cast<DType*>(temp_mem.dptr_);
       sort_ptr = reinterpret_cast<DType*>(temp_mem.dptr_ + temp_data_size);
       idx_ptr = reinterpret_cast<index_t*>(temp_mem.dptr_ + 2 * temp_data_size);
-      //workspace_curr_ptr = reinterpret_cast<char*>(temp_mem.dptr_ + temp_mem_size);
     } else {
       idx_ptr = reinterpret_cast<index_t*>(temp_mem.dptr_);
       trans_ptr = reinterpret_cast<DType*>(temp_mem.dptr_ + idx_size);
       sort_ptr = reinterpret_cast<DType*>(temp_mem.dptr_ + temp_data_size + idx_size);
-      //workspace_curr_ptr = reinterpret_cast<char*>(temp_mem.dptr_ + temp_mem_size);
     }

     workspace_curr_ptr = temp_mem.dptr_ + 2 * temp_data_size + idx_size;
@@ -1059,18 +1042,14 @@ void NumpyMedianForward(const nnvm::NodeAttrs& attrs,
     TBlob a_sort = TBlob(sort_ptr, t_shape, xpu::kDevMask);
     TBlob a_idx = TBlob(idx_ptr, t_shape, xpu::kDevMask);

-    /*TopKImpl(ctx.run_ctx,
-               ctx.requested[1], {kWriteTo, kNullOp}, a_trans.reshape(t_shape),
-               {a_sort, a_idx},
-               topk_param);*/
-
     // input
     std::vector<OpReqType> req_TopK = {kWriteTo, kNullOp};
     TBlob src = a_trans.reshape(t_shape);
     std::vector<TBlob> ret = {a_sort, a_idx};
     TopKParam parameter = topk_param;
     ParseTopKParam(src.shape_, parameter,
-                   &target_shape, &batch_size, &element_num, &axis_topk, &k, &do_transpose, &is_ascend);
+                   &target_shape, &batch_size, &element_num, &axis_topk,
+                   &k, &do_transpose, &is_ascend);
     CHECK_LE(element_num, mxnet::common::MaxIntegerValue<index_t>())
       << "'index_t' does not have a sufficient precision to represent "
       << "the indices of the input array. The total element_num is "
@@ -1079,61 +1058,53 @@ void NumpyMedianForward(const nnvm::NodeAttrs& attrs,
     Tensor<xpu, 3, DType> dat = src.FlatTo3D<xpu, DType>(axis_topk, axis_topk, s);
     sorted_dat = Tensor<xpu, 1, DType>(reinterpret_cast<DType*>(workspace_curr_ptr),
-                                         Shape1(src.Size()), s);  // contain sorted dat
-      workspace_curr_ptr += PadBytes(sizeof(DType) * src.Size(), alignment);
-      indices = Tensor<xpu, 1, index_t>(reinterpret_cast<index_t*>(workspace_curr_ptr),
-                                        Shape1(src.Size()), s);  // indices in the original matrix
-      workspace_curr_ptr += PadBytes(sizeof(index_t) * src.Size(), alignment);
-
-      if (parameter.ret_typ == topk_enum::kReturnMask) {
-        sel_indices = Tensor<xpu, 1, index_t>(reinterpret_cast<index_t*>(workspace_curr_ptr),
-                                              Shape1(batch_size * k), s);
-        workspace_curr_ptr += PadBytes(sizeof(index_t) * batch_size * k, alignment);
-        CHECK_EQ(sel_indices.CheckContiguous(), true);
-      }
+                                       Shape1(src.Size()), s);  // contain sorted dat
+    workspace_curr_ptr += PadBytes(sizeof(DType) * src.Size(), alignment);
+    indices = Tensor<xpu, 1, index_t>(reinterpret_cast<index_t*>(workspace_curr_ptr),
+                                      Shape1(src.Size()), s);  // indices in the original matrix
+    workspace_curr_ptr += PadBytes(sizeof(index_t) * src.Size(), alignment);

-      if (std::is_same<xpu, cpu>::value) {
-        Tensor<xpu, 1, DType> flattened_data;
-        if (do_transpose) {
-          flattened_data = Tensor<xpu, 1, DType>(reinterpret_cast<DType*>(workspace_curr_ptr),
-                                                 Shape1(src.Size()), s);
-          workspace_curr_ptr += sizeof(DType) * src.Size();
-          flattened_data = reshape(transpose(dat, Shape3(0, 2, 1)), Shape1(src.Size()));
-          CHECK_EQ(flattened_data.CheckContiguous(), true);
-        } else {
-          flattened_data = src.FlatTo1D<xpu, DType>(s);
-        }
-        // `temp_workspace` stores the flattened data
-        temp_workspace = Tensor<xpu, 1, char>(reinterpret_cast<char*>(flattened_data.dptr_),
-                                              Shape1(sizeof(DType)*src.Size()), s);
-        CHECK_EQ(temp_workspace.CheckContiguous(), true);
-      } else {
-        if (do_transpose) {
-          sorted_dat = reshape(transpose(dat, Shape3(0, 2, 1)), Shape1(src.Size()));
+    if (parameter.ret_typ == topk_enum::kReturnMask) {
+      sel_indices = Tensor<xpu, 1, index_t>(reinterpret_cast<index_t*>(workspace_curr_ptr),
+                                            Shape1(batch_size * k), s);
+      workspace_curr_ptr += PadBytes(sizeof(index_t) * batch_size * k, alignment);
+      CHECK_EQ(sel_indices.CheckContiguous(), true);
+    }
+
+    if (std::is_same<xpu, cpu>::value) {
+      Tensor<xpu, 1, DType> flattened_data;
+      if (do_transpose) {
+        flattened_data = Tensor<xpu, 1, DType>(reinterpret_cast<DType*>(workspace_curr_ptr),
+                                               Shape1(src.Size()), s);
+        workspace_curr_ptr += sizeof(DType) * src.Size();
+        flattened_data = reshape(transpose(dat, Shape3(0, 2, 1)), Shape1(src.Size()));
+        CHECK_EQ(flattened_data.CheckContiguous(), true);
+      } else {
+        flattened_data = src.FlatTo1D<xpu, DType>(s);
+      }
+      // `temp_workspace` stores the flattened data
+      temp_workspace = Tensor<xpu, 1, char>(reinterpret_cast<char*>(flattened_data.dptr_),
+                                            Shape1(sizeof(DType)*src.Size()), s);
+      CHECK_EQ(temp_workspace.CheckContiguous(), true);
     } else {
-        sorted_dat = reshape(dat, Shape1(src.Size()));
+      if (do_transpose) {
+        sorted_dat = reshape(transpose(dat, Shape3(0, 2, 1)), Shape1(src.Size()));
+      } else {
+        sorted_dat = reshape(dat, Shape1(src.Size()));
+      }
+      CHECK_EQ(sorted_dat.CheckContiguous(), true);
+      temp_workspace = Tensor<xpu, 1, char>(workspace_curr_ptr, Shape1(temp_size), s);
+      workspace_curr_ptr += temp_size;
     }
-      CHECK_EQ(sorted_dat.CheckContiguous(), true);
-      temp_workspace = Tensor<xpu, 1, char>(workspace_curr_ptr, Shape1(temp_size), s);  // temp space
-      workspace_curr_ptr += temp_size;
-    }

     mxnet_op::Kernel<range_fwd, xpu>::Launch(s, batch_size * element_num, 1, index_t{0}, index_t{1}, kWriteTo, indices.dptr_);
     CHECK_EQ(indices.CheckContiguous(), true);

     // 2. Perform inplace batch sort.
-    // After sorting, each batch in `sorted_dat` will be sorted in the corresponding order
-    //  up to the k-th element and the `indices` will contain the corresponding index in `sorted_dat`
-    // `temp_workspace` is used to store the flattend source data for CPU device, and it's used as
-    //  a temporal buffer for GPU device.
     TopKSort(sorted_dat, indices, temp_workspace, k, element_num, is_ascend, s);

     // 3. Assign results to the ret blob
-    // When returning indices, only update(modulo) required elements instead of full elements
-    // to avoid redundant calculation.
-    // Cast `ret_indices` from int to real_t could introduce conversion error when the element_num
-    // is large enough.
     if (parameter.ret_typ == topk_enum::kReturnMask) {
       Tensor<xpu, 1, DType> ret_mask = ret[0].FlatTo1D<xpu, DType>(s);
       ret_mask = scalar<DType>(0);
@@ -1145,8 +1116,8 @@ void NumpyMedianForward(const nnvm::NodeAttrs& attrs,
       if (do_transpose) {
         mxnet::TShape src_shape = src.shape_.FlatTo3D(axis_topk);
         CHECK_EQ(sel_indices.CheckContiguous(), true);
-        sel_indices = transpose_indices(sel_indices, Shape3(src_shape[0], src_shape[2], src_shape[1]),
-                                        Shape3(0, 2, 1));
+        sel_indices = transpose_indices(sel_indices, Shape3(src_shape[0], src_shape[2],
+                                        src_shape[1]), Shape3(0, 2, 1));
       }
       if (req_TopK[0] == kNullOp) {
         return;
@@ -1158,7 +1129,8 @@ void NumpyMedianForward(const nnvm::NodeAttrs& attrs,
       }
     } else if (parameter.ret_typ == topk_enum::kReturnIndices) {
       if (do_transpose) {
-        Tensor<xpu, 3, index_t> ret_indices = ret[0].FlatTo3D<xpu, index_t>(axis_topk, axis_topk, s);
+        Tensor<xpu, 3, index_t> ret_indices = ret[0].FlatTo3D<xpu, index_t>(axis_topk,
+                                                                            axis_topk, s);
         ASSIGN_DISPATCH(ret_indices, req_TopK[0], tcast<index_t>(F<mshadow_op::mod>(transpose(
                         slice<2>(inplace_reshape(indices,
                                                  Shape3(ret_indices.shape_[0],
@@ -1176,11 +1148,12 @@ void NumpyMedianForward(const nnvm::NodeAttrs& attrs,
     } else {
       if (do_transpose) {
         Tensor<xpu, 3, DType> ret_value = ret[0].FlatTo3D<xpu, DType>(axis_topk, axis_topk, s);
-        Tensor<xpu, 3, index_t> ret_indices = ret[1].FlatTo3D<xpu, index_t>(axis_topk, axis_topk, s);
+        Tensor<xpu, 3, index_t> ret_indices = ret[1].FlatTo3D<xpu, index_t>(axis_topk,
+                                                                            axis_topk, s);
         ASSIGN_DISPATCH(ret_value, req_TopK[0], transpose(
                         slice<2>(inplace_reshape(sorted_dat,
-                                                 Shape3(ret_value.shape_[0], ret_value.shape_[2], element_num)),
-                                 0, k), Shape3(0, 2, 1)));
+                                                 Shape3(ret_value.shape_[0], ret_value.shape_[2],
+                                                        element_num)), 0, k), Shape3(0, 2, 1)));
         ASSIGN_DISPATCH(ret_indices, req_TopK[1], tcast<index_t>(F<mshadow_op::mod>(transpose(
                         slice<2>(inplace_reshape(indices,
                                                  Shape3(ret_indices.shape_[0],
diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc
index 9eab4966eb97..de17858f3d87 100644
--- a/src/operator/numpy/np_broadcast_reduce_op_value.cc
+++ b/src/operator/numpy/np_broadcast_reduce_op_value.cc
@@ -327,8 +327,7 @@ inline bool NumpyMedianType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);

-  TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
-  TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
+  TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat32);

   return out_attrs->at(0) != -1 && in_attrs->at(0) != -1;
 }
@@ -347,7 +346,7 @@ NNVM_REGISTER_OP(_npi_median)
 .set_attr<FCompute>("FCompute", NumpyMedianForward<cpu>)
 .set_attr<FResourceRequest>("FResourceRequest",
   [](const NodeAttrs& attrs) {
-    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace, ResourceRequest::kTempSpace};
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
   })
 // .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true)
 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes);
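Reviewer note: the C++ hunks above work as a unit. `NumpyMedianType` now pins the output dtype to `mshadow::kFloat32` instead of mirroring the input type, and `_npi_median` requests a single `kTempSpace` resource, with the transposed input, sorted values, indices, and sort scratch all carved out of that one allocation through `workspace_curr_ptr` (hence the tightened `CHECK_LE(ntmp, 1)` in `imperative_utils.h`). A hedged end-to-end check of the resulting behavior, again assuming the `mxnet.numpy` front end (illustrative, not part of the patch):

```python
from mxnet import np, npx
npx.set_np()

x = np.array([[3, 1, 2], [6, 4, 5]], dtype='int32')

# axis=1 exercises the transpose + batched-sort path rewritten above.
m = np.median(x, axis=1)
print(m)        # expected: [2. 5.]
print(m.dtype)  # expected: float32, per the new NumpyMedianType

# keepdims retains the reduced axis with length 1.
print(np.median(x, axis=1, keepdims=True).shape)  # expected: (2, 1)
```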