From 61f3dbc8daa5e29010447c4d6b929908302cfcd3 Mon Sep 17 00:00:00 2001 From: Hao Jin Date: Fri, 30 Aug 2019 05:40:57 +0800 Subject: [PATCH] numpy-compatible cumsum upstream (#15924) --- python/mxnet/_numpy_op_doc.py | 51 +++++++ src/operator/numpy/np_cumsum-inl.h | 188 +++++++++++++++++++++++++ src/operator/numpy/np_cumsum.cc | 94 +++++++++++++ src/operator/numpy/np_cumsum.cu | 37 +++++ tests/python/unittest/test_numpy_op.py | 43 +++++- 5 files changed, 412 insertions(+), 1 deletion(-) create mode 100644 src/operator/numpy/np_cumsum-inl.h create mode 100644 src/operator/numpy/np_cumsum.cc create mode 100644 src/operator/numpy/np_cumsum.cu diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py index 5543ebc8e8c9..4e5ea74c2771 100644 --- a/python/mxnet/_numpy_op_doc.py +++ b/python/mxnet/_numpy_op_doc.py @@ -52,3 +52,54 @@ def _np_zeros_like(a): Array of zeros with the same shape and type as `a`. """ pass + + +def _np_cumsum(a, axis=None, dtype=None, out=None): + """Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis along which the cumulative sum is computed. The default + (None) is to compute the cumsum over the flattened array. + dtype : dtype, optional + Type of the returned array and of the accumulator in which the + elements are summed. If `dtype` is not specified, it defaults + to the dtype of `a`, unless `a` has an integer dtype with a + precision less than that of the default platform integer. In + that case, the default platform integer is used. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type will be cast if necessary. See `doc.ufuncs` + (Section "Output arguments") for more details. + + Returns + ------- + cumsum_along_axis : ndarray. + A new array holding the result is returned unless `out` is + specified, in which case a reference to `out` is returned. The + result has the same size as `a`, and the same shape as `a` if + `axis` is not None or `a` is a 1-d array. + + Examples + -------- + >>> a = np.array([[1,2,3], [4,5,6]]) + >>> a + array([[1, 2, 3], + [4, 5, 6]]) + >>> np.cumsum(a) + array([ 1, 3, 6, 10, 15, 21]) + >>> np.cumsum(a, dtype=float) # specifies type of output value(s) + array([ 1., 3., 6., 10., 15., 21.]) + >>> np.cumsum(a,axis=0) # sum over rows for each of the 3 columns + array([[1, 2, 3], + [5, 7, 9]]) + >>> np.cumsum(a,axis=1) # sum over columns for each of the 2 rows + array([[ 1, 3, 6], + [ 4, 9, 15]]) + + """ + pass diff --git a/src/operator/numpy/np_cumsum-inl.h b/src/operator/numpy/np_cumsum-inl.h new file mode 100644 index 000000000000..6c6b56d46e76 --- /dev/null +++ b/src/operator/numpy/np_cumsum-inl.h @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_cumsum-inl.h + * \brief Function definition of numpy-compatible cumsum operator + */ + +#ifndef MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_ + +#include +#include +#include +#include "../mxnet_op.h" +#include "../operator_common.h" +#include "../elemwise_op_common.h" + +namespace mxnet { +namespace op { + +struct CumsumParam : public dmlc::Parameter { + dmlc::optional axis; + dmlc::optional dtype; + DMLC_DECLARE_PARAMETER(CumsumParam) { + DMLC_DECLARE_FIELD(axis) + .set_default(dmlc::optional()) + .describe("Axis along which the cumulative sum is computed." + " The default (None) is to compute the cumsum over the flattened array."); + DMLC_DECLARE_FIELD(dtype) + .add_enum("float16", mshadow::kFloat16) + .add_enum("float32", mshadow::kFloat32) + .add_enum("float64", mshadow::kFloat64) + .add_enum("int8", mshadow::kInt8) + .add_enum("int32", mshadow::kInt32) + .add_enum("int64", mshadow::kInt64) + .set_default(dmlc::optional()) + .describe("Type of the returned array and of the accumulator in which the elements" + " are summed. If dtype is not specified, it defaults to the dtype of a," + " unless a has an integer dtype with a precision less than that of the" + " default platform integer. In that case, the default platform integer is used."); + } +}; + +struct cumsum_forward { + template + MSHADOW_XINLINE static void Map(int i, + OType *out, + const IType *in, + const int middle, + const int trailing) { + int left = i / trailing, right = i % trailing; + int offset = left * middle * trailing + right; + const IType *lane_in = in + offset; + OType *lane_out = out + offset; + lane_out[0] = OType(lane_in[0]); + for (int j = 1; j < middle; ++j) { + lane_out[j * trailing] = lane_out[(j - 1) * trailing] + OType(lane_in[j * trailing]); + } + } +}; + +template +void CumsumForwardImpl(const OpContext& ctx, + const TBlob& in, + const TBlob& out, + const dmlc::optional& axis) { + using namespace mshadow; + using namespace mxnet_op; + + CHECK(!axis.has_value() || + ((axis.value() >= -out.shape_.ndim()) && axis.value() < out.shape_.ndim())) + << "axis value " << axis.value() << " out of range"; + + int middle = axis.has_value() ? out.shape_[axis.value()] : out.Size(); + if (middle == 0 || out.Size() == 0) return; + int trailing = 1; + if (axis.has_value()) { + for (int i = axis.value() + 1; i < out.shape_.ndim(); ++i) { + trailing *= out.shape_[i]; + } + } + + Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(in.type_flag_, IType, { + MSHADOW_TYPE_SWITCH(out.type_flag_, OType, { + Kernel::Launch( + s, out.Size() / middle, out.dptr(), + in.dptr(), middle, trailing); + }); + }); +} + +template +void CumsumForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mxnet_op; + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + const CumsumParam ¶m = nnvm::get(attrs.parsed); + + CumsumForwardImpl(ctx, inputs[0], outputs[0], param.axis); +} + +struct cumsum_backward { + template + MSHADOW_XINLINE static void Map(int i, + IType *igrad, + const OType *ograd, + const int middle, + const int trailing) { + int left = i / trailing, right = i % trailing; + int offset = left * middle * trailing + right; + const OType *lane_ograd = ograd + offset; + IType *lane_igrad = igrad + offset; + lane_igrad[(middle - 1) * trailing] = IType(lane_ograd[(middle - 1) * trailing]); + for (int j = middle - 2; j >= 0; --j) { + lane_igrad[j * trailing] = lane_igrad[(j + 1) * trailing] + IType(lane_ograd[j * trailing]); + } + } +}; + +template +void CumsumBackwardImpl(const OpContext& ctx, + const TBlob& ograd, + const TBlob& igrad, + const dmlc::optional& axis) { + using namespace mshadow; + using namespace mxnet_op; + int middle = axis.has_value() ? igrad.shape_[axis.value()] : igrad.Size(); + if (middle == 0 || igrad.Size() == 0) return; + int trailing = 1; + if (axis.has_value()) { + for (int i = axis.value() + 1; i < igrad.shape_.ndim(); ++i) { + trailing *= igrad.shape_[i]; + } + } + Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(igrad.type_flag_, IType, { + MSHADOW_TYPE_SWITCH(ograd.type_flag_, OType, { + Kernel::Launch( + s, igrad.Size() / middle, igrad.dptr(), + ograd.dptr(), middle, trailing); + }); + }); +} + +template +void CumsumBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mxnet_op; + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + const CumsumParam ¶m = nnvm::get(attrs.parsed); + + CumsumBackwardImpl(ctx, inputs[0], outputs[0], param.axis); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_ diff --git a/src/operator/numpy/np_cumsum.cc b/src/operator/numpy/np_cumsum.cc new file mode 100644 index 000000000000..0ddbf521186c --- /dev/null +++ b/src/operator/numpy/np_cumsum.cc @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_cumsum.cc + * \brief CPU implementation of numpy-compatible cumsum operator + */ + +#include "./np_cumsum-inl.h" + +namespace mxnet { +namespace op { + +inline bool CumsumShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const CumsumParam ¶m = nnvm::get(attrs.parsed); + + if (param.axis.has_value()) { + return ElemwiseShape<1, 1>(attrs, in_attrs, out_attrs); + } else { + TShape out_shape(1, in_attrs->at(0).Size()); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, out_shape); + return shape_is_known(out_attrs->at(0)); + } +} + +inline bool CumsumType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const CumsumParam ¶m = nnvm::get(attrs.parsed); + + if (param.dtype.has_value()) { + TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value()); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + } + + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; +} + +DMLC_REGISTER_PARAMETER(CumsumParam); + +NNVM_REGISTER_OP(_np_cumsum) +.add_alias("cumsum") +.describe(R"code(Return the cumulative sum of the elements along a given axis.)code" ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.set_attr("FInferShape", CumsumShape) +.set_attr("FInferType", CumsumType) +.set_attr("FCompute", CumsumForward) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_np_cumsum"}) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.add_argument("a", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(CumsumParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_np_cumsum) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("TIsBackward", true) +.set_attr("FCompute", CumsumBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_cumsum.cu b/src/operator/numpy/np_cumsum.cu new file mode 100644 index 000000000000..cc574ebf72c5 --- /dev/null +++ b/src/operator/numpy/np_cumsum.cu @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_cumsum.cu + * \brief GPU implementation of numpy-compatible cumsum operator + */ + +#include "./np_cumsum-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_np_cumsum) +.set_attr("FCompute", CumsumForward); + +NNVM_REGISTER_OP(_backward_np_cumsum) +.set_attr("FCompute", CumsumBackward); + +} // namespace op +} // namespace mxnet diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index c468a6b7a0fa..b8b9ca75a173 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -39,7 +39,7 @@ class TestTensordot(HybridBlock): def __init__(self, axes): super(TestTensordot, self).__init__() self._axes = axes - + def hybrid_forward(self, F, a, b): return F.np.tensordot(a, b, self._axes) @@ -1365,6 +1365,47 @@ def test_random_seed(): assert_almost_equal(ret[0].asnumpy(), ret[1].asnumpy(), rtol=1e-4, atol=1e-5, use_broadcast=False) +@with_seed() +@use_np +def test_np_cumsum(): + def np_cumsum_backward(ograd, axis=None, dtype=None): + return _np.flip(_np.cumsum(_np.flip(ograd, axis=axis), axis=axis, dtype=dtype), axis=axis) + + class TestCumsum(HybridBlock): + def __init__(self, axis=None, dtype=None): + super(TestCumsum, self).__init__() + self._axis = axis + self._dtype = dtype + + def hybrid_forward(self, F, a): + return F.np.cumsum(a, axis=self._axis, dtype=self._dtype) + + shapes = [(2, 3, 4), (2, 0, 3), ()] + for hybridize in [True, False]: + for shape in shapes: + for axis in [None] + [i for i in range(0, len(shape))]: + for otype in [None, _np.float32, _np.float64]: + test_cumsum = TestCumsum(axis=axis, dtype=otype) + if hybridize: + test_cumsum.hybridize() + for itype in [_np.float16, _np.float32, _np.float64]: + x = rand_ndarray(shape).astype(itype).as_np_ndarray() + x.attach_grad() + np_out = _np.cumsum(x.asnumpy(), axis=axis, dtype=otype) + with mx.autograd.record(): + mx_out = test_cumsum(x) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + mx_out.backward() + np_backward = np_cumsum_backward(_np.ones(np_out.shape, dtype=otype), + axis=axis, dtype=otype).reshape(x.shape) + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5) + + mx_out = np.cumsum(x, axis=axis, dtype=otype) + np_out = _np.cumsum(x.asnumpy(), axis=axis, dtype=otype) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + if __name__ == '__main__': import nose nose.runmodule()