From e91927ef76113c9c589e8bd4a0d4cb12cd4e5bdc Mon Sep 17 00:00:00 2001
From: Hao Jin
Date: Sun, 23 Jun 2019 12:27:16 +0800
Subject: [PATCH] numpy-compatible cumsum (#15309)

---
 src/operator/numpy/np_cumsum-inl.h     | 184 +++++++++++++++++++++++++
 src/operator/numpy/np_cumsum.cc        |  92 +++++++++++++
 src/operator/numpy/np_cumsum.cu        |  37 +++++
 tests/python/unittest/test_numpy_op.py |  42 ++++++
 4 files changed, 355 insertions(+)
 create mode 100644 src/operator/numpy/np_cumsum-inl.h
 create mode 100644 src/operator/numpy/np_cumsum.cc
 create mode 100644 src/operator/numpy/np_cumsum.cu

diff --git a/src/operator/numpy/np_cumsum-inl.h b/src/operator/numpy/np_cumsum-inl.h
new file mode 100644
index 000000000000..a9d2d8b43681
--- /dev/null
+++ b/src/operator/numpy/np_cumsum-inl.h
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_cumsum-inl.h
+ * \brief Function definition of numpy-compatible cumsum operator
+ */
+
+#ifndef MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_
+#define MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_
+
+#include <mxnet/base.h>
+#include <mxnet/operator_util.h>
+#include <vector>
+#include "../mxnet_op.h"
+#include "../operator_common.h"
+#include "../elemwise_op_common.h"
+
+namespace mxnet {
+namespace op {
+
+struct CumsumParam : public dmlc::Parameter<CumsumParam> {
+  dmlc::optional<int> axis;
+  dmlc::optional<int> dtype;
+  DMLC_DECLARE_PARAMETER(CumsumParam) {
+    DMLC_DECLARE_FIELD(axis)
+      .set_default(dmlc::optional<int>())
+      .describe("Axis along which the cumulative sum is computed."
+                " The default (None) is to compute the cumsum over the flattened array.");
+    DMLC_DECLARE_FIELD(dtype)
+      .add_enum("float16", mshadow::kFloat16)
+      .add_enum("float32", mshadow::kFloat32)
+      .add_enum("float64", mshadow::kFloat64)
+      .add_enum("int8", mshadow::kInt8)
+      .add_enum("int32", mshadow::kInt32)
+      .add_enum("int64", mshadow::kInt64)
+      .set_default(dmlc::optional<int>())
+      .describe("Type of the returned array and of the accumulator in which the elements"
+                " are summed. If dtype is not specified, it defaults to the dtype of a,"
+                " unless a has an integer dtype with a precision less than that of the"
+                " default platform integer. In that case, the default platform integer is used.");
+  }
+};
+
+struct cumsum_forward {
+  template<typename IType, typename OType>
+  MSHADOW_XINLINE static void Map(int i,
+                                  OType *out,
+                                  const IType *in,
+                                  const int middle,
+                                  const int trailing) {
+    int left = i / trailing, right = i % trailing;
+    int offset = left * middle * trailing + right;
+    const IType *lane_in = in + offset;
+    OType *lane_out = out + offset;
+    lane_out[0] = OType(lane_in[0]);
+    for (int j = 1; j < middle; ++j) {
+      lane_out[j * trailing] = lane_out[(j - 1) * trailing] + OType(lane_in[j * trailing]);
+    }
+  }
+};
+
+template<typename xpu>
+void CumsumForwardImpl(const OpContext& ctx,
+                       const TBlob& in,
+                       const TBlob& out,
+                       const dmlc::optional<int>& axis) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+
+  int middle = axis.has_value() ? out.shape_[axis.value()] : out.Size();
+  if (middle == 0 || out.Size() == 0) return;
+  int trailing = 1;
+  if (axis.has_value()) {
+    for (int i = axis.value() + 1; i < out.shape_.ndim(); ++i) {
+      trailing *= out.shape_[i];
+    }
+  }
+
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  MSHADOW_TYPE_SWITCH(in.type_flag_, IType, {
+    MSHADOW_TYPE_SWITCH(out.type_flag_, OType, {
+      Kernel<cumsum_forward, xpu>::Launch(
+        s, out.Size() / middle, out.dptr<OType>(),
+        in.dptr<IType>(), middle, trailing);
+    });
+  });
+}
+
+template<typename xpu>
+void CumsumForward(const nnvm::NodeAttrs& attrs,
+                   const OpContext& ctx,
+                   const std::vector<TBlob>& inputs,
+                   const std::vector<OpReqType>& req,
+                   const std::vector<TBlob>& outputs) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(req.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+  const CumsumParam &param = nnvm::get<CumsumParam>(attrs.parsed);
+
+  CumsumForwardImpl<xpu>(ctx, inputs[0], outputs[0], param.axis);
+}
+
+struct cumsum_backward {
+  template<typename IType, typename OType>
+  MSHADOW_XINLINE static void Map(int i,
+                                  IType *igrad,
+                                  const OType *ograd,
+                                  const int middle,
+                                  const int trailing) {
+    int left = i / trailing, right = i % trailing;
+    int offset = left * middle * trailing + right;
+    const OType *lane_ograd = ograd + offset;
+    IType *lane_igrad = igrad + offset;
+    lane_igrad[(middle - 1) * trailing] = IType(lane_ograd[(middle - 1) * trailing]);
+    for (int j = middle - 2; j >= 0; --j) {
+      lane_igrad[j * trailing] = lane_igrad[(j + 1) * trailing] + IType(lane_ograd[j * trailing]);
+    }
+  }
+};
+
+template<typename xpu>
+void CumsumBackwardImpl(const OpContext& ctx,
+                        const TBlob& ograd,
+                        const TBlob& igrad,
+                        const dmlc::optional<int>& axis) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  int middle = axis.has_value() ? igrad.shape_[axis.value()] : igrad.Size();
+  if (middle == 0 || igrad.Size() == 0) return;
+  int trailing = 1;
+  if (axis.has_value()) {
+    for (int i = axis.value() + 1; i < igrad.shape_.ndim(); ++i) {
+      trailing *= igrad.shape_[i];
+    }
+  }
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  MSHADOW_TYPE_SWITCH(igrad.type_flag_, IType, {
+    MSHADOW_TYPE_SWITCH(ograd.type_flag_, OType, {
+      Kernel<cumsum_backward, xpu>::Launch(
+        s, igrad.Size() / middle, igrad.dptr<IType>(),
+        ograd.dptr<OType>(), middle, trailing);
+    });
+  });
+}
+
+template<typename xpu>
+void CumsumBackward(const nnvm::NodeAttrs& attrs,
+                    const OpContext& ctx,
+                    const std::vector<TBlob>& inputs,
+                    const std::vector<OpReqType>& req,
+                    const std::vector<TBlob>& outputs) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(req.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+  const CumsumParam &param = nnvm::get<CumsumParam>(attrs.parsed);
+
+  CumsumBackwardImpl<xpu>(ctx, inputs[0], outputs[0], param.axis);
+}
+
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_
diff --git a/src/operator/numpy/np_cumsum.cc b/src/operator/numpy/np_cumsum.cc
new file mode 100644
index 000000000000..8f16f25234ba
--- /dev/null
+++ b/src/operator/numpy/np_cumsum.cc
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_cumsum.cc
+ * \brief CPU implementation of numpy-compatible cumsum operator
+ */
+
+#include "./np_cumsum-inl.h"
+
+namespace mxnet {
+namespace op {
+
+inline bool CumsumShape(const nnvm::NodeAttrs& attrs,
+                        mxnet::ShapeVector *in_attrs,
+                        mxnet::ShapeVector *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  const CumsumParam &param = nnvm::get<CumsumParam>(attrs.parsed);
+
+  if (param.axis.has_value()) {
+    return ElemwiseShape<1, 1>(attrs, in_attrs, out_attrs);
+  } else {
+    TShape out_shape(1, in_attrs->at(0).Size());
+    SHAPE_ASSIGN_CHECK(*out_attrs, 0, out_shape);
+    return shape_is_known(out_attrs->at(0));
+  }
+}
+
+inline bool CumsumType(const nnvm::NodeAttrs& attrs,
+                       std::vector<int> *in_attrs,
+                       std::vector<int> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  const CumsumParam &param = nnvm::get<CumsumParam>(attrs.parsed);
+
+  if (param.dtype.has_value()) {
+    TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value());
+  } else {
+    TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
+    TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
+  }
+
+  return out_attrs->at(0) != -1 && in_attrs->at(0) != -1;
+}
+
+DMLC_REGISTER_PARAMETER(CumsumParam);
+
+NNVM_REGISTER_OP(_np_cumsum)
+.set_attr_parser(ParamParser<CumsumParam>)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr<nnvm::FListInputNames>("FListInputNames",
+  [](const NodeAttrs& attrs) {
+    return std::vector<std::string>{"a"};
+  })
+.set_attr<mxnet::FInferShape>("FInferShape", CumsumShape)
+.set_attr<nnvm::FInferType>("FInferType", CumsumType)
+.set_attr<FCompute>("FCompute", CumsumForward<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_np_cumsum"})
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs) {
+    return std::vector<std::pair<int, int> >{{0, 0}};
+  })
+.add_argument("a", "NDArray-or-Symbol", "Input ndarray")
+.add_arguments(CumsumParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_backward_np_cumsum)
+.set_attr_parser(ParamParser<CumsumParam>)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FCompute>("FCompute", CumsumBackward<cpu>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_cumsum.cu b/src/operator/numpy/np_cumsum.cu
new file mode 100644
index 000000000000..cc574ebf72c5
--- /dev/null
+++ b/src/operator/numpy/np_cumsum.cu
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file np_cumsum.cu
+ * \brief GPU implementation of numpy-compatible cumsum operator
+ */
+
+#include "./np_cumsum-inl.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_np_cumsum)
+.set_attr<FCompute>("FCompute", CumsumForward<gpu>);
+
+NNVM_REGISTER_OP(_backward_np_cumsum)
+.set_attr<FCompute>("FCompute", CumsumBackward<gpu>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 3ce04409bfce..7a43083e9b86 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -977,6 +977,48 @@ def get_indices(axis_size):
         assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
 
 
+@with_seed()
+@npx.use_np_shape
+def test_np_cumsum():
+    def np_cumsum_backward(ograd, axis=None, dtype=None):
+        return _np.flip(_np.cumsum(_np.flip(ograd, axis=axis), axis=axis, dtype=dtype), axis=axis)
+
+    @npx.use_np_shape
+    class TestCumsum(HybridBlock):
+        def __init__(self, axis=None, dtype=None):
+            super(TestCumsum, self).__init__()
+            self._axis = axis
+            self._dtype = dtype
+
+        def hybrid_forward(self, F, a):
+            return F.np.cumsum(a, axis=self._axis, dtype=self._dtype)
+
+    shapes = [(2, 3, 4), (2, 0, 3), ()]
+    for hybridize in [True, False]:
+        for shape in shapes:
+            for axis in [None] + [i for i in range(0, len(shape))]:
+                for otype in [None, _np.float32, _np.float64]:
+                    test_cumsum = TestCumsum(axis=axis, dtype=otype)
+                    if hybridize:
+                        test_cumsum.hybridize()
+                    for itype in [_np.float16, _np.float32, _np.float64]:
+                        x = rand_ndarray(shape).astype(itype).as_np_ndarray()
+                        x.attach_grad()
+                        np_out = _np.cumsum(x.asnumpy(), axis=axis, dtype=otype)
+                        with mx.autograd.record():
+                            mx_out = test_cumsum(x)
+                        assert mx_out.shape == np_out.shape
+                        assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
+                        mx_out.backward()
+                        np_backward = np_cumsum_backward(_np.ones(np_out.shape, dtype=otype),
+                                                         axis=axis, dtype=otype).reshape(x.shape)
+                        assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5)
+
+                        mx_out = np.cumsum(x, axis=axis, dtype=otype)
+                        np_out = _np.cumsum(x.asnumpy(), axis=axis, dtype=otype)
+                        assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5)
+
+
 @with_seed()
 @npx.use_np_shape
 def test_np_tile():
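
A note on the kernels in np_cumsum-inl.h: both forward and backward parallelize over lanes rather than elements. With `middle` the length of the scanned axis and `trailing` the product of the dimensions after it, the launch creates `out.Size() / middle` work items, and work item `i` scans its own lane serially with stride `trailing`; because `right` varies fastest across consecutive `i`, neighboring work items touch adjacent memory at each scan step. The following pure-NumPy sketch reproduces that index arithmetic; `lane_cumsum` is illustrative only and not part of the patch:

```python
import numpy as np

def lane_cumsum(a, axis):
    """Illustrative re-implementation of the cumsum_forward lane decomposition."""
    a = np.ascontiguousarray(a)                  # the kernel indexes a C-contiguous buffer
    out = np.empty(a.shape, dtype=a.dtype)
    middle = a.shape[axis]                       # length of the scanned axis
    trailing = int(np.prod(a.shape[axis + 1:], dtype=np.int64))  # dims after the axis
    flat_in, flat_out = a.ravel(), out.reshape(-1)
    for i in range(a.size // middle):            # one work item per lane
        left, right = i // trailing, i % trailing
        offset = left * middle * trailing + right
        acc = flat_in[offset]
        flat_out[offset] = acc                   # lane_out[0] = lane_in[0]
        for j in range(1, middle):               # serial scan with stride `trailing`
            acc = acc + flat_in[offset + j * trailing]
            flat_out[offset + j * trailing] = acc
    return out

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
assert np.allclose(lane_cumsum(x, axis=1), np.cumsum(x, axis=1))
```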
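The backward kernel relies on the same identity as the test helper `np_cumsum_backward`: output element `j` of a cumsum depends on every input element `k <= j`, so the input gradient at position `k` is the suffix sum of the output gradient, i.e. a cumsum scanned in the opposite direction, which is exactly what `cumsum_backward` computes right to left. A small NumPy check of that identity against the explicit Jacobian (illustrative only, not part of the patch):

```python
import numpy as np

# Gradient of y = cumsum(x) along the last axis: dy_j/dx_k = 1 for k <= j, else 0.
ograd = np.random.rand(3, 4)
axis = 1

# Suffix-sum form, as in np_cumsum_backward: flip, cumsum, flip back.
igrad_scan = np.flip(np.cumsum(np.flip(ograd, axis=axis), axis=axis), axis=axis)

# Explicit Jacobian form for the last axis: J[j, k] = 1 for k <= j,
# and igrad[i, k] = sum_j ograd[i, j] * J[j, k].
n = ograd.shape[axis]
jac = np.tril(np.ones((n, n)))
igrad_jac = ograd @ jac

assert np.allclose(igrad_scan, igrad_jac)
```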