From 498388e33f42c5cddae34dd4b5624affc5183a93 Mon Sep 17 00:00:00 2001
From: Xingjian Shi
Date: Sat, 26 Oct 2019 18:47:09 -0700
Subject: [PATCH] add npx reshape (#16640)

---
 python/mxnet/_numpy_op_doc.py          |  66 ++++++++
 src/operator/numpy/np_matrix_op-inl.h  |  54 ++++++-
 src/operator/numpy/np_matrix_op.cc     | 206 ++++++++++++++++++++++---
 src/operator/numpy/np_matrix_op.cu     |   3 +
 tests/python/unittest/test_numpy_op.py |  63 ++++++++
 5 files changed, 371 insertions(+), 21 deletions(-)

diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py
index d9bb378d3049..bcbef9d047d1 100644
--- a/python/mxnet/_numpy_op_doc.py
+++ b/python/mxnet/_numpy_op_doc.py
@@ -961,3 +961,69 @@ def _np_broadcast_to(array, shape, out=None):
            [1., 2., 3.]])
     """
     pass
+
+
+def _npx_reshape(a, newshape, reverse=False, order='C'):
+    """
+    Gives a new shape to an array without changing its data.
+    This function always returns a copy of the input array.
+
+    Parameters
+    ----------
+    a : ndarray
+        Array to be reshaped.
+    newshape : int or tuple of ints
+        The new shape should be compatible with the original shape.
+        If an integer, the result will be a 1-D array of that length.
+        One shape dimension can be -1. In this case, the value is inferred
+        from the length of the array and the remaining dimensions.
+        The values -2 to -6 are used for shape manipulation:
+
+        - -2 copies this dimension from the input to the output shape.
+        - -3 skips the current dimension if and only if its size is one.
+        - -4 copies all remaining dimensions of the input to the output shape.
+        - -5 uses the product of two consecutive dimensions of the input
+          shape as the output dimension.
+        - -6 splits one dimension of the input into the two dimensions passed
+          subsequent to -6 in the new shape.
+
+    reverse : bool, optional
+        If set to true, the special values are inferred from right to left.
+    order : {'C'}, optional
+        Read the elements of `a` using this index order, and place the
+        elements into the reshaped array using this index order. 'C'
+        means to read/write the elements using C-like index order,
+        with the last axis index changing fastest, back to the first
+        axis index changing slowest. Other order types such as 'F'/'A'
+        may be added in the future.
+
+    Returns
+    -------
+    reshaped_array : ndarray
+        Always a copy of the original array. This behavior differs from the
+        official NumPy ``reshape`` operator, where views of the original
+        array may be returned.
+
+    Examples
+    --------
+    >>> x = np.ones((2, 3, 8))
+    >>> npx.reshape(x, (-2, -2, 2, -1)).shape
+    (2, 3, 2, 4)
+    >>> x = np.ones((8, 3, 3, 3, 4, 4))
+    >>> npx.reshape(x, (-6, 2, -1, -4)).shape
+    (2, 4, 3, 3, 3, 4, 4)
+    >>> x = np.ones((8, 3, 3, 3, 4, 4))
+    >>> npx.reshape(x, (-5, -4)).shape
+    (24, 3, 3, 4, 4)
+    >>> x = np.ones((8, 1, 1, 1, 3))
+    >>> npx.reshape(x, (-2, -3, -3, -3, -2)).shape
+    (8, 3)
+    >>> x = np.ones((8, 3, 3, 3, 3, 8))
+    >>> npx.reshape(x, (-4, -5), reverse=True).shape
+    (8, 3, 3, 3, 24)
+    >>> x = np.ones((8, 3, 2, 4, 8))
+    >>> npx.reshape(x, (-4, -1, 2, -6), reverse=True).shape
+    (8, 3, 2, 4, 4, 2)
+    """
+    pass
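As a quick cross-check of the first documented example, the special codes can be expanded by hand into an ordinary reshape. The snippet below is an illustrative sketch in plain NumPy and is not part of the patch (`npx.reshape` itself exists only in MXNet):

    import numpy as np

    x = np.arange(2 * 3 * 8).reshape((2, 3, 8))
    # (-2, -2, 2, -1) expands to: copy dim 0 (2), copy dim 1 (3),
    # take the literal 2, then infer the last dim as 8 / 2 = 4.
    y = x.reshape((2, 3, 2, 4))
    assert y.shape == (2, 3, 2, 4)
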
+ " Note that currently only C-like order is" + " supported"); + } +}; + template void NumpyTranspose(const nnvm::NodeAttrs& attrs, const OpContext& ctx, @@ -731,7 +784,6 @@ inline void HSplitOpBackward(const nnvm::NodeAttrs &attrs, } SplitOpBackwardImpl(attrs, ctx, inputs, req, outputs, real_axis); } - } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_matrix_op.cc b/src/operator/numpy/np_matrix_op.cc index 7bcd6ad27b52..0a6f9a150d8b 100644 --- a/src/operator/numpy/np_matrix_op.cc +++ b/src/operator/numpy/np_matrix_op.cc @@ -34,6 +34,9 @@ DMLC_REGISTER_PARAMETER(NumpyTransposeParam); DMLC_REGISTER_PARAMETER(NumpyRollParam); DMLC_REGISTER_PARAMETER(NumpyMoveaxisParam); DMLC_REGISTER_PARAMETER(NumpyRot90Param); +DMLC_REGISTER_PARAMETER(NumpyReshapeParam); +DMLC_REGISTER_PARAMETER(NumpyXReshapeParam); + bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_attrs, @@ -126,26 +129,6 @@ NNVM_REGISTER_OP(_np_transpose) .add_argument("a", "NDArray-or-Symbol", "Source input") .add_arguments(NumpyTransposeParam::__FIELDS__()); -struct NumpyReshapeParam : public dmlc::Parameter { - mxnet::TShape newshape; - std::string order; - DMLC_DECLARE_PARAMETER(NumpyReshapeParam) { - DMLC_DECLARE_FIELD(newshape) - .describe("The new shape should be compatible with the original shape." - " If an integer, then the result will be a 1-D array of that length." - " One shape dimension can be -1. In this case, the value is inferred" - " from the length of the array and remaining dimensions."); - DMLC_DECLARE_FIELD(order) - .set_default("C") - .describe("Read the elements of a using this index order, and place the elements into" - " the reshaped array using this index order. 'C' means to read/write the elements" - " using C-like index order, with the last axis index changing fastest, back to the" - " first axis index changing slowest. 
 bool NumpyReshapeInferShape(const mxnet::TShape& src, mxnet::TShape* dst) {
   if (shape_is_known(src) && shape_is_known(*dst)) {
@@ -202,6 +185,164 @@ bool NumpyReshapeShape(const nnvm::NodeAttrs& attrs,
   return success;
 }
 
+bool NumpyXReshapeInferShape(const mxnet::TShape& src,
+                             const mxnet::TShape& target,
+                             mxnet::TShape* output,
+                             const std::string &default_error_msg) {
+  bool target_shape_is_known = true;
+  dim_t target_size = 1;
+  for (int i = 0; i < target.ndim(); ++i) {
+    if (target[i] < 0) {
+      target_shape_is_known = false;
+      target_size = -1;
+      break;
+    } else {
+      target_size *= target[i];
+    }
+  }
+  if (shape_is_known(src) && target_shape_is_known) {
+    CHECK_EQ(src.Size(), target_size) << default_error_msg;
+    *output = TShape(target.begin(), target.end());
+    return true;
+  } else if (!shape_is_known(src) || target.ndim() == -1) {
+    return false;
+  } else {
+    int unknown_axis = -1;
+    dim_t known_dim_size_prod = 1;
+    std::vector<dim_t> output_shape_vector;
+    int src_inx = 0;
+    for (int i = 0; i < target.ndim(); ++i) {
+      dim_t proposed_dim = target[i];
+      CHECK(proposed_dim >= -6)
+          << "Dimension size must be greater than or equal to -6, received " << proposed_dim;
+      if (proposed_dim == -1) {
+        // infer the unknown dimension
+        CHECK_LT(unknown_axis, 0)
+            << "One and only one dim can be inferred";
+        unknown_axis = output_shape_vector.size();
+        output_shape_vector.push_back(-1);
+        src_inx++;
+      } else if (proposed_dim == -2) {
+        // copy the dimension from src to output
+        CHECK_LT(src_inx, src.ndim())
+            << "Mismatched dimension of proposed new shape";
+        known_dim_size_prod *= src[src_inx];
+        output_shape_vector.push_back(src[src_inx++]);
+      } else if (proposed_dim == -3) {
+        // skip the source dimension if and only if it is one
+        CHECK_EQ(src[src_inx], 1)
+            << "-3 index should only be used to skip dimensions of size 1";
+        src_inx++;
+      } else if (proposed_dim == -4) {
+        // copy all remaining dims from source
+        while (src_inx < src.ndim()) {
+          known_dim_size_prod *= src[src_inx];
+          const dim_t dn = src[src_inx++];
+          output_shape_vector.push_back(dn);
+        }
+      } else if (proposed_dim == -5) {
+        // merge two dims from source
+        CHECK_LT(src_inx, src.ndim()-1)
+            << "Not enough dimensions left for the product";
+        const dim_t d1 = src[src_inx++];
+        const dim_t d2 = src[src_inx++];
+        if (!mxnet::dim_size_is_known(d1) || !mxnet::dim_size_is_known(d2)) {
+          CHECK_LT(unknown_axis, 0)
+              << "One and only one dim can be inferred";
+          unknown_axis = output_shape_vector.size();
+          output_shape_vector.push_back(-1);
+        } else {
+          known_dim_size_prod *= d1*d2;
+          output_shape_vector.push_back(d1 * d2);
+        }
+      } else if (proposed_dim == -6) {
+        // split the source dim into two dims;
+        // read the left dim and then the right dim (either can be -1)
+        CHECK_LT(i + 2, target.ndim());
+        CHECK_LT(src_inx, src.ndim());
+        const dim_t d0 = src[src_inx++];
+        dim_t d1 = target[++i];
+        dim_t d2 = target[++i];
+        CHECK(d1 != -1 || d2 != -1) << "Split dims cannot both be -1.";
+        if (d1 == -1 && d0 >= 0) d1 = d0 / d2;  // d0 must be known to do this
+        if (d2 == -1 && d0 >= 0) d2 = d0 / d1;  // d0 must be known to do this
+        CHECK(d1 * d2 == static_cast<dim_t>(d0) || static_cast<dim_t>(d0) == dim_t(-1))
+            << "Split dims " << d1 << ", " << d2 << " do not divide original dim " << d0;
+        if (d1 == -1) {
+          CHECK_LT(unknown_axis, 0)
+ << "One and only one dim can be inferred"; + unknown_axis = output_shape_vector.size() + 1; + } + known_dim_size_prod *= d0 == -1 ? 1 : d0; + output_shape_vector.push_back(d1); + output_shape_vector.push_back(d2); + } else { + // greater than 0, new shape + known_dim_size_prod *= proposed_dim; + output_shape_vector.push_back(proposed_dim); + src_inx++; + } + } + + if (unknown_axis > -1) { + // if the input in zero size tensor, the output must be of known shape of zero size + CHECK_NE(known_dim_size_prod, 0) << default_error_msg; + CHECK(src.Size() % known_dim_size_prod == 0) << default_error_msg; + output_shape_vector[unknown_axis] = src.Size() / known_dim_size_prod; + } + + *output = mxnet::TShape(output_shape_vector.begin(), output_shape_vector.end()); + CHECK_EQ((*output).Size(), src.Size()) << default_error_msg; + return true; + } +} + +bool NumpyXReshapeShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; + CHECK_EQ(out_attrs->size(), 1U); + const NumpyXReshapeParam& param = nnvm::get(attrs.parsed); + // sanity check + bool has_unknown_dim_size = false; + for (int i = 0; i < param.newshape.ndim(); ++i) { + if (param.newshape[i] < 0) { + CHECK_GE(param.newshape[i], -6) + << "Dimension size must be greater than or equal to -6"; + if (param.newshape[i] == -1) { + CHECK(!has_unknown_dim_size) << "Can only specify one unknown dimension"; + has_unknown_dim_size = true; + } + } + } + + mxnet::TShape output_shape; + bool success; + std::stringstream ss; + ss << "Cannot reshape array of shape " << in_attrs->at(0) + << " into shape " << param.newshape + << " , reverse = " << param.reverse; + std::string err_msg = ss.str(); + if (!param.reverse) { + success = NumpyXReshapeInferShape(in_attrs->at(0), + param.newshape, &output_shape, err_msg); + } else { + mxnet::TShape rev_in_shape = in_attrs->at(0); + mxnet::TShape rev_newshape = param.newshape; + std::reverse(rev_in_shape.begin(), rev_in_shape.end()); + std::reverse(rev_newshape.begin(), rev_newshape.end()); + success = NumpyXReshapeInferShape(rev_in_shape, + rev_newshape, &output_shape, err_msg); + std::reverse(output_shape.begin(), output_shape.end()); + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, output_shape); + return success; +} + NNVM_REGISTER_OP(_np_reshape) .describe(R"code()code" ADD_FILELINE) .add_alias("_npi_reshape") @@ -227,6 +368,31 @@ NNVM_REGISTER_OP(_np_reshape) .add_argument("a", "NDArray-or-Symbol", "Array to be reshaped.") .add_arguments(NumpyReshapeParam::__FIELDS__()); + +NNVM_REGISTER_OP(_npx_reshape) +.describe(R"code()code" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyXReshapeShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_reshape"}) +.set_attr("FCompute", UnaryOp::IdentityCompute) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.set_attr("FInplaceIdentity", + [](const NodeAttrs& attrs){ + return std::vector{true}; + }) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "Array to be reshaped.") +.add_arguments(NumpyXReshapeParam::__FIELDS__()); + bool NumpySqueezeShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_attrs, mxnet::ShapeVector *out_attrs) { diff --git a/src/operator/numpy/np_matrix_op.cu b/src/operator/numpy/np_matrix_op.cu index 
diff --git a/src/operator/numpy/np_matrix_op.cu b/src/operator/numpy/np_matrix_op.cu
index 8c8301bb3bbf..6b4f7a11a9a2 100644
--- a/src/operator/numpy/np_matrix_op.cu
+++ b/src/operator/numpy/np_matrix_op.cu
@@ -109,5 +109,8 @@ NNVM_REGISTER_OP(_npi_hsplit)
 NNVM_REGISTER_OP(_npi_hsplit_backward)
 .set_attr<FCompute>("FCompute<gpu>", HSplitOpBackward<gpu>);
 
+NNVM_REGISTER_OP(_npx_reshape)
+.set_attr<FCompute>("FCompute<gpu>", UnaryOp::IdentityCompute<gpu>);
+
 }  // namespace op
 }  // namespace mxnet
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 5476fbee8be4..98a7b05dca9f 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -3674,6 +3674,69 @@ def test_np_true_divide():
             assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)
 
 
+@with_seed()
+@use_np
+def test_npx_reshape():
+    class TestNumpyXReshape(HybridBlock):
+        def __init__(self, newshape, reverse):
+            super(TestNumpyXReshape, self).__init__()
+            self._newshape = newshape
+            self._reverse = reverse
+
+        def hybrid_forward(self, F, a, *args, **kwargs):
+            return F.npx.reshape(a, self._newshape, reverse=self._reverse)
+
+    test_cases = [
+        [(2, 3, 5, 5), (-2, -1), False, (2, 75)],
+        [(2, 3, 5, 5), (-2, -2, -1), False, (2, 3, 25)],
+        [(5, 3, 4, 5), (-2, -1, -2), False, (5, 15, 4)],
+        [(2, 3, 5, 4), (-1, -2, -2), False, (8, 3, 5)],
+        [(2, 3, 5, 5), (-2, -2, -2, -2), False, (2, 3, 5, 5)],
+        [(2, 1, 4, 5), (-2, -3, -2, -2), False, (2, 4, 5)],
+        [(1, 1, 4, 1), (-3, -3, -2, -2), False, (4, 1)],
+        [(1, 1, 1, 1), (-3, -3, -3, -3), False, ()],
+        [(2, 4, 5, 3), (-1, 2, 2, 1), False, (30, 2, 2, 1)],
+        [(2, 3, 5, 6), (-4,), False, (2, 3, 5, 6)],
+        [(2, 3, 5, 6), (6, 1, -4), False, (6, 1, 5, 6)],
+        [(2, 3, 5, 6), (-5, -5), False, (6, 30)],
+        [(2, 3, 5, 6), (-5, -1), False, (6, 30)],
+        [(64,), (-6, 16, 4), False, (16, 4)],
+        [(64,), (-6, 16, -1), False, (16, 4)],
+        [(64, 1, 2, 3), (-6, 16, -1, -4), False, (16, 4, 1, 2, 3)],
+        [(8, 5, 4, 6), (-4, -1, 3, -6), True, (8, 5, 4, 2, 3)]
+    ]
+    for hybridize in [True, False]:
+        for shape, newshape, reverse, expected_ret_shape in test_cases:
+            for grad_req in ['write', 'add']:
+                # test gluon
+                test_reshape = TestNumpyXReshape(newshape=newshape, reverse=reverse)
+                if hybridize:
+                    test_reshape.hybridize()
+
+                a = mx.np.random.uniform(-1, 1, shape).astype(np.float32)
+                init_a_grad = mx.np.random.uniform(-1, 1, shape).astype(np.float32)
+                a.attach_grad(grad_req=grad_req)
+                if grad_req == 'add':
+                    a.grad[:] = init_a_grad
+                with mx.autograd.record():
+                    y = test_reshape(a)
+                assert y.shape == expected_ret_shape,\
+                    'y.shape={}, expected_ret_shape={}'.format(y.shape, expected_ret_shape)
+                assert_almost_equal(y.asnumpy(), a.asnumpy().reshape(expected_ret_shape), rtol=1e-3, atol=1e-5)
+
+                # test backward
+                mx.autograd.backward(y)
+                expected_grad = _np.ones(shape)
+                if grad_req == 'add':
+                    expected_grad += init_a_grad.asnumpy()
+                assert_almost_equal(a.grad.asnumpy(), expected_grad, rtol=1e-3, atol=1e-5)
+
+                # test imperative
+                npx_out = npx.reshape(a, newshape, reverse=reverse)
+                expected_out = _np.reshape(a.asnumpy(), expected_ret_shape)
+                assert_almost_equal(npx_out.asnumpy(), expected_out, rtol=1e-3, atol=1e-5)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
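In day-to-day use, the main convenience the new operator buys is shape-agnostic patterns such as "keep the batch dimension, flatten the rest". A minimal usage sketch, assuming an MXNet build that includes this patch with the numpy interface enabled:

    from mxnet import np, npx
    npx.set_np()

    x = np.ones((32, 64, 7, 7))         # e.g. conv features: (batch, C, H, W)
    flat = npx.reshape(x, (-2, -1))     # keep dim 0, collapse the rest
    assert flat.shape == (32, 64 * 7 * 7)

Because the forward kernel is IdentityCompute and FInplaceIdentity is true, the executor can treat the op as a metadata-only change and reuse the input buffer where possible.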