From 735438aea76f28b92c89629ccfc8612b53500a33 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Wed, 12 Dec 2018 22:36:17 -0800 Subject: [PATCH 01/14] Added the gradient reversal contrib operator Missing test for backwards pass --- .../contrib/gradient_reversal_op-inl.h | 213 ++++++++++++++++++ src/operator/contrib/gradient_reversal_op.cc | 68 ++++++ src/operator/contrib/gradient_reversal_op.cu | 39 ++++ .../python/unittest/test_contrib_operator.py | 5 + 4 files changed, 325 insertions(+) create mode 100644 src/operator/contrib/gradient_reversal_op-inl.h create mode 100644 src/operator/contrib/gradient_reversal_op.cc create mode 100644 src/operator/contrib/gradient_reversal_op.cu diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h new file mode 100644 index 000000000000..00753becbf85 --- /dev/null +++ b/src/operator/contrib/gradient_reversal_op-inl.h @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file gradient_reversal_op-inl.h + * \brief + * \author Istvan Fehervari +*/ +#ifndef MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ +#define MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ + +#include +#include "../mshadow_op.h" +#include "../mxnet_op.h" +#include "../operator_common.h" +#include "../elemwise_op_common.h" +#include "../tensor/init_op.h" + +namespace mxnet { +namespace op { + +struct GradientReversalParam : public dmlc::Parameter { + float l; + DMLC_DECLARE_PARAMETER(GradientReversalParam) { + DMLC_DECLARE_FIELD(l) + .set_default(0.0) + .describe("Lambda coefficient of the gradient reversal function."); + } +}; + +inline bool GradientReversalOpShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U; +} + +inline bool GradientReversalOpType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + return out_attrs->at(0) != -1; +} + +inline bool GradientReversalOpStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const int in_stype = in_attrs->at(0); + int& out_stype = out_attrs->at(0); + bool dispatched = false; + if (!dispatched && in_stype == 
kDefaultStorage) { + // dns -> dns + dispatched = storage_type_assign(&out_stype, kDefaultStorage, + dispatch_mode, DispatchMode::kFCompute); + } + if (!dispatched && in_stype == kCSRStorage) { + // csr -> csr + dispatched = storage_type_assign(&out_stype, kCSRStorage, + dispatch_mode, DispatchMode::kFComputeEx); + } + if (!dispatched) { + dispatched = dispatch_fallback(out_attrs, dispatch_mode); + } + return dispatched; +} + +template +struct gradient_reversal_forward { + template + MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data) { + KERNEL_ASSIGN(out_data[i], req, in_data[i]); + } +}; + +template +struct gradient_reversal_backward { + template + MSHADOW_XINLINE static void Map(int i, DType* in_grad, const DType* out_grad, + const DType* in_data, const float l) { + KERNEL_ASSIGN(in_grad[i], req, out_grad[i] * -l); + } +}; + +template +void GradientReversalOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + const TBlob& in_data = inputs[0]; + const TBlob& out_data = outputs[0]; + using namespace mxnet_op; + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, out_data.Size(), out_data.dptr(), in_data.dptr()); + }); + }); +} + +template +void GradientReversalOpForwardCsrImpl(const OpContext& ctx, + const NDArray& input, + const OpReqType req, + const NDArray& output) { + using namespace mshadow; + using namespace mxnet_op; + using namespace csr; + if (req == kNullOp) return; + CHECK_EQ(req, kWriteTo) << "GradientReversalOp with CSR only supports kWriteTo"; + Stream *s = ctx.get_stream(); + if (!input.storage_initialized()) { + FillZerosCsrImpl(s, output); + return; + } + const nnvm::dim_t nnz = input.storage_shape()[0]; + const nnvm::dim_t num_rows = output.shape()[0]; + output.CheckAndAlloc({Shape1(num_rows + 1), Shape1(nnz)}); + CHECK_EQ(output.aux_type(kIdx), output.aux_type(kIndPtr)) + << "The dtypes of indices and indptr don't match"; + MSHADOW_TYPE_SWITCH(output.dtype(), DType, { + MSHADOW_IDX_TYPE_SWITCH(output.aux_type(kIdx), IType, { + MXNET_ASSIGN_REQ_SWITCH(req, req_type, { + Kernel, xpu>::Launch( + s, nnz, output.data().dptr(), input.data().dptr()); + Copy(output.aux_data(kIdx).FlatTo1D(s), + input.aux_data(kIdx).FlatTo1D(s), s); + Copy(output.aux_data(kIndPtr).FlatTo1D(s), + input.aux_data(kIndPtr).FlatTo1D(s), s); + }); + }); + }); +} + +template +void GradientReversalOpForwardEx(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + const auto in_stype = inputs[0].storage_type(); + const auto out_stype = outputs[0].storage_type(); + if (in_stype == kCSRStorage && out_stype == kCSRStorage) { + GradientReversalOpForwardCsrImpl(ctx, inputs[0], req[0], outputs[0]); + } else { + LogUnimplementedOp(attrs, ctx, inputs, req, outputs); + } +} + +template +void GradientReversalOpBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 2U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + const TBlob& out_grad = 
inputs[0]; + const TBlob& in_data = inputs[1]; + const TBlob& in_grad = outputs[0]; + const GradientReversalParam& param = nnvm::get(attrs.parsed); + using namespace mxnet_op; + MSHADOW_TYPE_SWITCH(out_grad.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, in_grad.Size(), in_grad.dptr(), out_grad.dptr(), + in_data.dptr(), param.l); + }); + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ \ No newline at end of file diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc new file mode 100644 index 000000000000..4ead4d617c74 --- /dev/null +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file gradient_reversal_op.cc + * \brief + * \author Istvan Fehervari +*/ +#include "./gradient_reversal_op-inl.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(GradientReversalParam); + +NNVM_REGISTER_OP(_contrib_gradientreversal) +.describe(R"code(This operators implements the gradient reversal function. +In forward pass it acts as an identity tranform. During backpropagation it +multiplies the gradient from the subsequent level by −l and passes it to +the preceding layer. + +)code" ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; + }) +.set_attr("FInferShape", GradientReversalOpShape) +.set_attr("FInferType", GradientReversalOpType) +.set_attr("FInferStorageType", GradientReversalOpStorageType) +.set_attr("FCompute", GradientReversalOpForward) +.set_attr("FGradient", ElemwiseGradUseIn{"_contrib_backward_gradientreversal"}) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.add_argument("data", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(GradientReversalParam::__FIELDS__()); + +NNVM_REGISTER_OP(_contrib_backward_gradientreversal) +.set_attr_parser(ParamParser) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr("TIsBackward", true) +.set_attr("FCompute", GradientReversalOpBackward) +.set_attr("FComputeEx", GradientReversalOpForwardEx); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_reversal_op.cu new file mode 100644 index 000000000000..b391cebf1eec --- /dev/null +++ b/src/operator/contrib/gradient_reversal_op.cu @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file gradient_reversal_op.cu + * \brief + * \author Istvan Fehervari +*/ +#include "./gradient_reversal_op-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_contrib_gradientreversal) +.set_attr("FComputeEx", GradientReversalOpForwardEx) +.set_attr("FCompute", GradientReversalOpForward); + +NNVM_REGISTER_OP(_contrib_backward_gradientreversal) +.set_attr("FCompute", GradientReversalOpBackward); + +} // namespace op +} // namespace mxnet \ No newline at end of file diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 43d3db648a85..1bac701179fa 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -261,6 +261,11 @@ def test_multibox_target_op(): assert_array_equal(loc_mask.asnumpy(), expected_loc_mask) assert_array_equal(cls_target.asnumpy(), expected_cls_target) +def test_gradient_reversal_op(): + input = mx.nd.normal((5,5)) + + output = mx.nd.contrib.gradientreversal(input, l=1.0) + assert_array_equal(input, output) if __name__ == '__main__': import nose From 44eda552b9202afe680b216905ef7984e2592d15 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Wed, 12 Dec 2018 23:01:11 -0800 Subject: [PATCH 02/14] Fixed linting errors --- src/operator/contrib/gradient_reversal_op-inl.h | 3 ++- src/operator/contrib/gradient_reversal_op.cu | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h index 00753becbf85..3662312a535f 100644 --- a/src/operator/contrib/gradient_reversal_op-inl.h +++ b/src/operator/contrib/gradient_reversal_op-inl.h @@ -27,6 +27,7 @@ #define MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ #include +#include #include "../mshadow_op.h" #include "../mxnet_op.h" #include "../operator_common.h" @@ -210,4 +211,4 @@ void GradientReversalOpBackward(const nnvm::NodeAttrs& attrs, } // namespace op } // namespace mxnet -#endif // MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ \ No newline at end of file +#endif // MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_reversal_op.cu index b391cebf1eec..cf4871dd7c4e 100644 --- a/src/operator/contrib/gradient_reversal_op.cu +++ b/src/operator/contrib/gradient_reversal_op.cu @@ -36,4 +36,4 @@ NNVM_REGISTER_OP(_contrib_backward_gradientreversal) .set_attr("FCompute", GradientReversalOpBackward); } // namespace op -} // namespace mxnet \ No newline at end of file +} // namespace mxnet From e9bf741118181909356a7ac1ca1bee3128b9bc29 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Thu, 13 Dec 2018 10:31:15 -0800 Subject: [PATCH 03/14] Fixed forward test --- 
src/operator/contrib/gradient_reversal_op.cc | 3 +-- tests/python/unittest/test_contrib_operator.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index 4ead4d617c74..f88568be990e 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -33,9 +33,8 @@ DMLC_REGISTER_PARAMETER(GradientReversalParam); NNVM_REGISTER_OP(_contrib_gradientreversal) .describe(R"code(This operators implements the gradient reversal function. In forward pass it acts as an identity tranform. During backpropagation it -multiplies the gradient from the subsequent level by −l and passes it to +multiplies the gradient from the subsequent level by a negative factor and passes it to the preceding layer. - )code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_num_inputs(1) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 1bac701179fa..ac54db0b4078 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -262,7 +262,7 @@ def test_multibox_target_op(): assert_array_equal(cls_target.asnumpy(), expected_cls_target) def test_gradient_reversal_op(): - input = mx.nd.normal((5,5)) + input = mx.nd.random.normal(shape=(5,5)) output = mx.nd.contrib.gradientreversal(input, l=1.0) assert_array_equal(input, output) From dfd19066e93d5929a40ec1c146c321e46665cf23 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Thu, 13 Dec 2018 11:17:56 -0800 Subject: [PATCH 04/14] Added random forward / backward test for gradient reversal --- .../python/unittest/test_contrib_operator.py | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index ac54db0b4078..2a8a6735d07e 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -262,10 +262,40 @@ def test_multibox_target_op(): assert_array_equal(cls_target.asnumpy(), expected_cls_target) def test_gradient_reversal_op(): - input = mx.nd.random.normal(shape=(5,5)) + # We use the quadratic function in combination with gradient reversal + def f(x, a, b, c): + return a * x**2 + b * x + c - output = mx.nd.contrib.gradientreversal(input, l=1.0) - assert_array_equal(input, output) + a = np.random.random_sample() + b = np.random.random_sample() + c = np.random.random_sample() + l = np.random.random_sample() + data = mx.symbol.Variable('data') + quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) + gr_q_sym = mx.sym.contrib.gradientreversal(data=quad_sym, l=l) + + for dtype in [np.float16, np.float32, np.float64]: + for ndim in range(1, 6): + shape = rand_shape_nd(ndim, 5) + data_np = np.random.randn(*shape).astype(dtype) + expected = f(data_np, a, b, c) + backward_expected = (2 * a * data_np + b) * -l + + # check imperative forward + output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c) + output = mx.nd.contrib.gradientreversal(output, l=l) + assert_almost_equal(output.asnumpy(),expected, + rtol=1e-2 if dtype is np.float16 else 1e-5, + atol=1e-2 if dtype is np.float16 else 1e-5) + # check forward + check_symbolic_forward(gr_q_sym, [data_np], [expected], + rtol=1e-2 if dtype is np.float16 else 1e-5, + atol=1e-2 if dtype is np.float16 else 1e-5) + # check backward + check_symbolic_backward(gr_q_sym, [data_np], 
[np.ones(expected.shape)], + [backward_expected], + rtol=1e-2 if dtype is np.float16 else 1e-5, + atol=1e-2 if dtype is np.float16 else 1e-5) if __name__ == '__main__': import nose From 5c7353301dce3de076dd9a9b9239950a4b332fa4 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sat, 15 Dec 2018 08:27:46 -0800 Subject: [PATCH 05/14] Update test_contrib_operator.py --- tests/python/unittest/test_contrib_operator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 2a8a6735d07e..51aadb4e892d 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -270,6 +270,7 @@ def f(x, a, b, c): b = np.random.random_sample() c = np.random.random_sample() l = np.random.random_sample() + data = mx.symbol.Variable('data') quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) gr_q_sym = mx.sym.contrib.gradientreversal(data=quad_sym, l=l) From 0bc7986affc6a44f7feab0c1229c2882c5232855 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Thu, 10 Jan 2019 18:00:45 -0800 Subject: [PATCH 06/14] Fixed typo in gradient reversal op description --- src/operator/contrib/gradient_reversal_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index f88568be990e..9d1bff3912a3 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -31,7 +31,7 @@ namespace op { DMLC_REGISTER_PARAMETER(GradientReversalParam); NNVM_REGISTER_OP(_contrib_gradientreversal) -.describe(R"code(This operators implements the gradient reversal function. +.describe(R"code(This operator implements the gradient reversal function. In forward pass it acts as an identity tranform. During backpropagation it multiplies the gradient from the subsequent level by a negative factor and passes it to the preceding layer. 
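At this point in the series the operator acts as an identity in the forward pass and scales the incoming gradient by -l in the backward pass, as the description above states. The following Python sketch is illustrative only and is not part of any patch: the autograd-based check is an assumption about how one might exercise the operator imperatively, while mx.nd.contrib.gradientreversal and its l argument are the API added by these patches.

    import mxnet as mx
    import numpy as np

    l = 0.5
    x = mx.nd.random.normal(shape=(3, 4))
    x.attach_grad()
    with mx.autograd.record():
        y = mx.nd.contrib.gradientreversal(x, l=l)  # identity in the forward pass
    y.backward()  # default head gradient of ones

    # Output equals input, while the gradient is the head gradient scaled by -l.
    assert np.allclose(y.asnumpy(), x.asnumpy())
    assert np.allclose(x.grad.asnumpy(), -l * np.ones(x.shape))
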
From ad72f41d9e984683cf233705e467b453845f0e62 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sat, 12 Jan 2019 16:25:51 -0800 Subject: [PATCH 07/14] Replace forward code with the identitiy implementation --- .../contrib/gradient_reversal_op-inl.h | 129 ------------------ src/operator/contrib/gradient_reversal_op.cc | 18 +-- src/operator/contrib/gradient_reversal_op.cu | 5 +- 3 files changed, 13 insertions(+), 139 deletions(-) diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h index 3662312a535f..ac7957cd0831 100644 --- a/src/operator/contrib/gradient_reversal_op-inl.h +++ b/src/operator/contrib/gradient_reversal_op-inl.h @@ -46,62 +46,6 @@ struct GradientReversalParam : public dmlc::Parameter { } }; -inline bool GradientReversalOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - - SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); - SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); - return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U; -} - -inline bool GradientReversalOpType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - - TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); - TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); - return out_attrs->at(0) != -1; -} - -inline bool GradientReversalOpStorageType(const nnvm::NodeAttrs& attrs, - const int dev_mask, - DispatchMode* dispatch_mode, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - const int in_stype = in_attrs->at(0); - int& out_stype = out_attrs->at(0); - bool dispatched = false; - if (!dispatched && in_stype == kDefaultStorage) { - // dns -> dns - dispatched = storage_type_assign(&out_stype, kDefaultStorage, - dispatch_mode, DispatchMode::kFCompute); - } - if (!dispatched && in_stype == kCSRStorage) { - // csr -> csr - dispatched = storage_type_assign(&out_stype, kCSRStorage, - dispatch_mode, DispatchMode::kFComputeEx); - } - if (!dispatched) { - dispatched = dispatch_fallback(out_attrs, dispatch_mode); - } - return dispatched; -} - -template -struct gradient_reversal_forward { - template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data) { - KERNEL_ASSIGN(out_data[i], req, in_data[i]); - } -}; - template struct gradient_reversal_backward { template @@ -111,79 +55,6 @@ struct gradient_reversal_backward { } }; -template -void GradientReversalOpForward(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CHECK_EQ(inputs.size(), 1U); - CHECK_EQ(outputs.size(), 1U); - CHECK_EQ(req.size(), 1U); - mshadow::Stream *s = ctx.get_stream(); - const TBlob& in_data = inputs[0]; - const TBlob& out_data = outputs[0]; - using namespace mxnet_op; - MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { - Kernel, xpu>::Launch( - s, out_data.Size(), out_data.dptr(), in_data.dptr()); - }); - }); -} - -template -void GradientReversalOpForwardCsrImpl(const OpContext& ctx, - const NDArray& input, - const OpReqType req, - const NDArray& output) { - using namespace mshadow; - using namespace mxnet_op; - using namespace csr; - if (req == kNullOp) return; - CHECK_EQ(req, kWriteTo) << "GradientReversalOp with CSR only supports 
kWriteTo"; - Stream *s = ctx.get_stream(); - if (!input.storage_initialized()) { - FillZerosCsrImpl(s, output); - return; - } - const nnvm::dim_t nnz = input.storage_shape()[0]; - const nnvm::dim_t num_rows = output.shape()[0]; - output.CheckAndAlloc({Shape1(num_rows + 1), Shape1(nnz)}); - CHECK_EQ(output.aux_type(kIdx), output.aux_type(kIndPtr)) - << "The dtypes of indices and indptr don't match"; - MSHADOW_TYPE_SWITCH(output.dtype(), DType, { - MSHADOW_IDX_TYPE_SWITCH(output.aux_type(kIdx), IType, { - MXNET_ASSIGN_REQ_SWITCH(req, req_type, { - Kernel, xpu>::Launch( - s, nnz, output.data().dptr(), input.data().dptr()); - Copy(output.aux_data(kIdx).FlatTo1D(s), - input.aux_data(kIdx).FlatTo1D(s), s); - Copy(output.aux_data(kIndPtr).FlatTo1D(s), - input.aux_data(kIndPtr).FlatTo1D(s), s); - }); - }); - }); -} - -template -void GradientReversalOpForwardEx(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CHECK_EQ(inputs.size(), 1U); - CHECK_EQ(outputs.size(), 1U); - CHECK_EQ(req.size(), 1U); - const auto in_stype = inputs[0].storage_type(); - const auto out_stype = outputs[0].storage_type(); - if (in_stype == kCSRStorage && out_stype == kCSRStorage) { - GradientReversalOpForwardCsrImpl(ctx, inputs[0], req[0], outputs[0]); - } else { - LogUnimplementedOp(attrs, ctx, inputs, req, outputs); - } -} - template void GradientReversalOpBackward(const nnvm::NodeAttrs& attrs, const OpContext& ctx, diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index 9d1bff3912a3..8ada72990fc4 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -24,6 +24,7 @@ * \author Istvan Fehervari */ #include "./gradient_reversal_op-inl.h" +#include "../tensor/elemwise_unary_op.h" namespace mxnet { namespace op { @@ -43,14 +44,15 @@ the preceding layer. 
[](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", GradientReversalOpShape) -.set_attr("FInferType", GradientReversalOpType) -.set_attr("FInferStorageType", GradientReversalOpStorageType) -.set_attr("FCompute", GradientReversalOpForward) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) +.set_attr("FCompute", UnaryOp::IdentityCompute) +.set_attr("FComputeEx", UnaryOp::IdentityComputeEx) .set_attr("FGradient", ElemwiseGradUseIn{"_contrib_backward_gradientreversal"}) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}}; +.set_attr("FInplaceIdentity", + [](const NodeAttrs& attrs){ + return std::vector{true}; }) .add_argument("data", "NDArray-or-Symbol", "Input ndarray") .add_arguments(GradientReversalParam::__FIELDS__()); @@ -61,7 +63,7 @@ NNVM_REGISTER_OP(_contrib_backward_gradientreversal) .set_num_outputs(1) .set_attr("TIsBackward", true) .set_attr("FCompute", GradientReversalOpBackward) -.set_attr("FComputeEx", GradientReversalOpForwardEx); +.set_attr("FComputeEx", UnaryOp::IdentityComputeEx); } // namespace op } // namespace mxnet diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_reversal_op.cu index cf4871dd7c4e..985d1898cef6 100644 --- a/src/operator/contrib/gradient_reversal_op.cu +++ b/src/operator/contrib/gradient_reversal_op.cu @@ -24,13 +24,14 @@ * \author Istvan Fehervari */ #include "./gradient_reversal_op-inl.h" +#include "../tensor/elemwise_unary_op.h" namespace mxnet { namespace op { NNVM_REGISTER_OP(_contrib_gradientreversal) -.set_attr("FComputeEx", GradientReversalOpForwardEx) -.set_attr("FCompute", GradientReversalOpForward); +.set_attr("FComputeEx", UnaryOp::IdentityComputeEx) +.set_attr("FCompute", UnaryOp::IdentityCompute); NNVM_REGISTER_OP(_contrib_backward_gradientreversal) .set_attr("FCompute", GradientReversalOpBackward); From 912f2a0f69517602255ab1785e04e94db711cb64 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sun, 13 Jan 2019 13:24:43 -0800 Subject: [PATCH 08/14] Fixed typos in function docs --- include/mxnet/op_attr_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mxnet/op_attr_types.h b/include/mxnet/op_attr_types.h index dd818457f827..41be554953fd 100644 --- a/include/mxnet/op_attr_types.h +++ b/include/mxnet/op_attr_types.h @@ -254,7 +254,7 @@ using FNDArrayFunction = std::function& inputs, std::vector* outputs)>; /*! - * \brief Resiger a compute function for simple stateless forward only operator + * \brief Register a compute function for simple stateless forward only operator * * \note Register under "FCompute" and "FCompute" */ @@ -264,7 +264,7 @@ using FCompute = std::function& req, const std::vector& outputs)>; /*! - * \brief Resiger an NDArray compute function for simple stateless forward only operator + * \brief Register an NDArray compute function for simple stateless forward only operator * \note Register under "FComputeEx" and "FComputeEx" * Dispatched only when inferred dispatch_mode is FDispatchComputeEx */ @@ -275,7 +275,7 @@ using FComputeEx = std::function& outputs)>; /*! - * \brief Resiger a storage and dispatch mode inference function based on + * \brief Register a storage and dispatch mode inference function based on * storage types of the inputs and outputs, and the dev_mask for the operator. 
* * \note Register under "FInferStorageType" From f865e14906d8b50019e6ff5053b36375f52ca7d4 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sun, 13 Jan 2019 13:25:13 -0800 Subject: [PATCH 09/14] Changed default behavior to identity --- src/operator/contrib/gradient_reversal_op-inl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h index ac7957cd0831..cc445158fdd8 100644 --- a/src/operator/contrib/gradient_reversal_op-inl.h +++ b/src/operator/contrib/gradient_reversal_op-inl.h @@ -41,7 +41,7 @@ struct GradientReversalParam : public dmlc::Parameter { float l; DMLC_DECLARE_PARAMETER(GradientReversalParam) { DMLC_DECLARE_FIELD(l) - .set_default(0.0) + .set_default(1.0) .describe("Lambda coefficient of the gradient reversal function."); } }; From 0cd8416340d7530be2dc453986ddcd01a0d247c8 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sun, 13 Jan 2019 14:06:21 -0800 Subject: [PATCH 10/14] Replaced backward code with scalar_mul --- .../contrib/gradient_reversal_op-inl.h | 85 ------------------- src/operator/contrib/gradient_reversal_op.cc | 68 ++++++++++----- src/operator/contrib/gradient_reversal_op.cu | 5 +- 3 files changed, 52 insertions(+), 106 deletions(-) delete mode 100644 src/operator/contrib/gradient_reversal_op-inl.h diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h deleted file mode 100644 index cc445158fdd8..000000000000 --- a/src/operator/contrib/gradient_reversal_op-inl.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * Copyright (c) 2018 by Contributors - * \file gradient_reversal_op-inl.h - * \brief - * \author Istvan Fehervari -*/ -#ifndef MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ -#define MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ - -#include -#include -#include "../mshadow_op.h" -#include "../mxnet_op.h" -#include "../operator_common.h" -#include "../elemwise_op_common.h" -#include "../tensor/init_op.h" - -namespace mxnet { -namespace op { - -struct GradientReversalParam : public dmlc::Parameter { - float l; - DMLC_DECLARE_PARAMETER(GradientReversalParam) { - DMLC_DECLARE_FIELD(l) - .set_default(1.0) - .describe("Lambda coefficient of the gradient reversal function."); - } -}; - -template -struct gradient_reversal_backward { - template - MSHADOW_XINLINE static void Map(int i, DType* in_grad, const DType* out_grad, - const DType* in_data, const float l) { - KERNEL_ASSIGN(in_grad[i], req, out_grad[i] * -l); - } -}; - -template -void GradientReversalOpBackward(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CHECK_EQ(inputs.size(), 2U); - CHECK_EQ(outputs.size(), 1U); - CHECK_EQ(req.size(), 1U); - mshadow::Stream *s = ctx.get_stream(); - const TBlob& out_grad = inputs[0]; - const TBlob& in_data = inputs[1]; - const TBlob& in_grad = outputs[0]; - const GradientReversalParam& param = nnvm::get(attrs.parsed); - using namespace mxnet_op; - MSHADOW_TYPE_SWITCH(out_grad.type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { - Kernel, xpu>::Launch( - s, in_grad.Size(), in_grad.dptr(), out_grad.dptr(), - in_data.dptr(), param.l); - }); - }); -} - -} // namespace op -} // namespace mxnet - -#endif // MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index 8ada72990fc4..e8a6b35fe6fd 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -23,29 +23,62 @@ * \brief * \author Istvan Fehervari */ -#include "./gradient_reversal_op-inl.h" #include "../tensor/elemwise_unary_op.h" +#include "../tensor/elemwise_binary_scalar_op.h" namespace mxnet { namespace op { -DMLC_REGISTER_PARAMETER(GradientReversalParam); +static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1); + CHECK_EQ(out_attrs->size(), 1); + const auto in_stype = in_attrs->at(0); + auto &out_stype = out_attrs->at(0); + bool dispatched = false; + if (!dispatched && (in_stype == kDefaultStorage)) { + // dense -> dense + dispatched = storage_type_assign(&out_stype, kDefaultStorage, + dispatch_mode, DispatchMode::kFCompute); + } + if (!dispatched && in_stype == kRowSparseStorage) { + // row sparse -> row sparse + dispatched = storage_type_assign(&out_stype, kRowSparseStorage, + dispatch_mode, DispatchMode::kFComputeEx); + // FComputeEx can handle dns output on cpu, too + if (dev_mask == cpu::kDevMask && out_stype == kDefaultStorage) { + DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + dispatched = true; + } + } + if (!dispatched && in_stype == kCSRStorage) { + // csr -> csr + dispatched = storage_type_assign(&out_stype, kCSRStorage, + dispatch_mode, DispatchMode::kFComputeEx); + // FComputeEx can handle dns output on cpu, too + if (dev_mask == cpu::kDevMask && out_stype == kDefaultStorage) { + 
DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + dispatched = true; + } + } + if (!dispatched) { + dispatched = dispatch_fallback(out_attrs, dispatch_mode); + } + return dispatched; +} -NNVM_REGISTER_OP(_contrib_gradientreversal) +MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientreversal) .describe(R"code(This operator implements the gradient reversal function. In forward pass it acts as an identity tranform. During backpropagation it multiplies the gradient from the subsequent level by a negative factor and passes it to the preceding layer. )code" ADD_FILELINE) -.set_attr_parser(ParamParser) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FListInputNames", - [](const NodeAttrs& attrs) { - return std::vector{"data"}; +.set_attr_parser([](NodeAttrs* attrs) { + attrs->parsed = std::stod(attrs->dict["scalar"]); }) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FComputeEx", UnaryOp::IdentityComputeEx) @@ -54,16 +87,13 @@ the preceding layer. [](const NodeAttrs& attrs){ return std::vector{true}; }) -.add_argument("data", "NDArray-or-Symbol", "Input ndarray") -.add_arguments(GradientReversalParam::__FIELDS__()); +.add_argument("scalar", "float", "scalar input"); -NNVM_REGISTER_OP(_contrib_backward_gradientreversal) -.set_attr_parser(ParamParser) -.set_num_inputs(2) -.set_num_outputs(1) +MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientreversal) .set_attr("TIsBackward", true) -.set_attr("FCompute", GradientReversalOpBackward) -.set_attr("FComputeEx", UnaryOp::IdentityComputeEx); +.set_attr("FInferStorageType", BinaryScalarStorageType) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FComputeEx", BinaryScalarOp::ComputeEx); } // namespace op } // namespace mxnet diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_reversal_op.cu index 985d1898cef6..ed8b3cdda8dc 100644 --- a/src/operator/contrib/gradient_reversal_op.cu +++ b/src/operator/contrib/gradient_reversal_op.cu @@ -23,8 +23,8 @@ * \brief * \author Istvan Fehervari */ -#include "./gradient_reversal_op-inl.h" #include "../tensor/elemwise_unary_op.h" +#include "../tensor/elemwise_binary_scalar_op.h" namespace mxnet { namespace op { @@ -34,7 +34,8 @@ NNVM_REGISTER_OP(_contrib_gradientreversal) .set_attr("FCompute", UnaryOp::IdentityCompute); NNVM_REGISTER_OP(_contrib_backward_gradientreversal) -.set_attr("FCompute", GradientReversalOpBackward); +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FComputeEx", BinaryScalarOp::ComputeEx); } // namespace op } // namespace mxnet From 19194b0f67ed5939b4557cd24b40ac33121c4b7d Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sun, 13 Jan 2019 15:31:29 -0800 Subject: [PATCH 11/14] Fixed backward operator and unit test --- src/operator/contrib/gradient_reversal_op.cc | 4 ++-- tests/python/unittest/test_contrib_operator.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index e8a6b35fe6fd..63877e8807df 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -72,7 +72,7 @@ static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientreversal) .describe(R"code(This operator implements the gradient 
reversal function. -In forward pass it acts as an identity tranform. During backpropagation it +In forward pass it acts as an identity transform. During backpropagation it multiplies the gradient from the subsequent level by a negative factor and passes it to the preceding layer. )code" ADD_FILELINE) @@ -82,7 +82,7 @@ the preceding layer. .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FComputeEx", UnaryOp::IdentityComputeEx) -.set_attr("FGradient", ElemwiseGradUseIn{"_contrib_backward_gradientreversal"}) +.set_attr("FGradient", ElemwiseGradUseNone{"_contrib_backward_gradientreversal"}) .set_attr("FInplaceIdentity", [](const NodeAttrs& attrs){ return std::vector{true}; diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 51aadb4e892d..f2ad1dbbbc3b 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -269,23 +269,23 @@ def f(x, a, b, c): a = np.random.random_sample() b = np.random.random_sample() c = np.random.random_sample() - l = np.random.random_sample() + l = np.random.random_sample() - 0.5 data = mx.symbol.Variable('data') quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) - gr_q_sym = mx.sym.contrib.gradientreversal(data=quad_sym, l=l) + gr_q_sym = mx.sym.contrib.gradientreversal(quad_sym, scalar=l) for dtype in [np.float16, np.float32, np.float64]: for ndim in range(1, 6): shape = rand_shape_nd(ndim, 5) data_np = np.random.randn(*shape).astype(dtype) expected = f(data_np, a, b, c) - backward_expected = (2 * a * data_np + b) * -l + backward_expected = (2 * a * data_np + b) * l # check imperative forward output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c) - output = mx.nd.contrib.gradientreversal(output, l=l) - assert_almost_equal(output.asnumpy(),expected, + output = mx.nd.contrib.gradientreversal(output, scalar=l) + assert_almost_equal(output.asnumpy(), expected, rtol=1e-2 if dtype is np.float16 else 1e-5, atol=1e-2 if dtype is np.float16 else 1e-5) # check forward From d1fffac5e2fae2b41c2f4a2d3f54b7e4cc8e5c0b Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Mon, 14 Jan 2019 11:41:30 -0800 Subject: [PATCH 12/14] Renamed operator to gradient multiplier --- ...ient_reversal_op.cc => gradient_multiplier_op.cc} | 12 ++++++------ ...ient_reversal_op.cu => gradient_multiplier_op.cu} | 6 +++--- tests/python/unittest/test_contrib_operator.py | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) rename src/operator/contrib/{gradient_reversal_op.cc => gradient_multiplier_op.cc} (92%) rename src/operator/contrib/{gradient_reversal_op.cu => gradient_multiplier_op.cu} (91%) diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_multiplier_op.cc similarity index 92% rename from src/operator/contrib/gradient_reversal_op.cc rename to src/operator/contrib/gradient_multiplier_op.cc index 63877e8807df..9d15ee14cd12 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_multiplier_op.cc @@ -19,7 +19,7 @@ /*! 
* Copyright (c) 2018 by Contributors - * \file gradient_reversal_op.cc + * \file gradient_multiplier_op.cc * \brief * \author Istvan Fehervari */ @@ -70,10 +70,10 @@ static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, return dispatched; } -MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientreversal) -.describe(R"code(This operator implements the gradient reversal function. +MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientmultiplier) +.describe(R"code(This operator implements the gradient multiplier function. In forward pass it acts as an identity transform. During backpropagation it -multiplies the gradient from the subsequent level by a negative factor and passes it to +multiplies the gradient from the subsequent level by a scalar factor and passes it to the preceding layer. )code" ADD_FILELINE) .set_attr_parser([](NodeAttrs* attrs) { @@ -82,14 +82,14 @@ the preceding layer. .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FComputeEx", UnaryOp::IdentityComputeEx) -.set_attr("FGradient", ElemwiseGradUseNone{"_contrib_backward_gradientreversal"}) +.set_attr("FGradient", ElemwiseGradUseNone{"_contrib_backward_gradientmultiplier"}) .set_attr("FInplaceIdentity", [](const NodeAttrs& attrs){ return std::vector{true}; }) .add_argument("scalar", "float", "scalar input"); -MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientreversal) +MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientmultiplier) .set_attr("TIsBackward", true) .set_attr("FInferStorageType", BinaryScalarStorageType) .set_attr("FCompute", BinaryScalarOp::Compute) diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_multiplier_op.cu similarity index 91% rename from src/operator/contrib/gradient_reversal_op.cu rename to src/operator/contrib/gradient_multiplier_op.cu index ed8b3cdda8dc..7159cea9805d 100644 --- a/src/operator/contrib/gradient_reversal_op.cu +++ b/src/operator/contrib/gradient_multiplier_op.cu @@ -19,7 +19,7 @@ /*! 
* Copyright (c) 2018 by Contributors - * \file gradient_reversal_op.cu + * \file gradient_multiplier_op.cu * \brief * \author Istvan Fehervari */ @@ -29,11 +29,11 @@ namespace mxnet { namespace op { -NNVM_REGISTER_OP(_contrib_gradientreversal) +NNVM_REGISTER_OP(_contrib_gradientmultiplier) .set_attr("FComputeEx", UnaryOp::IdentityComputeEx) .set_attr("FCompute", UnaryOp::IdentityCompute); -NNVM_REGISTER_OP(_contrib_backward_gradientreversal) +NNVM_REGISTER_OP(_contrib_backward_gradientmultiplier) .set_attr("FCompute", BinaryScalarOp::Compute) .set_attr("FComputeEx", BinaryScalarOp::ComputeEx); diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index f2ad1dbbbc3b..16e6e179f7a2 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -261,8 +261,8 @@ def test_multibox_target_op(): assert_array_equal(loc_mask.asnumpy(), expected_loc_mask) assert_array_equal(cls_target.asnumpy(), expected_cls_target) -def test_gradient_reversal_op(): - # We use the quadratic function in combination with gradient reversal +def test_gradient_multiplier_op(): + # We use the quadratic function in combination with gradient multiplier def f(x, a, b, c): return a * x**2 + b * x + c @@ -273,7 +273,7 @@ def f(x, a, b, c): data = mx.symbol.Variable('data') quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) - gr_q_sym = mx.sym.contrib.gradientreversal(quad_sym, scalar=l) + gr_q_sym = mx.sym.contrib.gradientmultiplier(quad_sym, scalar=l) for dtype in [np.float16, np.float32, np.float64]: for ndim in range(1, 6): @@ -284,7 +284,7 @@ def f(x, a, b, c): # check imperative forward output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c) - output = mx.nd.contrib.gradientreversal(output, scalar=l) + output = mx.nd.contrib.gradientmultiplier(output, scalar=l) assert_almost_equal(output.asnumpy(), expected, rtol=1e-2 if dtype is np.float16 else 1e-5, atol=1e-2 if dtype is np.float16 else 1e-5) From 54ae4f0007af370a4fce8167574a6360d1322df4 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Mon, 14 Jan 2019 17:55:39 -0800 Subject: [PATCH 13/14] Update test_contrib_operator.py Retrigger flaky test --- tests/python/unittest/test_contrib_operator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 16e6e179f7a2..aac807660af1 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -269,22 +269,22 @@ def f(x, a, b, c): a = np.random.random_sample() b = np.random.random_sample() c = np.random.random_sample() - l = np.random.random_sample() - 0.5 + m = np.random.random_sample() - 0.5 data = mx.symbol.Variable('data') quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) - gr_q_sym = mx.sym.contrib.gradientmultiplier(quad_sym, scalar=l) + gr_q_sym = mx.sym.contrib.gradientmultiplier(quad_sym, scalar=m) for dtype in [np.float16, np.float32, np.float64]: for ndim in range(1, 6): shape = rand_shape_nd(ndim, 5) data_np = np.random.randn(*shape).astype(dtype) expected = f(data_np, a, b, c) - backward_expected = (2 * a * data_np + b) * l + backward_expected = (2 * a * data_np + b) * m # check imperative forward output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c) - output = mx.nd.contrib.gradientmultiplier(output, scalar=l) + output = mx.nd.contrib.gradientmultiplier(output, scalar=m) 
assert_almost_equal(output.asnumpy(), expected, rtol=1e-2 if dtype is np.float16 else 1e-5, atol=1e-2 if dtype is np.float16 else 1e-5) From 3983458f8332fa4f05500efe4c698491b097242d Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Thu, 17 Jan 2019 07:25:48 -0800 Subject: [PATCH 14/14] Update gradient_multiplier_op.cc Improved the description of the scalar multiplier --- src/operator/contrib/gradient_multiplier_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/contrib/gradient_multiplier_op.cc b/src/operator/contrib/gradient_multiplier_op.cc index 9d15ee14cd12..47f891ef802b 100644 --- a/src/operator/contrib/gradient_multiplier_op.cc +++ b/src/operator/contrib/gradient_multiplier_op.cc @@ -73,7 +73,7 @@ static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientmultiplier) .describe(R"code(This operator implements the gradient multiplier function. In forward pass it acts as an identity transform. During backpropagation it -multiplies the gradient from the subsequent level by a scalar factor and passes it to +multiplies the gradient from the subsequent level by a scalar factor lambda and passes it to the preceding layer. )code" ADD_FILELINE) .set_attr_parser([](NodeAttrs* attrs) { @@ -87,7 +87,7 @@ the preceding layer. [](const NodeAttrs& attrs){ return std::vector{true}; }) -.add_argument("scalar", "float", "scalar input"); +.add_argument("scalar", "float", "lambda multiplier"); MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientmultiplier) .set_attr("TIsBackward", true)
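
After the rename in the last three patches the operator is a general gradient multiplier: the forward pass stays an identity, the backward pass multiplies the gradient by the scalar argument, and gradient reversal is recovered by passing a negative value. A closing usage sketch, illustrative only: the autograd verification is an assumption, while mx.nd.contrib.gradientmultiplier and its scalar argument come from the patches above.

    import mxnet as mx
    import numpy as np

    x = mx.nd.random.normal(shape=(2, 3))
    x.attach_grad()
    with mx.autograd.record():
        # scalar=-1.0 turns the identity transform into a gradient reversal layer
        y = mx.nd.contrib.gradientmultiplier(x, scalar=-1.0)
    y.backward()

    # Identity forward; gradient multiplied by the scalar (-1 here).
    assert np.allclose(y.asnumpy(), x.asnumpy())
    assert np.allclose(x.grad.asnumpy(), -np.ones(x.shape))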