From 735438aea76f28b92c89629ccfc8612b53500a33 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Wed, 12 Dec 2018 22:36:17 -0800 Subject: [PATCH 01/14] Added the gradient reversal contrib operator Missing test for backwards pass --- .../contrib/gradient_reversal_op-inl.h | 213 ++++++++++++++++++ src/operator/contrib/gradient_reversal_op.cc | 68 ++++++ src/operator/contrib/gradient_reversal_op.cu | 39 ++++ .../python/unittest/test_contrib_operator.py | 5 + 4 files changed, 325 insertions(+) create mode 100644 src/operator/contrib/gradient_reversal_op-inl.h create mode 100644 src/operator/contrib/gradient_reversal_op.cc create mode 100644 src/operator/contrib/gradient_reversal_op.cu diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h new file mode 100644 index 000000000000..00753becbf85 --- /dev/null +++ b/src/operator/contrib/gradient_reversal_op-inl.h @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file gradient_reversal_op-inl.h + * \brief + * \author Istvan Fehervari +*/ +#ifndef MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ +#define MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ + +#include +#include "../mshadow_op.h" +#include "../mxnet_op.h" +#include "../operator_common.h" +#include "../elemwise_op_common.h" +#include "../tensor/init_op.h" + +namespace mxnet { +namespace op { + +struct GradientReversalParam : public dmlc::Parameter { + float l; + DMLC_DECLARE_PARAMETER(GradientReversalParam) { + DMLC_DECLARE_FIELD(l) + .set_default(0.0) + .describe("Lambda coefficient of the gradient reversal function."); + } +}; + +inline bool GradientReversalOpShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U; +} + +inline bool GradientReversalOpType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + return out_attrs->at(0) != -1; +} + +inline bool GradientReversalOpStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const int in_stype = in_attrs->at(0); + int& out_stype = out_attrs->at(0); + bool dispatched = false; + if (!dispatched && in_stype == 
kDefaultStorage) { + // dns -> dns + dispatched = storage_type_assign(&out_stype, kDefaultStorage, + dispatch_mode, DispatchMode::kFCompute); + } + if (!dispatched && in_stype == kCSRStorage) { + // csr -> csr + dispatched = storage_type_assign(&out_stype, kCSRStorage, + dispatch_mode, DispatchMode::kFComputeEx); + } + if (!dispatched) { + dispatched = dispatch_fallback(out_attrs, dispatch_mode); + } + return dispatched; +} + +template +struct gradient_reversal_forward { + template + MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data) { + KERNEL_ASSIGN(out_data[i], req, in_data[i]); + } +}; + +template +struct gradient_reversal_backward { + template + MSHADOW_XINLINE static void Map(int i, DType* in_grad, const DType* out_grad, + const DType* in_data, const float l) { + KERNEL_ASSIGN(in_grad[i], req, out_grad[i] * -l); + } +}; + +template +void GradientReversalOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + const TBlob& in_data = inputs[0]; + const TBlob& out_data = outputs[0]; + using namespace mxnet_op; + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, out_data.Size(), out_data.dptr(), in_data.dptr()); + }); + }); +} + +template +void GradientReversalOpForwardCsrImpl(const OpContext& ctx, + const NDArray& input, + const OpReqType req, + const NDArray& output) { + using namespace mshadow; + using namespace mxnet_op; + using namespace csr; + if (req == kNullOp) return; + CHECK_EQ(req, kWriteTo) << "GradientReversalOp with CSR only supports kWriteTo"; + Stream *s = ctx.get_stream(); + if (!input.storage_initialized()) { + FillZerosCsrImpl(s, output); + return; + } + const nnvm::dim_t nnz = input.storage_shape()[0]; + const nnvm::dim_t num_rows = output.shape()[0]; + output.CheckAndAlloc({Shape1(num_rows + 1), Shape1(nnz)}); + CHECK_EQ(output.aux_type(kIdx), output.aux_type(kIndPtr)) + << "The dtypes of indices and indptr don't match"; + MSHADOW_TYPE_SWITCH(output.dtype(), DType, { + MSHADOW_IDX_TYPE_SWITCH(output.aux_type(kIdx), IType, { + MXNET_ASSIGN_REQ_SWITCH(req, req_type, { + Kernel, xpu>::Launch( + s, nnz, output.data().dptr(), input.data().dptr()); + Copy(output.aux_data(kIdx).FlatTo1D(s), + input.aux_data(kIdx).FlatTo1D(s), s); + Copy(output.aux_data(kIndPtr).FlatTo1D(s), + input.aux_data(kIndPtr).FlatTo1D(s), s); + }); + }); + }); +} + +template +void GradientReversalOpForwardEx(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + const auto in_stype = inputs[0].storage_type(); + const auto out_stype = outputs[0].storage_type(); + if (in_stype == kCSRStorage && out_stype == kCSRStorage) { + GradientReversalOpForwardCsrImpl(ctx, inputs[0], req[0], outputs[0]); + } else { + LogUnimplementedOp(attrs, ctx, inputs, req, outputs); + } +} + +template +void GradientReversalOpBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 2U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + const TBlob& out_grad = 
inputs[0]; + const TBlob& in_data = inputs[1]; + const TBlob& in_grad = outputs[0]; + const GradientReversalParam& param = nnvm::get(attrs.parsed); + using namespace mxnet_op; + MSHADOW_TYPE_SWITCH(out_grad.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, in_grad.Size(), in_grad.dptr(), out_grad.dptr(), + in_data.dptr(), param.l); + }); + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ \ No newline at end of file diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc new file mode 100644 index 000000000000..4ead4d617c74 --- /dev/null +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file gradient_reversal_op.cc + * \brief + * \author Istvan Fehervari +*/ +#include "./gradient_reversal_op-inl.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(GradientReversalParam); + +NNVM_REGISTER_OP(_contrib_gradientreversal) +.describe(R"code(This operators implements the gradient reversal function. +In forward pass it acts as an identity tranform. During backpropagation it +multiplies the gradient from the subsequent level by −l and passes it to +the preceding layer. + +)code" ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; + }) +.set_attr("FInferShape", GradientReversalOpShape) +.set_attr("FInferType", GradientReversalOpType) +.set_attr("FInferStorageType", GradientReversalOpStorageType) +.set_attr("FCompute", GradientReversalOpForward) +.set_attr("FGradient", ElemwiseGradUseIn{"_contrib_backward_gradientreversal"}) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.add_argument("data", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(GradientReversalParam::__FIELDS__()); + +NNVM_REGISTER_OP(_contrib_backward_gradientreversal) +.set_attr_parser(ParamParser) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr("TIsBackward", true) +.set_attr("FCompute", GradientReversalOpBackward) +.set_attr("FComputeEx", GradientReversalOpForwardEx); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_reversal_op.cu new file mode 100644 index 000000000000..b391cebf1eec --- /dev/null +++ b/src/operator/contrib/gradient_reversal_op.cu @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file gradient_reversal_op.cu + * \brief + * \author Istvan Fehervari +*/ +#include "./gradient_reversal_op-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_contrib_gradientreversal) +.set_attr("FComputeEx", GradientReversalOpForwardEx) +.set_attr("FCompute", GradientReversalOpForward); + +NNVM_REGISTER_OP(_contrib_backward_gradientreversal) +.set_attr("FCompute", GradientReversalOpBackward); + +} // namespace op +} // namespace mxnet \ No newline at end of file diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 43d3db648a85..1bac701179fa 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -261,6 +261,11 @@ def test_multibox_target_op(): assert_array_equal(loc_mask.asnumpy(), expected_loc_mask) assert_array_equal(cls_target.asnumpy(), expected_cls_target) +def test_gradient_reversal_op(): + input = mx.nd.normal((5,5)) + + output = mx.nd.contrib.gradientreversal(input, l=1.0) + assert_array_equal(input, output) if __name__ == '__main__': import nose From 44eda552b9202afe680b216905ef7984e2592d15 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Wed, 12 Dec 2018 23:01:11 -0800 Subject: [PATCH 02/14] Fixed linting errors --- src/operator/contrib/gradient_reversal_op-inl.h | 3 ++- src/operator/contrib/gradient_reversal_op.cu | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h index 00753becbf85..3662312a535f 100644 --- a/src/operator/contrib/gradient_reversal_op-inl.h +++ b/src/operator/contrib/gradient_reversal_op-inl.h @@ -27,6 +27,7 @@ #define MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ #include +#include #include "../mshadow_op.h" #include "../mxnet_op.h" #include "../operator_common.h" @@ -210,4 +211,4 @@ void GradientReversalOpBackward(const nnvm::NodeAttrs& attrs, } // namespace op } // namespace mxnet -#endif // MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ \ No newline at end of file +#endif // MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_reversal_op.cu index b391cebf1eec..cf4871dd7c4e 100644 --- a/src/operator/contrib/gradient_reversal_op.cu +++ b/src/operator/contrib/gradient_reversal_op.cu @@ -36,4 +36,4 @@ NNVM_REGISTER_OP(_contrib_backward_gradientreversal) .set_attr("FCompute", GradientReversalOpBackward); } // namespace op -} // namespace mxnet \ No newline at end of file +} // namespace mxnet From e9bf741118181909356a7ac1ca1bee3128b9bc29 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Thu, 13 Dec 2018 10:31:15 -0800 Subject: [PATCH 03/14] Fixed forward test --- 
src/operator/contrib/gradient_reversal_op.cc | 3 +-- tests/python/unittest/test_contrib_operator.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index 4ead4d617c74..f88568be990e 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -33,9 +33,8 @@ DMLC_REGISTER_PARAMETER(GradientReversalParam); NNVM_REGISTER_OP(_contrib_gradientreversal) .describe(R"code(This operators implements the gradient reversal function. In forward pass it acts as an identity tranform. During backpropagation it -multiplies the gradient from the subsequent level by −l and passes it to +multiplies the gradient from the subsequent level by a negative factor and passes it to the preceding layer. - )code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_num_inputs(1) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 1bac701179fa..ac54db0b4078 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -262,7 +262,7 @@ def test_multibox_target_op(): assert_array_equal(cls_target.asnumpy(), expected_cls_target) def test_gradient_reversal_op(): - input = mx.nd.normal((5,5)) + input = mx.nd.random.normal(shape=(5,5)) output = mx.nd.contrib.gradientreversal(input, l=1.0) assert_array_equal(input, output) From dfd19066e93d5929a40ec1c146c321e46665cf23 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Thu, 13 Dec 2018 11:17:56 -0800 Subject: [PATCH 04/14] Added random forward / backward test for gradient reversal --- .../python/unittest/test_contrib_operator.py | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index ac54db0b4078..2a8a6735d07e 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -262,10 +262,40 @@ def test_multibox_target_op(): assert_array_equal(cls_target.asnumpy(), expected_cls_target) def test_gradient_reversal_op(): - input = mx.nd.random.normal(shape=(5,5)) + # We use the quadratic function in combination with gradient reversal + def f(x, a, b, c): + return a * x**2 + b * x + c - output = mx.nd.contrib.gradientreversal(input, l=1.0) - assert_array_equal(input, output) + a = np.random.random_sample() + b = np.random.random_sample() + c = np.random.random_sample() + l = np.random.random_sample() + data = mx.symbol.Variable('data') + quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) + gr_q_sym = mx.sym.contrib.gradientreversal(data=quad_sym, l=l) + + for dtype in [np.float16, np.float32, np.float64]: + for ndim in range(1, 6): + shape = rand_shape_nd(ndim, 5) + data_np = np.random.randn(*shape).astype(dtype) + expected = f(data_np, a, b, c) + backward_expected = (2 * a * data_np + b) * -l + + # check imperative forward + output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c) + output = mx.nd.contrib.gradientreversal(output, l=l) + assert_almost_equal(output.asnumpy(),expected, + rtol=1e-2 if dtype is np.float16 else 1e-5, + atol=1e-2 if dtype is np.float16 else 1e-5) + # check forward + check_symbolic_forward(gr_q_sym, [data_np], [expected], + rtol=1e-2 if dtype is np.float16 else 1e-5, + atol=1e-2 if dtype is np.float16 else 1e-5) + # check backward + check_symbolic_backward(gr_q_sym, [data_np], 
[np.ones(expected.shape)], + [backward_expected], + rtol=1e-2 if dtype is np.float16 else 1e-5, + atol=1e-2 if dtype is np.float16 else 1e-5) if __name__ == '__main__': import nose From 5c7353301dce3de076dd9a9b9239950a4b332fa4 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sat, 15 Dec 2018 08:27:46 -0800 Subject: [PATCH 05/14] Update test_contrib_operator.py --- tests/python/unittest/test_contrib_operator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 2a8a6735d07e..51aadb4e892d 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -270,6 +270,7 @@ def f(x, a, b, c): b = np.random.random_sample() c = np.random.random_sample() l = np.random.random_sample() + data = mx.symbol.Variable('data') quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) gr_q_sym = mx.sym.contrib.gradientreversal(data=quad_sym, l=l) From 0bc7986affc6a44f7feab0c1229c2882c5232855 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Thu, 10 Jan 2019 18:00:45 -0800 Subject: [PATCH 06/14] Fixed typo in gradient reversal op description --- src/operator/contrib/gradient_reversal_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index f88568be990e..9d1bff3912a3 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -31,7 +31,7 @@ namespace op { DMLC_REGISTER_PARAMETER(GradientReversalParam); NNVM_REGISTER_OP(_contrib_gradientreversal) -.describe(R"code(This operators implements the gradient reversal function. +.describe(R"code(This operator implements the gradient reversal function. In forward pass it acts as an identity tranform. During backpropagation it multiplies the gradient from the subsequent level by a negative factor and passes it to the preceding layer. 
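At this point in the series the operator acts as an identity in the forward pass and scales the incoming gradient by -l in the backward pass, as the description above states. The following Python sketch is illustrative only and is not part of any patch: the autograd-based check is an assumption about how one might exercise the operator imperatively, while mx.nd.contrib.gradientreversal and its l argument are the API added by these patches.

    import mxnet as mx
    import numpy as np

    l = 0.5
    x = mx.nd.random.normal(shape=(3, 4))
    x.attach_grad()
    with mx.autograd.record():
        y = mx.nd.contrib.gradientreversal(x, l=l)  # identity in the forward pass
    y.backward()  # default head gradient of ones

    # Output equals input, while the gradient is the head gradient scaled by -l.
    assert np.allclose(y.asnumpy(), x.asnumpy())
    assert np.allclose(x.grad.asnumpy(), -l * np.ones(x.shape))
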
From ad72f41d9e984683cf233705e467b453845f0e62 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sat, 12 Jan 2019 16:25:51 -0800 Subject: [PATCH 07/14] Replace forward code with the identitiy implementation --- .../contrib/gradient_reversal_op-inl.h | 129 ------------------ src/operator/contrib/gradient_reversal_op.cc | 18 +-- src/operator/contrib/gradient_reversal_op.cu | 5 +- 3 files changed, 13 insertions(+), 139 deletions(-) diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h index 3662312a535f..ac7957cd0831 100644 --- a/src/operator/contrib/gradient_reversal_op-inl.h +++ b/src/operator/contrib/gradient_reversal_op-inl.h @@ -46,62 +46,6 @@ struct GradientReversalParam : public dmlc::Parameter { } }; -inline bool GradientReversalOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - - SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); - SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); - return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U; -} - -inline bool GradientReversalOpType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - - TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); - TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); - return out_attrs->at(0) != -1; -} - -inline bool GradientReversalOpStorageType(const nnvm::NodeAttrs& attrs, - const int dev_mask, - DispatchMode* dispatch_mode, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - const int in_stype = in_attrs->at(0); - int& out_stype = out_attrs->at(0); - bool dispatched = false; - if (!dispatched && in_stype == kDefaultStorage) { - // dns -> dns - dispatched = storage_type_assign(&out_stype, kDefaultStorage, - dispatch_mode, DispatchMode::kFCompute); - } - if (!dispatched && in_stype == kCSRStorage) { - // csr -> csr - dispatched = storage_type_assign(&out_stype, kCSRStorage, - dispatch_mode, DispatchMode::kFComputeEx); - } - if (!dispatched) { - dispatched = dispatch_fallback(out_attrs, dispatch_mode); - } - return dispatched; -} - -template -struct gradient_reversal_forward { - template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data) { - KERNEL_ASSIGN(out_data[i], req, in_data[i]); - } -}; - template struct gradient_reversal_backward { template @@ -111,79 +55,6 @@ struct gradient_reversal_backward { } }; -template -void GradientReversalOpForward(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CHECK_EQ(inputs.size(), 1U); - CHECK_EQ(outputs.size(), 1U); - CHECK_EQ(req.size(), 1U); - mshadow::Stream *s = ctx.get_stream(); - const TBlob& in_data = inputs[0]; - const TBlob& out_data = outputs[0]; - using namespace mxnet_op; - MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { - Kernel, xpu>::Launch( - s, out_data.Size(), out_data.dptr(), in_data.dptr()); - }); - }); -} - -template -void GradientReversalOpForwardCsrImpl(const OpContext& ctx, - const NDArray& input, - const OpReqType req, - const NDArray& output) { - using namespace mshadow; - using namespace mxnet_op; - using namespace csr; - if (req == kNullOp) return; - CHECK_EQ(req, kWriteTo) << "GradientReversalOp with CSR only supports 
kWriteTo"; - Stream *s = ctx.get_stream(); - if (!input.storage_initialized()) { - FillZerosCsrImpl(s, output); - return; - } - const nnvm::dim_t nnz = input.storage_shape()[0]; - const nnvm::dim_t num_rows = output.shape()[0]; - output.CheckAndAlloc({Shape1(num_rows + 1), Shape1(nnz)}); - CHECK_EQ(output.aux_type(kIdx), output.aux_type(kIndPtr)) - << "The dtypes of indices and indptr don't match"; - MSHADOW_TYPE_SWITCH(output.dtype(), DType, { - MSHADOW_IDX_TYPE_SWITCH(output.aux_type(kIdx), IType, { - MXNET_ASSIGN_REQ_SWITCH(req, req_type, { - Kernel, xpu>::Launch( - s, nnz, output.data().dptr(), input.data().dptr()); - Copy(output.aux_data(kIdx).FlatTo1D(s), - input.aux_data(kIdx).FlatTo1D(s), s); - Copy(output.aux_data(kIndPtr).FlatTo1D(s), - input.aux_data(kIndPtr).FlatTo1D(s), s); - }); - }); - }); -} - -template -void GradientReversalOpForwardEx(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CHECK_EQ(inputs.size(), 1U); - CHECK_EQ(outputs.size(), 1U); - CHECK_EQ(req.size(), 1U); - const auto in_stype = inputs[0].storage_type(); - const auto out_stype = outputs[0].storage_type(); - if (in_stype == kCSRStorage && out_stype == kCSRStorage) { - GradientReversalOpForwardCsrImpl(ctx, inputs[0], req[0], outputs[0]); - } else { - LogUnimplementedOp(attrs, ctx, inputs, req, outputs); - } -} - template void GradientReversalOpBackward(const nnvm::NodeAttrs& attrs, const OpContext& ctx, diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index 9d1bff3912a3..8ada72990fc4 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -24,6 +24,7 @@ * \author Istvan Fehervari */ #include "./gradient_reversal_op-inl.h" +#include "../tensor/elemwise_unary_op.h" namespace mxnet { namespace op { @@ -43,14 +44,15 @@ the preceding layer. 
[](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", GradientReversalOpShape) -.set_attr("FInferType", GradientReversalOpType) -.set_attr("FInferStorageType", GradientReversalOpStorageType) -.set_attr("FCompute", GradientReversalOpForward) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) +.set_attr("FCompute", UnaryOp::IdentityCompute) +.set_attr("FComputeEx", UnaryOp::IdentityComputeEx) .set_attr("FGradient", ElemwiseGradUseIn{"_contrib_backward_gradientreversal"}) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}}; +.set_attr("FInplaceIdentity", + [](const NodeAttrs& attrs){ + return std::vector{true}; }) .add_argument("data", "NDArray-or-Symbol", "Input ndarray") .add_arguments(GradientReversalParam::__FIELDS__()); @@ -61,7 +63,7 @@ NNVM_REGISTER_OP(_contrib_backward_gradientreversal) .set_num_outputs(1) .set_attr("TIsBackward", true) .set_attr("FCompute", GradientReversalOpBackward) -.set_attr("FComputeEx", GradientReversalOpForwardEx); +.set_attr("FComputeEx", UnaryOp::IdentityComputeEx); } // namespace op } // namespace mxnet diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_reversal_op.cu index cf4871dd7c4e..985d1898cef6 100644 --- a/src/operator/contrib/gradient_reversal_op.cu +++ b/src/operator/contrib/gradient_reversal_op.cu @@ -24,13 +24,14 @@ * \author Istvan Fehervari */ #include "./gradient_reversal_op-inl.h" +#include "../tensor/elemwise_unary_op.h" namespace mxnet { namespace op { NNVM_REGISTER_OP(_contrib_gradientreversal) -.set_attr("FComputeEx", GradientReversalOpForwardEx) -.set_attr("FCompute", GradientReversalOpForward); +.set_attr("FComputeEx", UnaryOp::IdentityComputeEx) +.set_attr("FCompute", UnaryOp::IdentityCompute); NNVM_REGISTER_OP(_contrib_backward_gradientreversal) .set_attr("FCompute", GradientReversalOpBackward); From 912f2a0f69517602255ab1785e04e94db711cb64 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sun, 13 Jan 2019 13:24:43 -0800 Subject: [PATCH 08/14] Fixed typos in function docs --- include/mxnet/op_attr_types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mxnet/op_attr_types.h b/include/mxnet/op_attr_types.h index dd818457f827..41be554953fd 100644 --- a/include/mxnet/op_attr_types.h +++ b/include/mxnet/op_attr_types.h @@ -254,7 +254,7 @@ using FNDArrayFunction = std::function& inputs, std::vector* outputs)>; /*! - * \brief Resiger a compute function for simple stateless forward only operator + * \brief Register a compute function for simple stateless forward only operator * * \note Register under "FCompute" and "FCompute" */ @@ -264,7 +264,7 @@ using FCompute = std::function& req, const std::vector& outputs)>; /*! - * \brief Resiger an NDArray compute function for simple stateless forward only operator + * \brief Register an NDArray compute function for simple stateless forward only operator * \note Register under "FComputeEx" and "FComputeEx" * Dispatched only when inferred dispatch_mode is FDispatchComputeEx */ @@ -275,7 +275,7 @@ using FComputeEx = std::function& outputs)>; /*! - * \brief Resiger a storage and dispatch mode inference function based on + * \brief Register a storage and dispatch mode inference function based on * storage types of the inputs and outputs, and the dev_mask for the operator. 
* * \note Register under "FInferStorageType" From f865e14906d8b50019e6ff5053b36375f52ca7d4 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sun, 13 Jan 2019 13:25:13 -0800 Subject: [PATCH 09/14] Changed default behavior to identity --- src/operator/contrib/gradient_reversal_op-inl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h index ac7957cd0831..cc445158fdd8 100644 --- a/src/operator/contrib/gradient_reversal_op-inl.h +++ b/src/operator/contrib/gradient_reversal_op-inl.h @@ -41,7 +41,7 @@ struct GradientReversalParam : public dmlc::Parameter { float l; DMLC_DECLARE_PARAMETER(GradientReversalParam) { DMLC_DECLARE_FIELD(l) - .set_default(0.0) + .set_default(1.0) .describe("Lambda coefficient of the gradient reversal function."); } }; From 0cd8416340d7530be2dc453986ddcd01a0d247c8 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sun, 13 Jan 2019 14:06:21 -0800 Subject: [PATCH 10/14] Replaced backward code with scalar_mul --- .../contrib/gradient_reversal_op-inl.h | 85 ------------------- src/operator/contrib/gradient_reversal_op.cc | 68 ++++++++++----- src/operator/contrib/gradient_reversal_op.cu | 5 +- 3 files changed, 52 insertions(+), 106 deletions(-) delete mode 100644 src/operator/contrib/gradient_reversal_op-inl.h diff --git a/src/operator/contrib/gradient_reversal_op-inl.h b/src/operator/contrib/gradient_reversal_op-inl.h deleted file mode 100644 index cc445158fdd8..000000000000 --- a/src/operator/contrib/gradient_reversal_op-inl.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * Copyright (c) 2018 by Contributors - * \file gradient_reversal_op-inl.h - * \brief - * \author Istvan Fehervari -*/ -#ifndef MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ -#define MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ - -#include -#include -#include "../mshadow_op.h" -#include "../mxnet_op.h" -#include "../operator_common.h" -#include "../elemwise_op_common.h" -#include "../tensor/init_op.h" - -namespace mxnet { -namespace op { - -struct GradientReversalParam : public dmlc::Parameter { - float l; - DMLC_DECLARE_PARAMETER(GradientReversalParam) { - DMLC_DECLARE_FIELD(l) - .set_default(1.0) - .describe("Lambda coefficient of the gradient reversal function."); - } -}; - -template -struct gradient_reversal_backward { - template - MSHADOW_XINLINE static void Map(int i, DType* in_grad, const DType* out_grad, - const DType* in_data, const float l) { - KERNEL_ASSIGN(in_grad[i], req, out_grad[i] * -l); - } -}; - -template -void GradientReversalOpBackward(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CHECK_EQ(inputs.size(), 2U); - CHECK_EQ(outputs.size(), 1U); - CHECK_EQ(req.size(), 1U); - mshadow::Stream *s = ctx.get_stream(); - const TBlob& out_grad = inputs[0]; - const TBlob& in_data = inputs[1]; - const TBlob& in_grad = outputs[0]; - const GradientReversalParam& param = nnvm::get(attrs.parsed); - using namespace mxnet_op; - MSHADOW_TYPE_SWITCH(out_grad.type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { - Kernel, xpu>::Launch( - s, in_grad.Size(), in_grad.dptr(), out_grad.dptr(), - in_data.dptr(), param.l); - }); - }); -} - -} // namespace op -} // namespace mxnet - -#endif // MXNET_OPERATOR_CONTRIB_GRADIENT_REVERSAL_OP_INL_H_ diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index 8ada72990fc4..e8a6b35fe6fd 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -23,29 +23,62 @@ * \brief * \author Istvan Fehervari */ -#include "./gradient_reversal_op-inl.h" #include "../tensor/elemwise_unary_op.h" +#include "../tensor/elemwise_binary_scalar_op.h" namespace mxnet { namespace op { -DMLC_REGISTER_PARAMETER(GradientReversalParam); +static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1); + CHECK_EQ(out_attrs->size(), 1); + const auto in_stype = in_attrs->at(0); + auto &out_stype = out_attrs->at(0); + bool dispatched = false; + if (!dispatched && (in_stype == kDefaultStorage)) { + // dense -> dense + dispatched = storage_type_assign(&out_stype, kDefaultStorage, + dispatch_mode, DispatchMode::kFCompute); + } + if (!dispatched && in_stype == kRowSparseStorage) { + // row sparse -> row sparse + dispatched = storage_type_assign(&out_stype, kRowSparseStorage, + dispatch_mode, DispatchMode::kFComputeEx); + // FComputeEx can handle dns output on cpu, too + if (dev_mask == cpu::kDevMask && out_stype == kDefaultStorage) { + DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + dispatched = true; + } + } + if (!dispatched && in_stype == kCSRStorage) { + // csr -> csr + dispatched = storage_type_assign(&out_stype, kCSRStorage, + dispatch_mode, DispatchMode::kFComputeEx); + // FComputeEx can handle dns output on cpu, too + if (dev_mask == cpu::kDevMask && out_stype == kDefaultStorage) { + 
DISPATCH_MODE_ASSIGN_CHECK(dispatch_mode, 0, DispatchMode::kFComputeEx); + dispatched = true; + } + } + if (!dispatched) { + dispatched = dispatch_fallback(out_attrs, dispatch_mode); + } + return dispatched; +} -NNVM_REGISTER_OP(_contrib_gradientreversal) +MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientreversal) .describe(R"code(This operator implements the gradient reversal function. In forward pass it acts as an identity tranform. During backpropagation it multiplies the gradient from the subsequent level by a negative factor and passes it to the preceding layer. )code" ADD_FILELINE) -.set_attr_parser(ParamParser) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FListInputNames", - [](const NodeAttrs& attrs) { - return std::vector{"data"}; +.set_attr_parser([](NodeAttrs* attrs) { + attrs->parsed = std::stod(attrs->dict["scalar"]); }) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FComputeEx", UnaryOp::IdentityComputeEx) @@ -54,16 +87,13 @@ the preceding layer. [](const NodeAttrs& attrs){ return std::vector{true}; }) -.add_argument("data", "NDArray-or-Symbol", "Input ndarray") -.add_arguments(GradientReversalParam::__FIELDS__()); +.add_argument("scalar", "float", "scalar input"); -NNVM_REGISTER_OP(_contrib_backward_gradientreversal) -.set_attr_parser(ParamParser) -.set_num_inputs(2) -.set_num_outputs(1) +MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientreversal) .set_attr("TIsBackward", true) -.set_attr("FCompute", GradientReversalOpBackward) -.set_attr("FComputeEx", UnaryOp::IdentityComputeEx); +.set_attr("FInferStorageType", BinaryScalarStorageType) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FComputeEx", BinaryScalarOp::ComputeEx); } // namespace op } // namespace mxnet diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_reversal_op.cu index 985d1898cef6..ed8b3cdda8dc 100644 --- a/src/operator/contrib/gradient_reversal_op.cu +++ b/src/operator/contrib/gradient_reversal_op.cu @@ -23,8 +23,8 @@ * \brief * \author Istvan Fehervari */ -#include "./gradient_reversal_op-inl.h" #include "../tensor/elemwise_unary_op.h" +#include "../tensor/elemwise_binary_scalar_op.h" namespace mxnet { namespace op { @@ -34,7 +34,8 @@ NNVM_REGISTER_OP(_contrib_gradientreversal) .set_attr("FCompute", UnaryOp::IdentityCompute); NNVM_REGISTER_OP(_contrib_backward_gradientreversal) -.set_attr("FCompute", GradientReversalOpBackward); +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FComputeEx", BinaryScalarOp::ComputeEx); } // namespace op } // namespace mxnet From 19194b0f67ed5939b4557cd24b40ac33121c4b7d Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Sun, 13 Jan 2019 15:31:29 -0800 Subject: [PATCH 11/14] Fixed backward operator and unit test --- src/operator/contrib/gradient_reversal_op.cc | 4 ++-- tests/python/unittest/test_contrib_operator.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_reversal_op.cc index e8a6b35fe6fd..63877e8807df 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_reversal_op.cc @@ -72,7 +72,7 @@ static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientreversal) .describe(R"code(This operator implements the gradient 
reversal function. -In forward pass it acts as an identity tranform. During backpropagation it +In forward pass it acts as an identity transform. During backpropagation it multiplies the gradient from the subsequent level by a negative factor and passes it to the preceding layer. )code" ADD_FILELINE) @@ -82,7 +82,7 @@ the preceding layer. .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FComputeEx", UnaryOp::IdentityComputeEx) -.set_attr("FGradient", ElemwiseGradUseIn{"_contrib_backward_gradientreversal"}) +.set_attr("FGradient", ElemwiseGradUseNone{"_contrib_backward_gradientreversal"}) .set_attr("FInplaceIdentity", [](const NodeAttrs& attrs){ return std::vector{true}; diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 51aadb4e892d..f2ad1dbbbc3b 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -269,23 +269,23 @@ def f(x, a, b, c): a = np.random.random_sample() b = np.random.random_sample() c = np.random.random_sample() - l = np.random.random_sample() + l = np.random.random_sample() - 0.5 data = mx.symbol.Variable('data') quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) - gr_q_sym = mx.sym.contrib.gradientreversal(data=quad_sym, l=l) + gr_q_sym = mx.sym.contrib.gradientreversal(quad_sym, scalar=l) for dtype in [np.float16, np.float32, np.float64]: for ndim in range(1, 6): shape = rand_shape_nd(ndim, 5) data_np = np.random.randn(*shape).astype(dtype) expected = f(data_np, a, b, c) - backward_expected = (2 * a * data_np + b) * -l + backward_expected = (2 * a * data_np + b) * l # check imperative forward output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c) - output = mx.nd.contrib.gradientreversal(output, l=l) - assert_almost_equal(output.asnumpy(),expected, + output = mx.nd.contrib.gradientreversal(output, scalar=l) + assert_almost_equal(output.asnumpy(), expected, rtol=1e-2 if dtype is np.float16 else 1e-5, atol=1e-2 if dtype is np.float16 else 1e-5) # check forward From d1fffac5e2fae2b41c2f4a2d3f54b7e4cc8e5c0b Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Mon, 14 Jan 2019 11:41:30 -0800 Subject: [PATCH 12/14] Renamed operator to gradient multiplier --- ...ient_reversal_op.cc => gradient_multiplier_op.cc} | 12 ++++++------ ...ient_reversal_op.cu => gradient_multiplier_op.cu} | 6 +++--- tests/python/unittest/test_contrib_operator.py | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) rename src/operator/contrib/{gradient_reversal_op.cc => gradient_multiplier_op.cc} (92%) rename src/operator/contrib/{gradient_reversal_op.cu => gradient_multiplier_op.cu} (91%) diff --git a/src/operator/contrib/gradient_reversal_op.cc b/src/operator/contrib/gradient_multiplier_op.cc similarity index 92% rename from src/operator/contrib/gradient_reversal_op.cc rename to src/operator/contrib/gradient_multiplier_op.cc index 63877e8807df..9d15ee14cd12 100644 --- a/src/operator/contrib/gradient_reversal_op.cc +++ b/src/operator/contrib/gradient_multiplier_op.cc @@ -19,7 +19,7 @@ /*! 
* Copyright (c) 2018 by Contributors - * \file gradient_reversal_op.cc + * \file gradient_multiplier_op.cc * \brief * \author Istvan Fehervari */ @@ -70,10 +70,10 @@ static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, return dispatched; } -MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientreversal) -.describe(R"code(This operator implements the gradient reversal function. +MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientmultiplier) +.describe(R"code(This operator implements the gradient multiplier function. In forward pass it acts as an identity transform. During backpropagation it -multiplies the gradient from the subsequent level by a negative factor and passes it to +multiplies the gradient from the subsequent level by a scalar factor and passes it to the preceding layer. )code" ADD_FILELINE) .set_attr_parser([](NodeAttrs* attrs) { @@ -82,14 +82,14 @@ the preceding layer. .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FComputeEx", UnaryOp::IdentityComputeEx) -.set_attr("FGradient", ElemwiseGradUseNone{"_contrib_backward_gradientreversal"}) +.set_attr("FGradient", ElemwiseGradUseNone{"_contrib_backward_gradientmultiplier"}) .set_attr("FInplaceIdentity", [](const NodeAttrs& attrs){ return std::vector{true}; }) .add_argument("scalar", "float", "scalar input"); -MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientreversal) +MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientmultiplier) .set_attr("TIsBackward", true) .set_attr("FInferStorageType", BinaryScalarStorageType) .set_attr("FCompute", BinaryScalarOp::Compute) diff --git a/src/operator/contrib/gradient_reversal_op.cu b/src/operator/contrib/gradient_multiplier_op.cu similarity index 91% rename from src/operator/contrib/gradient_reversal_op.cu rename to src/operator/contrib/gradient_multiplier_op.cu index ed8b3cdda8dc..7159cea9805d 100644 --- a/src/operator/contrib/gradient_reversal_op.cu +++ b/src/operator/contrib/gradient_multiplier_op.cu @@ -19,7 +19,7 @@ /*! 
* Copyright (c) 2018 by Contributors - * \file gradient_reversal_op.cu + * \file gradient_multiplier_op.cu * \brief * \author Istvan Fehervari */ @@ -29,11 +29,11 @@ namespace mxnet { namespace op { -NNVM_REGISTER_OP(_contrib_gradientreversal) +NNVM_REGISTER_OP(_contrib_gradientmultiplier) .set_attr("FComputeEx", UnaryOp::IdentityComputeEx) .set_attr("FCompute", UnaryOp::IdentityCompute); -NNVM_REGISTER_OP(_contrib_backward_gradientreversal) +NNVM_REGISTER_OP(_contrib_backward_gradientmultiplier) .set_attr("FCompute", BinaryScalarOp::Compute) .set_attr("FComputeEx", BinaryScalarOp::ComputeEx); diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index f2ad1dbbbc3b..16e6e179f7a2 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -261,8 +261,8 @@ def test_multibox_target_op(): assert_array_equal(loc_mask.asnumpy(), expected_loc_mask) assert_array_equal(cls_target.asnumpy(), expected_cls_target) -def test_gradient_reversal_op(): - # We use the quadratic function in combination with gradient reversal +def test_gradient_multiplier_op(): + # We use the quadratic function in combination with gradient multiplier def f(x, a, b, c): return a * x**2 + b * x + c @@ -273,7 +273,7 @@ def f(x, a, b, c): data = mx.symbol.Variable('data') quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) - gr_q_sym = mx.sym.contrib.gradientreversal(quad_sym, scalar=l) + gr_q_sym = mx.sym.contrib.gradientmultiplier(quad_sym, scalar=l) for dtype in [np.float16, np.float32, np.float64]: for ndim in range(1, 6): @@ -284,7 +284,7 @@ def f(x, a, b, c): # check imperative forward output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c) - output = mx.nd.contrib.gradientreversal(output, scalar=l) + output = mx.nd.contrib.gradientmultiplier(output, scalar=l) assert_almost_equal(output.asnumpy(), expected, rtol=1e-2 if dtype is np.float16 else 1e-5, atol=1e-2 if dtype is np.float16 else 1e-5) From 54ae4f0007af370a4fce8167574a6360d1322df4 Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Mon, 14 Jan 2019 17:55:39 -0800 Subject: [PATCH 13/14] Update test_contrib_operator.py Retrigger flaky test --- tests/python/unittest/test_contrib_operator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/python/unittest/test_contrib_operator.py b/tests/python/unittest/test_contrib_operator.py index 16e6e179f7a2..aac807660af1 100644 --- a/tests/python/unittest/test_contrib_operator.py +++ b/tests/python/unittest/test_contrib_operator.py @@ -269,22 +269,22 @@ def f(x, a, b, c): a = np.random.random_sample() b = np.random.random_sample() c = np.random.random_sample() - l = np.random.random_sample() - 0.5 + m = np.random.random_sample() - 0.5 data = mx.symbol.Variable('data') quad_sym = mx.sym.contrib.quadratic(data=data, a=a, b=b, c=c) - gr_q_sym = mx.sym.contrib.gradientmultiplier(quad_sym, scalar=l) + gr_q_sym = mx.sym.contrib.gradientmultiplier(quad_sym, scalar=m) for dtype in [np.float16, np.float32, np.float64]: for ndim in range(1, 6): shape = rand_shape_nd(ndim, 5) data_np = np.random.randn(*shape).astype(dtype) expected = f(data_np, a, b, c) - backward_expected = (2 * a * data_np + b) * l + backward_expected = (2 * a * data_np + b) * m # check imperative forward output = mx.nd.contrib.quadratic(mx.nd.array(data_np), a=a, b=b, c=c) - output = mx.nd.contrib.gradientmultiplier(output, scalar=l) + output = mx.nd.contrib.gradientmultiplier(output, scalar=m) 
assert_almost_equal(output.asnumpy(), expected, rtol=1e-2 if dtype is np.float16 else 1e-5, atol=1e-2 if dtype is np.float16 else 1e-5) From 3983458f8332fa4f05500efe4c698491b097242d Mon Sep 17 00:00:00 2001 From: Istvan Fehervari Date: Thu, 17 Jan 2019 07:25:48 -0800 Subject: [PATCH 14/14] Update gradient_multiplier_op.cc Improved the description of the scalar multiplier --- src/operator/contrib/gradient_multiplier_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/contrib/gradient_multiplier_op.cc b/src/operator/contrib/gradient_multiplier_op.cc index 9d15ee14cd12..47f891ef802b 100644 --- a/src/operator/contrib/gradient_multiplier_op.cc +++ b/src/operator/contrib/gradient_multiplier_op.cc @@ -73,7 +73,7 @@ static bool BinaryScalarStorageType(const nnvm::NodeAttrs& attrs, MXNET_OPERATOR_REGISTER_UNARY(_contrib_gradientmultiplier) .describe(R"code(This operator implements the gradient multiplier function. In forward pass it acts as an identity transform. During backpropagation it -multiplies the gradient from the subsequent level by a scalar factor and passes it to +multiplies the gradient from the subsequent level by a scalar factor lambda and passes it to the preceding layer. )code" ADD_FILELINE) .set_attr_parser([](NodeAttrs* attrs) { @@ -87,7 +87,7 @@ the preceding layer. [](const NodeAttrs& attrs){ return std::vector{true}; }) -.add_argument("scalar", "float", "scalar input"); +.add_argument("scalar", "float", "lambda multiplier"); MXNET_OPERATOR_REGISTER_BINARY_SCALAR(_contrib_backward_gradientmultiplier) .set_attr("TIsBackward", true)
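
After the rename in the last three patches the operator is a general gradient multiplier: the forward pass stays an identity, the backward pass multiplies the gradient by the scalar argument, and gradient reversal is recovered by passing a negative value. A closing usage sketch, illustrative only: the autograd verification is an assumption, while mx.nd.contrib.gradientmultiplier and its scalar argument come from the patches above.

    import mxnet as mx
    import numpy as np

    x = mx.nd.random.normal(shape=(2, 3))
    x.attach_grad()
    with mx.autograd.record():
        # scalar=-1.0 turns the identity transform into a gradient reversal layer
        y = mx.nd.contrib.gradientmultiplier(x, scalar=-1.0)
    y.backward()

    # Identity forward; gradient multiplied by the scalar (-1 here).
    assert np.allclose(y.asnumpy(), x.asnumpy())
    assert np.allclose(x.grad.asnumpy(), -np.ones(x.shape))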