From 3737bbfc98f754c15a758e9706daeffce553768c Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sat, 17 Oct 2015 22:56:40 -0700
Subject: [PATCH] [OP] Add Regression operator for binary and linear regression

---
 R-package/NAMESPACE                                 |   2 +
 R-package/R/mxnet_generated.R                       |  31 ++++
 R-package/man/mx.io.arrayiter.Rd                    |   2 +-
 R-package/man/mx.nd.array.Rd                        |   2 +
 .../man/mx.symbol.LinearRegressionOutput.Rd         |  25 ++++
 .../man/mx.symbol.LogisticRegressionOutput.Rd       |  27 ++++
 python/setup.py                                     |   4 +-
 src/operator/regression_output-inl.h                | 135 ++++++++++++++++++
 src/operator/regression_output.cc                   |  43 ++++++
 src/operator/regression_output.cu                   |  26 ++++
 tests/python/unittest/test_operator.py              |  29 ++++
 11 files changed, 323 insertions(+), 3 deletions(-)
 create mode 100644 R-package/man/mx.symbol.LinearRegressionOutput.Rd
 create mode 100644 R-package/man/mx.symbol.LogisticRegressionOutput.Rd
 create mode 100644 src/operator/regression_output-inl.h
 create mode 100644 src/operator/regression_output.cc
 create mode 100644 src/operator/regression_output.cu

diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index b3ac37a75dfa..2934467eaabf 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -60,6 +60,8 @@ export(mx.symbol.FullyConnected)
 export(mx.symbol.Group)
 export(mx.symbol.LRN)
 export(mx.symbol.LeakyReLU)
+export(mx.symbol.LinearRegressionOutput)
+export(mx.symbol.LogisticRegressionOutput)
 export(mx.symbol.Pooling)
 export(mx.symbol.Reshape)
 export(mx.symbol.SliceChannel)
diff --git a/R-package/R/mxnet_generated.R b/R-package/R/mxnet_generated.R
index 3d3b364bb2a1..14334d5da376 100644
--- a/R-package/R/mxnet_generated.R
+++ b/R-package/R/mxnet_generated.R
@@ -324,6 +324,37 @@ mx.symbol.LeakyReLU <- function(...) {
   mx.varg.symbol.LeakyReLU(list(...))
 }

+#' Use linear regression for the final output; this is used on the final output of a net.
+#'
+#' @param data Symbol
+#'     Input data to function.
+#' @param label Symbol
+#'     Input label to function.
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.LinearRegressionOutput <- function(...) {
+  mx.varg.symbol.LinearRegressionOutput(list(...))
+}
+
+#' Use logistic regression for the final output; this is used on the final output of a net.
+#' Logistic regression is suitable for binary classification or probability prediction tasks.
+#'
+#' @param data Symbol
+#'     Input data to function.
+#' @param label Symbol
+#'     Input label to function.
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.LogisticRegressionOutput <- function(...) {
+  mx.varg.symbol.LogisticRegressionOutput(list(...))
+}
+
 #' Perform spatial pooling on inputs.
 #'
 #' @param data Symbol
diff --git a/R-package/man/mx.io.arrayiter.Rd b/R-package/man/mx.io.arrayiter.Rd
index c9f3da901eff..cb0db7d4a7fa 100644
--- a/R-package/man/mx.io.arrayiter.Rd
+++ b/R-package/man/mx.io.arrayiter.Rd
@@ -4,7 +4,7 @@
 \alias{mx.io.arrayiter}
 \title{Create MXDataIter compatible iterator from R's array}
 \usage{
-mx.io.arrayiter(data, label = NULL, batch.size = 128, shuffle = FALSE)
+mx.io.arrayiter(data, label, batch.size = 128, shuffle = FALSE)
 }
 \arguments{
 \item{data}{The data array.}
diff --git a/R-package/man/mx.nd.array.Rd b/R-package/man/mx.nd.array.Rd
index a1004c7a92a7..061ba0912094 100644
--- a/R-package/man/mx.nd.array.Rd
+++ b/R-package/man/mx.nd.array.Rd
@@ -13,6 +13,8 @@ mx.nd.array(src.array, ctx = NULL)
 }
 \value{
 An \code{mx.ndarray}
+
+An Rcpp_MXNDArray object
 }
 \description{
 Create a new \code{mx.ndarray} that copies the content from src on ctx.
diff --git a/R-package/man/mx.symbol.LinearRegressionOutput.Rd b/R-package/man/mx.symbol.LinearRegressionOutput.Rd
new file mode 100644
index 000000000000..8d00dd325d1b
--- /dev/null
+++ b/R-package/man/mx.symbol.LinearRegressionOutput.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/mxnet_generated.R
+\name{mx.symbol.LinearRegressionOutput}
+\alias{mx.symbol.LinearRegressionOutput}
+\title{Use linear regression for the final output; this is used on the final output of a net.}
+\usage{
+mx.symbol.LinearRegressionOutput(...)
+}
+\arguments{
+\item{data}{Symbol
+Input data to function.}
+
+\item{label}{Symbol
+Input label to function.}
+
+\item{name}{string, optional
+Name of the resulting symbol.}
+}
+\value{
+out The result mx.symbol
+}
+\description{
+Use linear regression for the final output; this is used on the final output of a net.
+}
+
diff --git a/R-package/man/mx.symbol.LogisticRegressionOutput.Rd b/R-package/man/mx.symbol.LogisticRegressionOutput.Rd
new file mode 100644
index 000000000000..221816ea6c15
--- /dev/null
+++ b/R-package/man/mx.symbol.LogisticRegressionOutput.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/mxnet_generated.R
+\name{mx.symbol.LogisticRegressionOutput}
+\alias{mx.symbol.LogisticRegressionOutput}
+\title{Use logistic regression for the final output; this is used on the final output of a net.
+Logistic regression is suitable for binary classification or probability prediction tasks.}
+\usage{
+mx.symbol.LogisticRegressionOutput(...)
+}
+\arguments{
+\item{data}{Symbol
+Input data to function.}
+
+\item{label}{Symbol
+Input label to function.}
+
+\item{name}{string, optional
+Name of the resulting symbol.}
+}
+\value{
+out The result mx.symbol
+}
+\description{
+Use logistic regression for the final output; this is used on the final output of a net.
+Logistic regression is suitable for binary classification or probability prediction tasks.
+}
+
diff --git a/python/setup.py b/python/setup.py
index c3bfbbcd220a..a41b2f0bd3d4 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -18,8 +18,8 @@
       version=__version__,
       description=open(os.path.join(CURRENT_DIR, 'README.md')).read(),
       install_requires=[
-        'numpy',
-    ],
+          'numpy',
+      ],
       zip_safe=False,
       packages=['mxnet'],
       data_files=[('mxnet', [LIB_PATH[0]])],
diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h
new file mode 100644
index 000000000000..4c4bf6ffb625
--- /dev/null
+++ b/src/operator/regression_output-inl.h
@@ -0,0 +1,135 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file regression_output-inl.h
+ * \brief Regression output operator.
+ */
+#ifndef MXNET_OPERATOR_REGRESSION_OUTPUT_INL_H_
+#define MXNET_OPERATOR_REGRESSION_OUTPUT_INL_H_
+
+#include <dmlc/logging.h>
+#include <mxnet/operator.h>
+#include <map>
+#include <string>
+#include <vector>
+#include <utility>
+#include "./operator_common.h"
+
+namespace mxnet {
+namespace op {
+enum RegressionOutputOpInputs {kData, kLabel};
+enum RegressionOutputOutputs {kOut};
+enum RegressionOutputType {kLinear, kLogistic};
+
+// Special operator that outputs the regression value in the forward pass
+// and computes the gradient directly in the backward pass.
+template<typename xpu, typename ForwardOp, typename BackwardOp>
+class RegressionOutputOp : public Operator {
+ public:
+  virtual void Forward(const OpContext &ctx,
+                       const std::vector<TBlob> &in_data,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &out_data,
+                       const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(in_data.size(), 2) << "RegressionOutputOp Input: [data, label]";
+    CHECK_EQ(out_data.size(), 1) << "RegressionOutputOp Output: [output]";
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 2> data = in_data[kData].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2> out = out_data[kOut].FlatTo2D<xpu, real_t>(s);
+    Assign(out, req[kOut], F<ForwardOp>(data));
+  }
+
+  virtual void Backward(const OpContext &ctx,
+                        const std::vector<TBlob> &out_grad,
+                        const std::vector<TBlob> &in_data,
+                        const std::vector<TBlob> &out_data,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &in_grad,
+                        const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(in_data.size(), 2);
+    CHECK_EQ(out_grad.size(), 1);
+    CHECK_GE(in_grad.size(), 1);
+    CHECK_GE(req.size(), 1);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 1> label = in_data[kLabel].get<xpu, 1, real_t>(s);
+    Tensor<xpu, 2> out = out_data[kOut].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2> grad = in_grad[kData].FlatTo2D<xpu, real_t>(s);
+    Assign(grad, req[kData], F<BackwardOp>(out, reshape(label, grad.shape_)));
+  }
+};
+
+// Declare factory function, used for dispatch specialization
+template<typename xpu>
+Operator* CreateRegressionOutputOp(RegressionOutputType type);
+
+#if DMLC_USE_CXX11
+template<RegressionOutputType type>
+class RegressionOutputProp : public OperatorProperty {
+ public:
+  std::vector<std::string> ListArguments() const override {
+    return {"data", "label"};
+  }
+
+  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
+  }
+
+  std::map<std::string, std::string> GetParams() const override {
+    return std::map<std::string, std::string>();
+  }
+
+  bool InferShape(std::vector<TShape> *in_shape,
+                  std::vector<TShape> *out_shape,
+                  std::vector<TShape> *aux_shape) const override {
+    using namespace mshadow;
+    CHECK_EQ(in_shape->size(), 2) << "Input:[data, label]";
+    const TShape &dshape = in_shape->at(0);
+    if (dshape.ndim() == 0) return false;
+    CHECK_EQ(dshape[1], 1) << TypeString() << " requires input's num_hidden=1.";
+    SHAPE_ASSIGN_CHECK(*in_shape, 1, Shape1(dshape[0]));
+    out_shape->clear();
+    out_shape->push_back(dshape);
+    return true;
+  }
+
+  OperatorProperty* Copy() const override {
+    return new RegressionOutputProp<type>();
+  }
+
+  std::string TypeString() const override {
+    switch (type) {
+      case kLinear: return "LinearRegressionOutput";
+      case kLogistic: return "LogisticRegressionOutput";
+      default: LOG(FATAL) << "unknown type"; return "";
+    }
+  }
+
+  std::vector<int> DeclareBackwardDependency(
+    const std::vector<int> &out_grad,
+    const std::vector<int> &in_data,
+    const std::vector<int> &out_data) const override {
+    return {in_data[kLabel], out_data[kOut]};
+  }
+
+  std::vector<std::pair<int, void*> > BackwardInplaceOption(
+    const std::vector<int> &out_grad,
+    const std::vector<int> &in_data,
+    const std::vector<int> &out_data,
+    const std::vector<void*> &in_grad) const override {
+    return {{out_data[kOut], in_grad[kData]}};
+  }
+
+  std::vector<std::pair<int, void*> > ForwardInplaceOption(
+    const std::vector<int> &in_data,
+    const std::vector<void*> &out_data) const override {
+    return {{in_data[kData], out_data[kOut]}};
+  }
+
+  Operator* CreateOperator(Context ctx) const;
+};
+#endif  // DMLC_USE_CXX11
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_REGRESSION_OUTPUT_INL_H_
diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc
new file mode 100644
index 000000000000..e10888d624e3
--- /dev/null
+++ b/src/operator/regression_output.cc
@@ -0,0 +1,43 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file regression_output.cc
+ * \brief regression output operator
+*/
+#include "./regression_output-inl.h"
+#include "./mshadow_op.h"
+
+namespace mxnet {
+namespace op {
+
+template<>
+Operator *CreateRegressionOutputOp<cpu>(RegressionOutputType type) {
+  switch (type) {
+    case kLinear:
+      return new RegressionOutputOp<cpu, mshadow_op::identity, mshadow::op::minus>();
+    case kLogistic:
+      return new RegressionOutputOp<cpu, mshadow_op::sigmoid, mshadow::op::minus>();
+    default:
+      LOG(FATAL) << "unknown regression output type " << type;
+  }
+  return nullptr;
+}
+
+// DO_BIND_DISPATCH comes from operator_common.h
+template<RegressionOutputType type>
+Operator *RegressionOutputProp<type>::CreateOperator(Context ctx) const {
+  DO_BIND_DISPATCH(CreateRegressionOutputOp, type);
+}
+
+MXNET_REGISTER_OP_PROPERTY(LinearRegressionOutput, RegressionOutputProp<kLinear>)
+.describe("Use linear regression for the final output; this is used on the final output of a net.")
+.add_argument("data", "Symbol", "Input data to function.")
+.add_argument("label", "Symbol", "Input label to function.");
+
+MXNET_REGISTER_OP_PROPERTY(LogisticRegressionOutput, RegressionOutputProp<kLogistic>)
+.describe("Use logistic regression for the final output; this is used on the final output of a net.\n"
+          "Logistic regression is suitable for binary classification "
+          "or probability prediction tasks.")
+.add_argument("data", "Symbol", "Input data to function.")
+.add_argument("label", "Symbol", "Input label to function.");
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/regression_output.cu b/src/operator/regression_output.cu
new file mode 100644
index 000000000000..c653b556278d
--- /dev/null
+++ b/src/operator/regression_output.cu
@@ -0,0 +1,26 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file regression_output.cu
+ * \brief regression output operator
+*/
+#include "./regression_output-inl.h"
+#include "./mshadow_op.h"
+
+namespace mxnet {
+namespace op {
+
+template<>
+Operator *CreateRegressionOutputOp<gpu>(RegressionOutputType type) {
+  switch (type) {
+    case kLinear:
+      return new RegressionOutputOp<gpu, mshadow_op::identity, mshadow::op::minus>();
+    case kLogistic:
+      return new RegressionOutputOp<gpu, mshadow_op::sigmoid, mshadow::op::minus>();
+    default:
+      LOG(FATAL) << "unknown regression output type " << type;
+  }
+  return nullptr;
+}
+}  // namespace op
+}  // namespace mxnet
+
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 304ef2b3e720..fbc007b9fed7 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -143,7 +143,36 @@ def test_slice_channel():
     check_slice_channel(2)
     check_slice_channel(4)

+def check_regression(symbol, forward, backward):
+    data = mx.symbol.Variable('data')
+    label = mx.symbol.Variable('label')
+    out = symbol(data, label)
+    shape = (3, 1)
+    arr_data = mx.random.uniform(-1, 1, shape)
+    arr_label = mx.random.uniform(0, 1, shape[0])
+    arr_grad = mx.nd.empty(shape)
+    exec1 = out.bind(mx.cpu(),
+                     args=[arr_data, arr_label],
+                     args_grad={"data" : arr_grad})
+    exec1.forward()
+    out1 = exec1.outputs[0].asnumpy()
+    npout = forward(arr_data.asnumpy())
+    assert reldiff(npout, out1) < 1e-6
+
+    exec1.backward()
+    npout = backward(npout, arr_label.asnumpy().reshape(npout.shape))
+    assert reldiff(npout, arr_grad.asnumpy()) < 1e-6
+
+def test_regression():
+    check_regression(mx.symbol.LogisticRegressionOutput,
+                     lambda x: 1.0 / (1.0 + np.exp(-x)),
+                     lambda x, y : x - y)
+    check_regression(mx.symbol.LinearRegressionOutput,
+                     lambda x: x,
+                     lambda x, y : x - y)
+
 if __name__ == '__main__':
     test_elementwise_sum()
     test_concat()
     test_slice_channel()
+    test_regression()
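
A minimal usage sketch of the operators this patch adds, assuming the same 2015-era
explicit bind() API that the unit test above exercises; the input values, variable
names, and printed expectations are illustrative, not part of the patch.

import numpy as np
import mxnet as mx

# Build a trivial net whose output layer is the new operator; positional
# (data, label) arguments mirror the unit test above.
data = mx.symbol.Variable('data')
label = mx.symbol.Variable('label')
net = mx.symbol.LinearRegressionOutput(data, label)

arr_data = mx.nd.array(np.array([[0.2], [0.7], [-0.4]]))  # predictions, shape (3, 1)
arr_label = mx.nd.array(np.array([0.0, 1.0, 0.5]))        # one target per example
arr_grad = mx.nd.empty((3, 1))

exe = net.bind(mx.cpu(), args=[arr_data, arr_label], args_grad={'data': arr_grad})
exe.forward()
print(exe.outputs[0].asnumpy())   # identity of the input for linear regression

exe.backward()                    # no head gradient: the output op supplies its own
print(arr_grad.asnumpy())         # prediction - label, per the test's backward lambda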
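
The unit test asserts the same backward rule, lambda x, y: x - y, for both operators.
That is no coincidence: the gradient of squared loss with respect to a linear output
and the gradient of binary cross-entropy with respect to the pre-sigmoid input both
reduce to prediction minus label. A self-contained NumPy check of both identities
(illustrative, independent of the patch):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

x, y, eps = 0.3, 0.8, 1e-6

# Linear regression: squared loss 0.5*(x - y)**2 has gradient x - y.
def lin_loss(v):
    return 0.5 * (v - y) ** 2

num_grad = (lin_loss(x + eps) - lin_loss(x - eps)) / (2 * eps)
assert abs(num_grad - (x - y)) < 1e-4

# Logistic regression: binary cross-entropy on p = sigmoid(x); the gradient
# with respect to the pre-sigmoid input x collapses to sigmoid(x) - y.
def log_loss(v):
    p = sigmoid(v)
    return -(y * np.log(p) + (1 - y) * np.log(1 - p))

num_grad = (log_loss(x + eps) - log_loss(x - eps)) / (2 * eps)
assert abs(num_grad - (sigmoid(x) - y)) < 1e-4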