From 3737bbfc98f754c15a758e9706daeffce553768c Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sat, 17 Oct 2015 22:56:40 -0700
Subject: [PATCH] [OP] Add Regression operator for binary and linear regression

---
 R-package/NAMESPACE                                 |   2 +
 R-package/R/mxnet_generated.R                       |  31 ++++
 R-package/man/mx.io.arrayiter.Rd                    |   2 +-
 R-package/man/mx.nd.array.Rd                        |   2 +
 .../man/mx.symbol.LinearRegressionOutput.Rd         |  25 ++++
 .../man/mx.symbol.LogisticRegressionOutput.Rd       |  27 ++++
 python/setup.py                                     |   4 +-
 src/operator/regression_output-inl.h                | 135 ++++++++++++++++++
 src/operator/regression_output.cc                   |  43 ++++++
 src/operator/regression_output.cu                   |  26 ++++
 tests/python/unittest/test_operator.py              |  29 ++++
 11 files changed, 323 insertions(+), 3 deletions(-)
 create mode 100644 R-package/man/mx.symbol.LinearRegressionOutput.Rd
 create mode 100644 R-package/man/mx.symbol.LogisticRegressionOutput.Rd
 create mode 100644 src/operator/regression_output-inl.h
 create mode 100644 src/operator/regression_output.cc
 create mode 100644 src/operator/regression_output.cu

diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index b3ac37a75dfa..2934467eaabf 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -60,6 +60,8 @@ export(mx.symbol.FullyConnected)
 export(mx.symbol.Group)
 export(mx.symbol.LRN)
 export(mx.symbol.LeakyReLU)
+export(mx.symbol.LinearRegressionOutput)
+export(mx.symbol.LogisticRegressionOutput)
 export(mx.symbol.Pooling)
 export(mx.symbol.Reshape)
 export(mx.symbol.SliceChannel)
diff --git a/R-package/R/mxnet_generated.R b/R-package/R/mxnet_generated.R
index 3d3b364bb2a1..14334d5da376 100644
--- a/R-package/R/mxnet_generated.R
+++ b/R-package/R/mxnet_generated.R
@@ -324,6 +324,37 @@ mx.symbol.LeakyReLU <- function(...) {
   mx.varg.symbol.LeakyReLU(list(...))
 }

+#' Use linear regression for the final output; this is used on the final output of a net.
+#'
+#' @param data Symbol
+#'     Input data to function.
+#' @param label Symbol
+#'     Input label to function.
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.LinearRegressionOutput <- function(...) {
+  mx.varg.symbol.LinearRegressionOutput(list(...))
+}
+
+#' Use logistic regression for the final output; this is used on the final output of a net.
+#' Logistic regression is suitable for binary classification or probability prediction tasks.
+#'
+#' @param data Symbol
+#'     Input data to function.
+#' @param label Symbol
+#'     Input label to function.
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.LogisticRegressionOutput <- function(...) {
+  mx.varg.symbol.LogisticRegressionOutput(list(...))
+}
+
 #' Perform spatial pooling on inputs.
 #'
 #' @param data Symbol
diff --git a/R-package/man/mx.io.arrayiter.Rd b/R-package/man/mx.io.arrayiter.Rd
index c9f3da901eff..cb0db7d4a7fa 100644
--- a/R-package/man/mx.io.arrayiter.Rd
+++ b/R-package/man/mx.io.arrayiter.Rd
@@ -4,7 +4,7 @@
 \alias{mx.io.arrayiter}
 \title{Create MXDataIter compatible iterator from R's array}
 \usage{
-mx.io.arrayiter(data, label = NULL, batch.size = 128, shuffle = FALSE)
+mx.io.arrayiter(data, label, batch.size = 128, shuffle = FALSE)
 }
 \arguments{
 \item{data}{The data array.}
diff --git a/R-package/man/mx.nd.array.Rd b/R-package/man/mx.nd.array.Rd
index a1004c7a92a7..061ba0912094 100644
--- a/R-package/man/mx.nd.array.Rd
+++ b/R-package/man/mx.nd.array.Rd
@@ -13,6 +13,8 @@ mx.nd.array(src.array, ctx = NULL)
 }
 \value{
 An \code{mx.ndarray}
+
+An Rcpp_MXNDArray object
 }
 \description{
 Create a new \code{mx.ndarray} that copies the content from src on ctx.
diff --git a/R-package/man/mx.symbol.LinearRegressionOutput.Rd b/R-package/man/mx.symbol.LinearRegressionOutput.Rd
new file mode 100644
index 000000000000..8d00dd325d1b
--- /dev/null
+++ b/R-package/man/mx.symbol.LinearRegressionOutput.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/mxnet_generated.R
+\name{mx.symbol.LinearRegressionOutput}
+\alias{mx.symbol.LinearRegressionOutput}
+\title{Use linear regression for the final output; this is used on the final output of a net.}
+\usage{
+mx.symbol.LinearRegressionOutput(...)
+}
+\arguments{
+\item{data}{Symbol
+Input data to function.}
+
+\item{label}{Symbol
+Input label to function.}
+
+\item{name}{string, optional
+Name of the resulting symbol.}
+}
+\value{
+out The result mx.symbol
+}
+\description{
+Use linear regression for the final output; this is used on the final output of a net.
+}
+
diff --git a/R-package/man/mx.symbol.LogisticRegressionOutput.Rd b/R-package/man/mx.symbol.LogisticRegressionOutput.Rd
new file mode 100644
index 000000000000..221816ea6c15
--- /dev/null
+++ b/R-package/man/mx.symbol.LogisticRegressionOutput.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/mxnet_generated.R
+\name{mx.symbol.LogisticRegressionOutput}
+\alias{mx.symbol.LogisticRegressionOutput}
+\title{Use logistic regression for the final output; this is used on the final output of a net.
+Logistic regression is suitable for binary classification or probability prediction tasks.}
+\usage{
+mx.symbol.LogisticRegressionOutput(...)
+}
+\arguments{
+\item{data}{Symbol
+Input data to function.}
+
+\item{label}{Symbol
+Input label to function.}
+
+\item{name}{string, optional
+Name of the resulting symbol.}
+}
+\value{
+out The result mx.symbol
+}
+\description{
+Use logistic regression for the final output; this is used on the final output of a net.
+Logistic regression is suitable for binary classification or probability prediction tasks.
+}
+
diff --git a/python/setup.py b/python/setup.py
index c3bfbbcd220a..a41b2f0bd3d4 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -18,8 +18,8 @@
       version=__version__,
       description=open(os.path.join(CURRENT_DIR, 'README.md')).read(),
       install_requires=[
-        'numpy',
-    ],
+          'numpy',
+      ],
       zip_safe=False,
       packages=['mxnet'],
       data_files=[('mxnet', [LIB_PATH[0]])],
diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h
new file mode 100644
index 000000000000..4c4bf6ffb625
--- /dev/null
+++ b/src/operator/regression_output-inl.h
@@ -0,0 +1,135 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file regression_output-inl.h
+ * \brief Regression output operator.
+ */
+#ifndef MXNET_OPERATOR_REGRESSION_OUTPUT_INL_H_
+#define MXNET_OPERATOR_REGRESSION_OUTPUT_INL_H_
+
+#include <dmlc/logging.h>
+#include <mxnet/operator.h>
+#include <map>
+#include <string>
+#include <vector>
+#include <utility>
+#include "./operator_common.h"
+
+namespace mxnet {
+namespace op {
+enum RegressionOutputOpInputs {kData, kLabel};
+enum RegressionOutputOutputs {kOut};
+enum RegressionOutputType {kLinear, kLogistic};
+
+// Special operator that outputs the regression value in the forward pass
+// and computes the gradient directly in the backward pass.
+template<typename xpu, typename ForwardOp, typename BackwardOp>
+class RegressionOutputOp : public Operator {
+ public:
+  virtual void Forward(const OpContext &ctx,
+                       const std::vector<TBlob> &in_data,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &out_data,
+                       const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(in_data.size(), 2) << "RegressionOutputOp Input: [data, label]";
+    CHECK_EQ(out_data.size(), 1) << "RegressionOutputOp Output: [output]";
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 2> data = in_data[kData].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2> out = out_data[kOut].FlatTo2D<xpu, real_t>(s);
+    Assign(out, req[kOut], F<ForwardOp>(data));
+  }
+
+  virtual void Backward(const OpContext &ctx,
+                        const std::vector<TBlob> &out_grad,
+                        const std::vector<TBlob> &in_data,
+                        const std::vector<TBlob> &out_data,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &in_grad,
+                        const std::vector<TBlob> &aux_args) {
+    using namespace mshadow;
+    using namespace mshadow::expr;
+    CHECK_EQ(in_data.size(), 2);
+    CHECK_EQ(out_grad.size(), 1);
+    CHECK_GE(in_grad.size(), 1);
+    CHECK_GE(req.size(), 1);
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    Tensor<xpu, 1> label = in_data[kLabel].get<xpu, 1, real_t>(s);
+    Tensor<xpu, 2> out = out_data[kOut].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2> grad = in_grad[kData].FlatTo2D<xpu, real_t>(s);
+    Assign(grad, req[kData], F<BackwardOp>(out, reshape(label, grad.shape_)));
+  }
+};
+
+// Declare factory function, used for dispatch specialization
+template<typename xpu>
+Operator* CreateRegressionOutputOp(RegressionOutputType type);
+
+#if DMLC_USE_CXX11
+template<RegressionOutputType type>
+class RegressionOutputProp : public OperatorProperty {
+ public:
+  std::vector<std::string> ListArguments() const override {
+    return {"data", "label"};
+  }
+
+  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
+  }
+
+  std::map<std::string, std::string> GetParams() const override {
+    return std::map<std::string, std::string>();
+  }
+
+  bool InferShape(std::vector<TShape> *in_shape,
+                  std::vector<TShape> *out_shape,
+                  std::vector<TShape> *aux_shape) const override {
+    using namespace mshadow;
+    CHECK_EQ(in_shape->size(), 2) << "Input:[data, label]";
+    const TShape &dshape = in_shape->at(0);
+    if (dshape.ndim() == 0) return false;
+    CHECK_EQ(dshape[1], 1) << TypeString() << " requires input's num_hidden=1.";
+    SHAPE_ASSIGN_CHECK(*in_shape, 1, Shape1(dshape[0]));
+    out_shape->clear();
+    out_shape->push_back(dshape);
+    return true;
+  }
+
+  OperatorProperty* Copy() const override {
+    return new RegressionOutputProp<type>();
+  }
+
+  std::string TypeString() const override {
+    switch (type) {
+      case kLinear: return "LinearRegressionOutput";
+      case kLogistic: return "LogisticRegressionOutput";
+      default: LOG(FATAL) << "unknown type"; return "";
+    }
+  }
+
+  std::vector<int> DeclareBackwardDependency(
+    const std::vector<int> &out_grad,
+    const std::vector<int> &in_data,
+    const std::vector<int> &out_data) const override {
+    return {in_data[kLabel], out_data[kOut]};
+  }
+
+  std::vector<std::pair<int, void*> > BackwardInplaceOption(
+    const std::vector<int> &out_grad,
+    const std::vector<int> &in_data,
+    const std::vector<int> &out_data,
+    const std::vector<void*> &in_grad) const override {
+    return {{out_data[kOut], in_grad[kData]}};
+  }
+
+  std::vector<std::pair<int, void*> > ForwardInplaceOption(
+    const std::vector<int> &in_data,
+    const std::vector<void*> &out_data) const override {
+    return {{in_data[kData], out_data[kOut]}};
+  }
+
+  Operator* CreateOperator(Context ctx) const;
+};
+#endif  // DMLC_USE_CXX11
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_REGRESSION_OUTPUT_INL_H_
diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc
new file mode 100644
index 000000000000..e10888d624e3
--- /dev/null
+++ b/src/operator/regression_output.cc
@@ -0,0 +1,43 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file regression_output.cc
+ * \brief regression output operator
+*/
+#include "./regression_output-inl.h"
+#include "./mshadow_op.h"
+
+namespace mxnet {
+namespace op {
+
+template<>
+Operator *CreateRegressionOutputOp<cpu>(RegressionOutputType type) {
+  switch (type) {
+    case kLinear:
+      return new RegressionOutputOp<cpu, mshadow_op::identity, mshadow::op::minus>();
+    case kLogistic:
+      return new RegressionOutputOp<cpu, mshadow_op::sigmoid, mshadow::op::minus>();
+    default:
+      LOG(FATAL) << "unknown regression output type " << type;
+  }
+  return nullptr;
+}
+
+// DO_BIND_DISPATCH comes from operator_common.h
+template<RegressionOutputType type>
+Operator *RegressionOutputProp<type>::CreateOperator(Context ctx) const {
+  DO_BIND_DISPATCH(CreateRegressionOutputOp, type);
+}
+
+MXNET_REGISTER_OP_PROPERTY(LinearRegressionOutput, RegressionOutputProp<kLinear>)
+.describe("Use linear regression for the final output; this is used on the final output of a net.")
+.add_argument("data", "Symbol", "Input data to function.")
+.add_argument("label", "Symbol", "Input label to function.");
+
+MXNET_REGISTER_OP_PROPERTY(LogisticRegressionOutput, RegressionOutputProp<kLogistic>)
+.describe("Use logistic regression for the final output; this is used on the final output of a net.\n"
+          "Logistic regression is suitable for binary classification "
+          "or probability prediction tasks.")
+.add_argument("data", "Symbol", "Input data to function.")
+.add_argument("label", "Symbol", "Input label to function.");
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/regression_output.cu b/src/operator/regression_output.cu
new file mode 100644
index 000000000000..c653b556278d
--- /dev/null
+++ b/src/operator/regression_output.cu
@@ -0,0 +1,26 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file regression_output.cu
+ * \brief regression output operator
+*/
+#include "./regression_output-inl.h"
+#include "./mshadow_op.h"
+
+namespace mxnet {
+namespace op {
+
+template<>
+Operator *CreateRegressionOutputOp<gpu>(RegressionOutputType type) {
+  switch (type) {
+    case kLinear:
+      return new RegressionOutputOp<gpu, mshadow_op::identity, mshadow::op::minus>();
+    case kLogistic:
+      return new RegressionOutputOp<gpu, mshadow_op::sigmoid, mshadow::op::minus>();
+    default:
+      LOG(FATAL) << "unknown regression output type " << type;
+  }
+  return nullptr;
+}
+}  // namespace op
+}  // namespace mxnet
+
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 304ef2b3e720..fbc007b9fed7 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -143,7 +143,36 @@ def test_slice_channel():
     check_slice_channel(2)
     check_slice_channel(4)

+def check_regression(symbol, forward, backward):
+    data = mx.symbol.Variable('data')
+    label = mx.symbol.Variable('label')
+    out = symbol(data, label)
+    shape = (3, 1)
+    arr_data = mx.random.uniform(-1, 1, shape)
+    arr_label = mx.random.uniform(0, 1, shape[0])
+    arr_grad = mx.nd.empty(shape)
+    exec1 = out.bind(mx.cpu(),
+                     args=[arr_data, arr_label],
+                     args_grad={"data" : arr_grad})
+    exec1.forward()
+    out1 = exec1.outputs[0].asnumpy()
+    npout = forward(arr_data.asnumpy())
+    assert reldiff(npout, out1) < 1e-6
+
+    exec1.backward()
+    npout = backward(npout, arr_label.asnumpy().reshape(npout.shape))
+    assert reldiff(npout, arr_grad.asnumpy()) < 1e-6
+
+def test_regression():
+    check_regression(mx.symbol.LogisticRegressionOutput,
+                     lambda x: 1.0 / (1.0 + np.exp(-x)),
+                     lambda x, y : x - y)
+    check_regression(mx.symbol.LinearRegressionOutput,
+                     lambda x: x,
+                     lambda x, y : x - y)
+
 if __name__ == '__main__':
     test_elementwise_sum()
     test_concat()
     test_slice_channel()
+    test_regression()
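
A minimal usage sketch of the operators this patch adds, assuming the same 2015-era
explicit bind() API that the unit test above exercises; the input values, variable
names, and printed expectations are illustrative, not part of the patch.

import numpy as np
import mxnet as mx

# Build a trivial net whose output layer is the new operator; positional
# (data, label) arguments mirror the unit test above.
data = mx.symbol.Variable('data')
label = mx.symbol.Variable('label')
net = mx.symbol.LinearRegressionOutput(data, label)

arr_data = mx.nd.array(np.array([[0.2], [0.7], [-0.4]]))  # predictions, shape (3, 1)
arr_label = mx.nd.array(np.array([0.0, 1.0, 0.5]))        # one target per example
arr_grad = mx.nd.empty((3, 1))

exe = net.bind(mx.cpu(), args=[arr_data, arr_label], args_grad={'data': arr_grad})
exe.forward()
print(exe.outputs[0].asnumpy())   # identity of the input for linear regression

exe.backward()                    # no head gradient: the output op supplies its own
print(arr_grad.asnumpy())         # prediction - label, per the test's backward lambda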
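
The unit test asserts the same backward rule, lambda x, y: x - y, for both operators.
That is no coincidence: the gradient of squared loss with respect to a linear output
and the gradient of binary cross-entropy with respect to the pre-sigmoid input both
reduce to prediction minus label. A self-contained NumPy check of both identities
(illustrative, independent of the patch):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

x, y, eps = 0.3, 0.8, 1e-6

# Linear regression: squared loss 0.5*(x - y)**2 has gradient x - y.
def lin_loss(v):
    return 0.5 * (v - y) ** 2

num_grad = (lin_loss(x + eps) - lin_loss(x - eps)) / (2 * eps)
assert abs(num_grad - (x - y)) < 1e-4

# Logistic regression: binary cross-entropy on p = sigmoid(x); the gradient
# with respect to the pre-sigmoid input x collapses to sigmoid(x) - y.
def log_loss(v):
    p = sigmoid(v)
    return -(y * np.log(p) + (1 - y) * np.log(1 - p))

num_grad = (log_loss(x + eps) - log_loss(x - eps)) / (2 * eps)
assert abs(num_grad - (sigmoid(x) - y)) < 1e-4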