From 3317180ad9b8db970728bc4a18c2a7ae7e652000 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Fri, 19 Jun 2015 14:39:52 -0600 Subject: [PATCH 01/12] activation op --- src/operator/activation_op-inl.hpp | 50 ++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 src/operator/activation_op-inl.hpp diff --git a/src/operator/activation_op-inl.hpp b/src/operator/activation_op-inl.hpp new file mode 100644 index 000000000000..a3d5fe8df1ec --- /dev/null +++ b/src/operator/activation_op-inl.hpp @@ -0,0 +1,50 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file activation_op-inl.hpp + * \brief activation operator of mxnet + */ + +#ifndef ACTIVATION_OP_INL_HPP +#define ACTIVATION_OP_INL_HPP +#pragma once +#include + +namespace mxnet { +template +class ActivationOp : public Operator { +public: + virtual void InferShape(const std::vector &in_shape, + std::vector *out_shape) { + CHECK(in_shape.size() == 1) << "Activation Op: only 1 input is allowed"; + out_shape->resize(in_shape.size()); + out_shape->at(0) = in_shape[0]; + } + virtual void Forward(Option opt, + RunContext ctx, + const std::vector &in_data, + const std::vector &out_data) { + CHECK(out_data.size() == 1) << "Activation Op: only 1 output data is allowed"; + CHECK(in_data.size() == 1) << "Activation Op: only 1 input data is allowed"; + mshadow::Stream *stream = static_cast *>(ctx.stream); + mshadow::Tensor in = in_data[0].FlatTo2D(stream); + mshadow::Tensor out = out_data[0].FlatTo2D(stream); + out = mshadow::expr::F(in); + } + virtual void Backward(RunContext ctx, + const std::vector &grad_next, + const std::vector &in_data, + const std::vector &out_grad, + const std::vector req) { + CHECK(grad_next.size() == 1) << "Activation Op: only 1 input grad is allowed"; + CHECK(in_data.size() == 1) << "Activation Op: only 1 input data is allowed"; + CHECK(req.size() == 1) << "Activation Op: only 1 req is allowed"; + CHECK(req[0] == kWriteInplace) << "Activation Op: only support inplace mode"; + mshadow::Stream *stream = static_cast *>(ctx.stream); + mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); + mshadow::Tensor data = in_data[0].FlatTo2D(stream); + data = mshadow::expr::F(data) * grad; + } +}; // class ActivationOp +} // namespace cxxnet + +#endif // ACTIVATION_OP_INL_HPP From e4e2178fc6e52087ad351a4488e18df77800c026 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Fri, 19 Jun 2015 14:58:56 -0600 Subject: [PATCH 02/12] chg --- src/operator/activation_op-inl.hpp | 39 +++++++++++++++++++----------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/operator/activation_op-inl.hpp b/src/operator/activation_op-inl.hpp index a3d5fe8df1ec..02ad1cd24951 100644 --- a/src/operator/activation_op-inl.hpp +++ b/src/operator/activation_op-inl.hpp @@ -8,24 +8,28 @@ #define ACTIVATION_OP_INL_HPP #pragma once #include +#include namespace mxnet { template class ActivationOp : public Operator { -public: + public: virtual void InferShape(const std::vector &in_shape, std::vector *out_shape) { CHECK(in_shape.size() == 1) << "Activation Op: only 1 input is allowed"; - out_shape->resize(in_shape.size()); - out_shape->at(0) = in_shape[0]; + TShape out = in_shape[0]; + out_shape->push_back(out); } virtual void Forward(Option opt, RunContext ctx, const std::vector &in_data, const std::vector &out_data) { - CHECK(out_data.size() == 1) << "Activation Op: only 1 output data is allowed"; - CHECK(in_data.size() == 1) << "Activation Op: only 1 input data is allowed"; - mshadow::Stream *stream = static_cast *>(ctx.stream); + 
CHECK(out_data.size() == 1) << \ + "Activation Op: only 1 output data is allowed"; + CHECK(in_data.size() == 1) << \ + "Activation Op: only 1 input data is allowed"; + mshadow::Stream *stream = \ + static_cast *>(ctx.stream); mshadow::Tensor in = in_data[0].FlatTo2D(stream); mshadow::Tensor out = out_data[0].FlatTo2D(stream); out = mshadow::expr::F(in); @@ -35,16 +39,23 @@ class ActivationOp : public Operator { const std::vector &in_data, const std::vector &out_grad, const std::vector req) { - CHECK(grad_next.size() == 1) << "Activation Op: only 1 input grad is allowed"; - CHECK(in_data.size() == 1) << "Activation Op: only 1 input data is allowed"; - CHECK(req.size() == 1) << "Activation Op: only 1 req is allowed"; - CHECK(req[0] == kWriteInplace) << "Activation Op: only support inplace mode"; - mshadow::Stream *stream = static_cast *>(ctx.stream); + CHECK(grad_next.size() == 1) << \ + "Activation Op: only 1 input grad is allowed"; + CHECK(in_data.size() == 1) << \ + "Activation Op: only 1 input data is allowed"; + CHECK(req.size() == 1) << \ + "Activation Op: only 1 req is allowed"; + CHECK(req[0] == kWriteInplace) << \ + "Activation Op: only support inplace mode"; + mshadow::Stream *stream = \ + static_cast *>(ctx.stream); mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); mshadow::Tensor data = in_data[0].FlatTo2D(stream); data = mshadow::expr::F(data) * grad; } -}; // class ActivationOp -} // namespace cxxnet +}; // class ActivationOp +} // namespace mxnet + +#endif // ACTIVATION_OP_INL_HPP + -#endif // ACTIVATION_OP_INL_HPP From 3674aefb2cad80826489f1efa0b83118a8ab1fb3 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Fri, 19 Jun 2015 16:06:19 -0600 Subject: [PATCH 03/12] add op and extra opt --- include/mxnet/operator.h | 7 +- src/operator/activation_op-inl.hpp | 9 +-- src/operator/op.h | 109 +++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 6 deletions(-) create mode 100644 src/operator/op.h diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index fbe2e2c8f6af..da371dc9b326 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -26,6 +26,8 @@ class Operator { struct Option { /*! \brief whether it is training phase*/ int is_train; + /*! \brief whether propagate gradient to x in backprop */ + int prop_grad; }; /*! \briref gradient request type the request can have */ enum GradReqType { @@ -43,7 +45,7 @@ class Operator { * \param name parameter name * \param val string for configuration */ - virtual void SetParam(const char *name, const char *val) {} + virtual void SetParam(const char *name, const char *val) {} /*! 
* \brief inter the shape of output given the input data * \param in_shape the shape of input arguments of the operator @@ -73,7 +75,8 @@ class Operator { * \param req_types request types of the gradient saving operation * \sa GradReqType */ - virtual void Backward(RunContext ctx, + virtual void Backward(Option opt, + RunContext ctx, const std::vector &grad_next, const std::vector &in_data, const std::vector &out_grad, diff --git a/src/operator/activation_op-inl.hpp b/src/operator/activation_op-inl.hpp index 02ad1cd24951..7ac0ddc87a53 100644 --- a/src/operator/activation_op-inl.hpp +++ b/src/operator/activation_op-inl.hpp @@ -4,8 +4,8 @@ * \brief activation operator of mxnet */ -#ifndef ACTIVATION_OP_INL_HPP -#define ACTIVATION_OP_INL_HPP +#ifndef SRC_OPERATOR_ACTIVATION_OP_INL_HPP_ +#define SRC_OPERATOR_ACTIVATION_OP_INL_HPP_ #pragma once #include #include @@ -34,7 +34,8 @@ class ActivationOp : public Operator { mshadow::Tensor out = out_data[0].FlatTo2D(stream); out = mshadow::expr::F(in); } - virtual void Backward(RunContext ctx, + virtual void Backward(Option opt, + RunContext ctx, const std::vector &grad_next, const std::vector &in_data, const std::vector &out_grad, @@ -56,6 +57,6 @@ class ActivationOp : public Operator { }; // class ActivationOp } // namespace mxnet -#endif // ACTIVATION_OP_INL_HPP +#endif // SRC_OPERATOR_ACTIVATION_OP_INL_HPP_ diff --git a/src/operator/op.h b/src/operator/op.h new file mode 100644 index 000000000000..32b848846f70 --- /dev/null +++ b/src/operator/op.h @@ -0,0 +1,109 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file op.h + * \brief extra mshadow operation for mxnet + * \author Bing Xu + */ +#ifndef SRC_OPERATOR_OP_H_ +#define SRC_OPERATOR_OP_H_ +#pragma once + +#include + +namespace mxnet { +/*! \brief operations for ActivationLayer */ +namespace op { +struct identity { + MSHADOW_XINLINE static real_t Map(real_t a) { + return a; + } +}; +struct identity_grad { + MSHADOW_XINLINE static real_t Map(real_t a) { + return 1.0f; + } +}; + +/*! \brief sigmoid unit */ +struct sigmoid { + MSHADOW_XINLINE static real_t Map(real_t a) { + return 1.0f / (1.0f + expf(-a)); + } +}; +struct sigmoid_grad { + MSHADOW_XINLINE static real_t Map(real_t a) { + return a * (1.0f - a); + } +}; +/*! \brief Rectified Linear Operation */ +struct relu { + MSHADOW_XINLINE static real_t Map(real_t a) { + return std::max(a, 0.0f); + } +}; +struct relu_grad { + MSHADOW_XINLINE static real_t Map(real_t a) { + return a > 0.0f ? 1.0f : 0.0f; + } +}; + +/*! \brief Leaky ReLU Operation */ +struct xelu { + MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { + return a > 0 ? a : a / b; + } +}; + +struct xelu_grad { + MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { + return a > 0 ? 1 : 1.0f / b; + } +}; + +struct tanh { + MSHADOW_XINLINE static real_t Map(real_t a) { + return tanhf( a ); + } +}; + +struct tanh_grad { + MSHADOW_XINLINE static real_t Map(real_t a) { + return 1.0f - a * a; + } +}; + + +struct square { + MSHADOW_XINLINE static real_t Map(real_t a) { + return a * a; + } +}; + +/*! \brief used for generate Bernoulli mask */ +struct threshold { + MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { + return a < b ? 1.0f : 0.0f; + } +}; + +/*! 
\brief used for generate element of power */ +struct power { + MSHADOW_XINLINE static real_t Map(real_t a, real_t b) { + return powf( a, b ); + } +}; + +/*!\ \brief used for generate element sqrt */ +struct square_root { + MSHADOW_XINLINE static real_t Map(real_t a) { + return sqrt(a); + } +}; + +} // namespace op +} // namespace mxnet + +#endif // SRC_OPERATOR_OP_H_ + + + From 062dba5c14ab2ebc34a861748172a6fe7a5a6a7d Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 20 Jun 2015 15:57:57 -0600 Subject: [PATCH 04/12] chg --- ...ivation_op-inl.hpp => activation_op-inl.h} | 0 src/operator/{op.h => mshadow_op.h} | 0 src/operator/operator.cc | 18 +++++ src/operator/operator.cu | 20 ++++++ src/operator/operator_helper.h | 47 ++++++++++++ src/operator/param.h | 71 +++++++++++++++++++ 6 files changed, 156 insertions(+) rename src/operator/{activation_op-inl.hpp => activation_op-inl.h} (100%) rename src/operator/{op.h => mshadow_op.h} (100%) create mode 100644 src/operator/operator.cc create mode 100644 src/operator/operator.cu create mode 100644 src/operator/operator_helper.h create mode 100644 src/operator/param.h diff --git a/src/operator/activation_op-inl.hpp b/src/operator/activation_op-inl.h similarity index 100% rename from src/operator/activation_op-inl.hpp rename to src/operator/activation_op-inl.h diff --git a/src/operator/op.h b/src/operator/mshadow_op.h similarity index 100% rename from src/operator/op.h rename to src/operator/mshadow_op.h diff --git a/src/operator/operator.cc b/src/operator/operator.cc new file mode 100644 index 000000000000..bee6238d8bce --- /dev/null +++ b/src/operator/operator.cc @@ -0,0 +1,18 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file operator.cc + * \brief + * \author Bing Xu +*/ +#include "operator_helper.h" + +namespace mxnet { +namespace op { + +Operator * CreateOperator(OpType type) { + return OperatorFactory(type); +} + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/operator.cu b/src/operator/operator.cu new file mode 100644 index 000000000000..3cc1ada28e4b --- /dev/null +++ b/src/operator/operator.cu @@ -0,0 +1,20 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file operator.cu + * \brief + * \author Bing Xu +*/ + + +#include "operator_helper.h" + +namespace mxnet { +namespace op { + +Operator * CreateOperator(OpType type) { + return OperatorFactory(type); +} + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/operator_helper.h b/src/operator/operator_helper.h new file mode 100644 index 000000000000..b2e2ec0b5050 --- /dev/null +++ b/src/operator/operator_helper.h @@ -0,0 +1,47 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file assign_helper.h + * \brief + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_HELPER_H_ +#define MXNET_OPERATOR_HELPER_H_ +#include "activation_op-inl.h" +#include "mshadow_op.h" + +namespace mxnet { +namespace op { + +enum OpType { + kReLU = 0, +}; + + +template +inline void Assign(const Exp &exp, + const mshadow::Tensor &out, + const Operator::GradReqType &req) { + switch (req) { + case Operator::kNullOp: + break; + case Operator::kWriteTo: + case Operator::kWriteInplace: + break; + case Operator::kAddTo: + break; + } +} + +template +Operator *OperatorFactory(OpType type) { + switch (type) { + case kReLU: + return new ActivationOp(); + + }; + return NULL; +} + +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_HELPER_H_ diff --git a/src/operator/param.h b/src/operator/param.h new file mode 100644 index 000000000000..336c833165f8 --- /dev/null +++ b/src/operator/param.h @@ -0,0 +1,71 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file param.h + * \brief operator params + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_PARAM_H_ +#define MXNET_OPERATOR_PARAM_H_ +#pragma once + +namespace mxnet { +namespace op { +struct Param { + /*! \brief number of hidden layers */ + int num_hidden; + /*! \brief number of output channel */ + int num_channel; + /*! \brief number of parallel group */ + int num_group; + /*! \brief kernel height */ + int kernel_y; + /*! \brief kernel width */ + int kernel_x; + /*! \brief stride in y dimension*/ + int stride_y; + /*! \brief stride in x dimension */ + int stride_x; + /*! \brief padding in y dimension */ + int pad_y; + /*! \brief padding in x dimension */ + int pad_x; + /*! \brief whether not include bias term */ + int no_bias; + /*! \brief maximum temp_col_size allowed in each layer */ + int temp_col_max; + /*! \brief number of input channels */ + int num_input_channel; + /*! \brief number of input hidden nodes, used by fullc */ + int num_input_node; + /*! 
\brief reserved fields, for future compatibility */ + int reserved[64]; + inline void SetParam(const char *name, const char* val) { + if (!strcmp(name, "nhidden")) num_hidden = atoi(val); + if (!strcmp(name, "nchannel")) num_channel = atoi(val); + if (!strcmp(name, "ngroup")) num_group = atoi(val); + if (!strcmp(name, "kernel_size")) { + kernel_y = kernel_x = atoi(val); + } + if (!strcmp(name, "kernel_height")) kernel_height = atoi(val); + if (!strcmp(name, "kernel_width")) kernel_width = atoi(val); + if (!strcmp(name, "stride")) { + stride_y = stride_x = atoi(val); + } + if (!strcmp(name, "stride_y")) stride_y = atoi(val); + if (!strcmp(name, "stride_x")) stride_x = atoi(val); + + if (!strcmp(name, "pad")) { + pad_y = pad_x = atoi(val); + } + if (!strcmp(name, "pad_y")) pad_y = atoi(val); + if (!strcmp(name, "pad_x")) pad_x = atoi(val); + if (!strcmp(name, "no_bias")) no_bias = atoi(val); + if (!strcmp(name, "temp_col_max")) temp_col_max = atoi(val) << 18; + } +}; // struct Param +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_PARAM_H_ + + From 12b1c97cf498926208e79ae61da563b5b5516e25 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 20 Jun 2015 16:00:08 -0600 Subject: [PATCH 05/12] compile --- Makefile | 11 +++++++---- include/mxnet/narray.h | 17 +++++++++-------- include/mxnet/operator.h | 2 +- src/dag_engine/simple_engine.cc | 7 +++---- src/narray/narray_op-inl.h | 2 +- src/operator/mshadow_op.h | 8 ++++---- 6 files changed, 25 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index ff8cd7fd4d02..418533577dec 100644 --- a/Makefile +++ b/Makefile @@ -40,6 +40,7 @@ endif ifneq ($(ADD_CFLAGS), NONE) CFLAGS += $(ADD_CFLAGS) + CFLAGS += -DDMLC_USE_CXX11=1 endif ifneq ($(ADD_LDFLAGS), NONE) @@ -47,8 +48,8 @@ ifneq ($(ADD_LDFLAGS), NONE) endif OBJ = storage.o narray_op_cpu.o -OBJCXX11 = engine.o narray.o -CUOBJ = narray_op_gpu.o +OBJCXX11 = engine.o narray.o operator.o +CUOBJ = narray_op_gpu.o operator_gpu.o LIB_DEP = $(DMLC_CORE)/libdmlc.a @@ -64,6 +65,8 @@ engine.o: src/dag_engine/simple_engine.cc narray.o: src/narray/narray.cc narray_op_cpu.o: src/narray/narray_op_cpu.cc src/narray/narray_op-inl.h narray_op_gpu.o: src/narray/narray_op_gpu.cu src/narray/narray_op-inl.h +operator.o: src/operator/operator.cc +operator_gpu.o: src/operator/operator.cu $(BIN) : $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) @@ -72,13 +75,13 @@ $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) $(OBJCXX11) : - $(CXX) -std=c++0x -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) + $(CXX) -std=c++11 -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) $(SLIB) : $(CXX) $(CFLAGS) -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(CUOBJ) : - $(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^) + $(NVCC) --std=c++11 -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^) $(CUBIN) : $(NVCC) -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" -Xlinker "$(LDFLAGS)" $(filter %.cu %.cpp %.o, $^) diff --git a/include/mxnet/narray.h b/include/mxnet/narray.h index 8e3a398fd9ad..287d9761a736 100644 --- a/include/mxnet/narray.h +++ b/include/mxnet/narray.h @@ -7,6 +7,7 @@ #define MXNET_NARRAY_H_ #include #include +#include #include "./base.h" #include "./storage.h" #include "./tensor_blob.h" @@ -25,7 +26,7 @@ class NArray { /*! \brief default cosntructor */ NArray() {} /*! 
- * \brief constructing a new dynamic NArray + * \brief constructing a new dynamic NArray * \param shape the shape of array * \param ctx context of NArray */ @@ -34,16 +35,16 @@ class NArray { } /*! * \brief constructing a static NArray that shares data with TBlob - * Use with caution: allocate ONLY ONE NArray for each TBlob, + * Use with caution: allocate ONLY ONE NArray for each TBlob, * make sure the memory region is available through out the life of NArray * \param data the memory content of static data * \param dev_id the device id this tensor sits at - */ + */ NArray(const TBlob &data, int dev_id) : ptr_(new Chunk(data, dev_id)) { } /*! - * \return the shape of current NArray + * \return the shape of current NArray */ inline const TShape &shape() const { return ptr_->data.shape_; @@ -57,7 +58,7 @@ class NArray { /*! \return whether this narray is not initialized */ inline bool is_empty() const { return ptr_.get() == nullptr; - } + } private: /*! \brief the real data chunk that backs NArray */ @@ -79,7 +80,7 @@ class NArray { Chunk() : static_data(true), delay_alloc(false) { var = DAGEngine::Get()->NewVar(); } - /*! \brief construct from static data */ + /*! \brief construct from static data */ Chunk(const TBlob &data, int dev_id) : data(data), static_data(true), @@ -118,14 +119,14 @@ class NArray { /*! \brief internal data of NArray */ std::shared_ptr ptr_; /*! - * \brief constructing a new dynamic NArray + * \brief constructing a new dynamic NArray * \param shape the shape of array * \param ctx context of NArray * \param delay_alloc whether delay the allocation */ NArray(const TShape &shape, Context ctx, bool delay_alloc) : ptr_(new Chunk(shape, ctx, delay_alloc)) { - } + } // add friend to helper functions template friend NArray BinaryEWise(const NArray &lhs, const NArray &rhs); diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index da371dc9b326..3107ce89f5c2 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -80,7 +80,7 @@ class Operator { const std::vector &grad_next, const std::vector &in_data, const std::vector &out_grad, - const std::vector req); + const std::vector &req); }; } // namespace mxnet #endif // MXNET_OPERATOR_H_ diff --git a/src/dag_engine/simple_engine.cc b/src/dag_engine/simple_engine.cc index 2e35b2ff57fc..9ea42e979735 100644 --- a/src/dag_engine/simple_engine.cc +++ b/src/dag_engine/simple_engine.cc @@ -1,19 +1,18 @@ #include #include - namespace mxnet { class SimpleEngine : public DAGEngine { public: virtual void Push(AsyncOp exec_fun, Context exec_ctx, - const std::vector &use_vars, + const std::vector &use_vars, const std::vector &mutate_vars) { // cannot schedule async using naive way because deps are not captured LOG(FATAL) << "cannot schedule async operations"; } virtual void Push(Op exec_fun, Context exec_ctx, - const std::vector &use_vars, + const std::vector &use_vars, const std::vector &mutate_vars) { exec_fun(RunContext()); } @@ -25,7 +24,7 @@ class SimpleEngine : public DAGEngine { // that have the info about the variable // use ptr directly instead of ID because this avoids an indirect mapping return NULL; - } + } }; // implements the singleton factory DAGEngine* DAGEngine::Get() { diff --git a/src/narray/narray_op-inl.h b/src/narray/narray_op-inl.h index 918149ff298b..9891d9a993d0 100644 --- a/src/narray/narray_op-inl.h +++ b/src/narray/narray_op-inl.h @@ -19,7 +19,7 @@ namespace mxnet { namespace narray { // true implementation template -inline void Eval_(const TBlob &lhs, const TBlob &rhs, TBlob ret, 
RunContext ctx) { +inline void Eval_(const TBlob &lhs, const TBlob &rhs, TBlob ret, RunContext ctx) { using namespace mshadow::expr; mshadow::Stream *s = static_cast*>(ctx.stream); ret.FlatTo2D(s) diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index 32b848846f70..ac035b395b21 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -4,8 +4,8 @@ * \brief extra mshadow operation for mxnet * \author Bing Xu */ -#ifndef SRC_OPERATOR_OP_H_ -#define SRC_OPERATOR_OP_H_ +#ifndef MXNET_MSHADOW_OPERATOR_OP_H_ +#define MXNET_MSHADOW_OPERATOR_OP_H_ #pragma once #include @@ -38,7 +38,7 @@ struct sigmoid_grad { /*! \brief Rectified Linear Operation */ struct relu { MSHADOW_XINLINE static real_t Map(real_t a) { - return std::max(a, 0.0f); + return a > 0.0f ? a : 0.0f; } }; struct relu_grad { @@ -103,7 +103,7 @@ struct square_root { } // namespace op } // namespace mxnet -#endif // SRC_OPERATOR_OP_H_ +#endif // MXNET_MSHADOW_OPERATOR_OP_H_ From 043bfff06fab6e8c50b5a9511cfcbae137d254df Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 20 Jun 2015 16:11:00 -0600 Subject: [PATCH 06/12] act --- src/operator/activation_op-inl.h | 40 ++++++++++++++------------------ 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/operator/activation_op-inl.h b/src/operator/activation_op-inl.h index 7ac0ddc87a53..a96ce813ea9b 100644 --- a/src/operator/activation_op-inl.h +++ b/src/operator/activation_op-inl.h @@ -4,13 +4,13 @@ * \brief activation operator of mxnet */ -#ifndef SRC_OPERATOR_ACTIVATION_OP_INL_HPP_ -#define SRC_OPERATOR_ACTIVATION_OP_INL_HPP_ -#pragma once +#ifndef MXNET_ACTIVATION_OP_INL_HPP_ +#define MXNET_ACTIVATION_OP_INL_HPP_ #include #include namespace mxnet { +namespace op { template class ActivationOp : public Operator { public: @@ -24,14 +24,12 @@ class ActivationOp : public Operator { RunContext ctx, const std::vector &in_data, const std::vector &out_data) { - CHECK(out_data.size() == 1) << \ - "Activation Op: only 1 output data is allowed"; - CHECK(in_data.size() == 1) << \ - "Activation Op: only 1 input data is allowed"; + CHECK(out_data.size() == 1); + CHECK(in_data.size() == 1); mshadow::Stream *stream = \ static_cast *>(ctx.stream); - mshadow::Tensor in = in_data[0].FlatTo2D(stream); - mshadow::Tensor out = out_data[0].FlatTo2D(stream); + mshadow::Tensor in = in_data[0].FlatTo2D(stream); + mshadow::Tensor out = out_data[0].FlatTo2D(stream); out = mshadow::expr::F(in); } virtual void Backward(Option opt, @@ -39,24 +37,22 @@ class ActivationOp : public Operator { const std::vector &grad_next, const std::vector &in_data, const std::vector &out_grad, - const std::vector req) { - CHECK(grad_next.size() == 1) << \ - "Activation Op: only 1 input grad is allowed"; - CHECK(in_data.size() == 1) << \ - "Activation Op: only 1 input data is allowed"; - CHECK(req.size() == 1) << \ - "Activation Op: only 1 req is allowed"; - CHECK(req[0] == kWriteInplace) << \ - "Activation Op: only support inplace mode"; + const std::vector &req) { + CHECK(grad_next.size() == 1); + CHECK(in_data.size() == 1); + CHECK(out_grad.size() == 1); + CHECK(req.size() == 1); + CHECK(req[0] == kWriteInplace); mshadow::Stream *stream = \ static_cast *>(ctx.stream); - mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); - mshadow::Tensor data = in_data[0].FlatTo2D(stream); - data = mshadow::expr::F(data) * grad; + mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); + mshadow::Tensor data = in_data[0].FlatTo2D(stream); + Assign(mshadow::expr::F(data) * grad, data, req[0]); } }; // class 
ActivationOp +} // namespace op } // namespace mxnet -#endif // SRC_OPERATOR_ACTIVATION_OP_INL_HPP_ +#endif // MXNET_ACTIVATION_OP_INL_HPP_ From e595baf3e02354ede6039728c9bda20730e9147d Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 20 Jun 2015 23:30:43 -0600 Subject: [PATCH 07/12] add fullc op --- include/mxnet/operator.h | 25 +++++-- src/operator/activation_op-inl.h | 15 ++-- src/operator/assign_helper.h | 29 ++++++++ src/operator/fully_connect_op-inl.h | 111 ++++++++++++++++++++++++++++ src/operator/operator.cc | 6 +- src/operator/operator_helper.h | 20 +---- src/operator/param.h | 6 +- 7 files changed, 181 insertions(+), 31 deletions(-) create mode 100644 src/operator/assign_helper.h create mode 100644 src/operator/fully_connect_op-inl.h diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index 3107ce89f5c2..b5ddf919caac 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -26,8 +26,6 @@ class Operator { struct Option { /*! \brief whether it is training phase*/ int is_train; - /*! \brief whether propagate gradient to x in backprop */ - int prop_grad; }; /*! \briref gradient request type the request can have */ enum GradReqType { @@ -40,6 +38,19 @@ class Operator { /*! \brief add to the provided space */ kAddTo = 3 }; + /*! \brief argument request type the request can have */ + enum ArgReqType { + /*! \brief weight arg*/ + kWeightArg = 0, + /*! \brief bias arg*/ + kBiasArg = 1, + /*! \brief data args */ + kDataArg = 2, + }; + /*! \brief get request input arguments + * \param args empty vector of reqest argument type + */ + virtual void DescribeArgs(std::vector *args) = 0; /*! * \brief set param for the operator from string * \param name parameter name @@ -49,9 +60,14 @@ class Operator { /*! * \brief inter the shape of output given the input data * \param in_shape the shape of input arguments of the operator + * For unknown shape, left TShape size to 0, + * InferShape will try to fix a correct shape; + * For known shape, InferShape will check shape + * * \param out_shape the shape of outputs of the operator + * InferShape will modify the vector to fill output TShape */ - virtual void InferShape(const std::vector &in_shape, + virtual void InferShape(std::vector &in_shape, std::vector *out_shape) = 0; /*! 
* \brief perform a forward operation of operator, save the output to TBlob @@ -75,8 +91,7 @@ class Operator { * \param req_types request types of the gradient saving operation * \sa GradReqType */ - virtual void Backward(Option opt, - RunContext ctx, + virtual void Backward(RunContext ctx, const std::vector &grad_next, const std::vector &in_data, const std::vector &out_grad, diff --git a/src/operator/activation_op-inl.h b/src/operator/activation_op-inl.h index a96ce813ea9b..b46319e44c69 100644 --- a/src/operator/activation_op-inl.h +++ b/src/operator/activation_op-inl.h @@ -8,15 +8,21 @@ #define MXNET_ACTIVATION_OP_INL_HPP_ #include #include +#include "./assign_helper.h" namespace mxnet { namespace op { template class ActivationOp : public Operator { public: - virtual void InferShape(const std::vector &in_shape, + virtual void DescribeArgs(std::vector *args) { + args->clear(); + args->push_back(kDataArg); + } + virtual void InferShape(std::vector &in_shape, std::vector *out_shape) { - CHECK(in_shape.size() == 1) << "Activation Op: only 1 input is allowed"; + CHECK(in_shape.size() == 1) << "Only 1 input is allowed"; + CHECK(in_shape[0].Size() > 0) << "Must set input data shape"; TShape out = in_shape[0]; out_shape->push_back(out); } @@ -32,8 +38,7 @@ class ActivationOp : public Operator { mshadow::Tensor out = out_data[0].FlatTo2D(stream); out = mshadow::expr::F(in); } - virtual void Backward(Option opt, - RunContext ctx, + virtual void Backward(RunContext ctx, const std::vector &grad_next, const std::vector &in_data, const std::vector &out_grad, @@ -47,7 +52,7 @@ class ActivationOp : public Operator { static_cast *>(ctx.stream); mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); mshadow::Tensor data = in_data[0].FlatTo2D(stream); - Assign(mshadow::expr::F(data) * grad, data, req[0]); + Assign(data, mshadow::expr::F(data) * grad, req[0]); } }; // class ActivationOp } // namespace op diff --git a/src/operator/assign_helper.h b/src/operator/assign_helper.h new file mode 100644 index 000000000000..8926add161d0 --- /dev/null +++ b/src/operator/assign_helper.h @@ -0,0 +1,29 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file assign_helper.h + * \brief assign gradient + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_ASSIGN_HELPER_H_ +#define MXNET_OPERATOR_ASSIGN_HELPER_H_ +namespace mxnet { +namespace op { +template +inline void Assign(mshadow::Tensor &out, + const Exp &exp, + const Operator::GradReqType &req) { + switch (req) { + case Operator::kNullOp: + break; + case Operator::kWriteTo: + case Operator::kWriteInplace: + out = exp; + break; + case Operator::kAddTo: + out += exp; + break; + } +} +} //namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_ASSIGN_HELPER_H_ diff --git a/src/operator/fully_connect_op-inl.h b/src/operator/fully_connect_op-inl.h new file mode 100644 index 000000000000..e6e39010ec1e --- /dev/null +++ b/src/operator/fully_connect_op-inl.h @@ -0,0 +1,111 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file fully_connect_op-inl.hpp + * \brief fully connect operator + * \author Bing Xu +*/ + +#ifndef MXNET_FULLY_CONNECT_OP_INL_HPP_ +#define MXNET_FULLY_CONNECT_OP_INL_HPP_ + +#include +#include +#include "./assign_helper.h" +#include "./param.h" + +namespace mxnet { +namespace op { +template +class FullyConnectOp : public Operator { + public: + virtual void DescribeArgs(std::vector *args) { + args->clear(); + args->push_back(kDataArg); + args->push_back(kWeightArg); + args->push_back(kBiasArg); + } + virtual void SetParam(const char *name, const char *val) { + param_.SetParam(name, val); + } + virtual void InferShape(std::vector &in_shape, + std::vector *out_shape) { + CHECK(in_shape.size() == 3) << "Input:[data, weight, bias]"; + CHECK(param_.num_input_node > 0); + CHECK(param_.num_hidden > 0); + TShape &dshape = in_shape[0]; + TShape &wshape = in_shape[1]; + TShape &bshape = in_shape[2]; + if (wshape.Size() == 0) { + mshadow::Shape<2> ws = mshadow::Shape2(param_.num_hidden, + param_.num_input_node); + wshape = ws; + } else { + CHECK(wshape[0] == param_.num_hidden); + CHECK(wshape[1] == param_.num_input_node); + } + if (bshape.Size() == 0) { + mshadow::Shape<1> bs = mshadow::Shape1(param_.num_hidden); + bshape = bs; + } else { + CHECK(bshape[0] == param_.num_hidden); + } + CHECK(dshape.ndim() == 4 && dshape[3] == param_.num_input_node) << \ + "Input data should be 4D in batch-1-1-hidden"; + out_shape->clear(); + out_shape->push_back(dshape); + out_shape->at(0)[3] = param_.num_hidden; + } + virtual void Forward(Option opt, + RunContext ctx, + const std::vector &in_data, + const std::vector &out_data) { + CHECK(in_data.size() == 3) << "Input:[data, weight, bias]"; + CHECK(out_data.size() == 1); + mshadow::Stream *stream = \ + static_cast *>(ctx.stream); + mshadow::Tensor wmat = in_data[0].get(stream); + mshadow::Tensor bias = in_data[1].get(stream); + mshadow::Tensor data = in_data[2].FlatTo2D(stream); + mshadow::Tensor out = out_data[0].FlatTo2D(stream); + out = mshadow::expr::dot(data, wmat.T()); + if (!param_.no_bias) { + out += mshadow::expr::repmat(bias, data.size(0)); + } + } + virtual void Backward(RunContext ctx, + const std::vector &grad_next, + const std::vector &in_data, + const std::vector &out_grad, + const std::vector &req) { + CHECK(grad_next.size() == 1); + CHECK(in_data.size() == 3) << "Input: [data, weight, bias]"; + CHECK(out_grad.size() == 3) << "Output: [gdata, gweight, gbias]"; + CHECK(req.size() == 3); + mshadow::Stream *stream = \ + static_cast *>(ctx.stream); + mshadow::Tensor data = in_data[0].FlatTo2D(stream); + mshadow::Tensor wmat = in_data[1].get(stream); + mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); + mshadow::Tensor gdata = out_grad[0].FlatTo2D(stream); + mshadow::Tensor gwmat = out_grad[1].get(stream); + mshadow::Tensor gbias = out_grad[2].get(stream); + // backprop + CHECK(req[0] != kWriteInplace); + Assign(gwmat, mshadow::expr::dot(grad.T(), data), req[0]); + if (!param_.no_bias) { + Assign(gbias, mshadow::expr::sum_rows(grad), req[1]); + } + if (req[0] != kNullOp) { + CHECK(req[0] != kWriteInplace); + Assign(gdata, mshadow::expr::dot(grad, wmat), req[2]); + } + } + private: + Param param_; +}; // class FullyConnectOp +} // namespace op +} // namespace mxnet + +#endif // MXNET_FULLY_CONNECT_OP_INL_HPP + + diff --git a/src/operator/operator.cc b/src/operator/operator.cc index bee6238d8bce..1cab17a62a67 100644 --- a/src/operator/operator.cc +++ b/src/operator/operator.cc @@ -9,10 +9,10 @@ namespace mxnet 
{ namespace op { -Operator * CreateOperator(OpType type) { +Operator *CreateOperator(OpType type) { return OperatorFactory(type); } -} // namespace op -} // namespace mxnet +} // namespace op +} // namespace mxnet diff --git a/src/operator/operator_helper.h b/src/operator/operator_helper.h index b2e2ec0b5050..9a673b12cedb 100644 --- a/src/operator/operator_helper.h +++ b/src/operator/operator_helper.h @@ -7,6 +7,7 @@ #ifndef MXNET_OPERATOR_HELPER_H_ #define MXNET_OPERATOR_HELPER_H_ #include "activation_op-inl.h" +#include "fully_connect_op-inl.h" #include "mshadow_op.h" namespace mxnet { @@ -14,29 +15,16 @@ namespace op { enum OpType { kReLU = 0, + kFullc = 1, }; - -template -inline void Assign(const Exp &exp, - const mshadow::Tensor &out, - const Operator::GradReqType &req) { - switch (req) { - case Operator::kNullOp: - break; - case Operator::kWriteTo: - case Operator::kWriteInplace: - break; - case Operator::kAddTo: - break; - } -} - template Operator *OperatorFactory(OpType type) { switch (type) { case kReLU: return new ActivationOp(); + case kFullc: + return new FullyConnectOp(); }; return NULL; diff --git a/src/operator/param.h b/src/operator/param.h index 336c833165f8..c17555898c0a 100644 --- a/src/operator/param.h +++ b/src/operator/param.h @@ -41,13 +41,15 @@ struct Param { int reserved[64]; inline void SetParam(const char *name, const char* val) { if (!strcmp(name, "nhidden")) num_hidden = atoi(val); + if (!strcmp(name, "num_input_node")) num_input_node = atoi(val); + if (!strcmp(name, "num_input_channel")) num_input_channel = atoi(val); if (!strcmp(name, "nchannel")) num_channel = atoi(val); if (!strcmp(name, "ngroup")) num_group = atoi(val); if (!strcmp(name, "kernel_size")) { kernel_y = kernel_x = atoi(val); } - if (!strcmp(name, "kernel_height")) kernel_height = atoi(val); - if (!strcmp(name, "kernel_width")) kernel_width = atoi(val); + if (!strcmp(name, "kernel_height")) kernel_y = atoi(val); + if (!strcmp(name, "kernel_width")) kernel_x = atoi(val); if (!strcmp(name, "stride")) { stride_y = stride_x = atoi(val); } From 3a76b8d720165db3d6b8a91b4f33cfe1d27e8111 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 20 Jun 2015 23:33:17 -0600 Subject: [PATCH 08/12] minor --- src/operator/activation_op-inl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/operator/activation_op-inl.h b/src/operator/activation_op-inl.h index b46319e44c69..e49ad2097b12 100644 --- a/src/operator/activation_op-inl.h +++ b/src/operator/activation_op-inl.h @@ -52,7 +52,8 @@ class ActivationOp : public Operator { static_cast *>(ctx.stream); mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); mshadow::Tensor data = in_data[0].FlatTo2D(stream); - Assign(data, mshadow::expr::F(data) * grad, req[0]); + mshadow::Tensor out = in_data[0].FlatTo2D(stream); + Assign(out, mshadow::expr::F(data) * grad, req[0]); } }; // class ActivationOp } // namespace op From be5bf08d563ffbebe3f8b8fd6d2b1f57d97316cc Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 21 Jun 2015 11:00:56 -0600 Subject: [PATCH 09/12] minor --- Makefile | 9 ++- include/mxnet/operator.h | 53 +++++++----- src/operator/activation_op-inl.h | 32 ++++---- src/operator/assign_helper.h | 29 ------- src/operator/fully_connect_op-inl.h | 121 ++++++++++++++-------------- src/operator/mshadow_op.h | 7 +- src/operator/operator-inl.h | 35 ++++++++ src/operator/operator.cc | 43 +++++++--- src/operator/operator.cu | 20 ----- src/operator/operator_common.h | 67 +++++++++++++++ src/operator/operator_cpu.cc | 18 +++++ 
src/operator/operator_gpu.cu | 21 +++++ src/operator/operator_helper.h | 35 -------- src/operator/param.h | 2 +- 14 files changed, 288 insertions(+), 204 deletions(-) delete mode 100644 src/operator/assign_helper.h create mode 100644 src/operator/operator-inl.h delete mode 100644 src/operator/operator.cu create mode 100644 src/operator/operator_common.h create mode 100644 src/operator/operator_cpu.cc create mode 100644 src/operator/operator_gpu.cu delete mode 100644 src/operator/operator_helper.h diff --git a/Makefile b/Makefile index 418533577dec..b159e0bc9429 100644 --- a/Makefile +++ b/Makefile @@ -47,8 +47,8 @@ ifneq ($(ADD_LDFLAGS), NONE) LDFLAGS += $(ADD_LDFLAGS) endif -OBJ = storage.o narray_op_cpu.o -OBJCXX11 = engine.o narray.o operator.o +OBJ = storage.o narray_op_cpu.o operator.o operator_cpu.o +OBJCXX11 = engine.o narray.o CUOBJ = narray_op_gpu.o operator_gpu.o LIB_DEP = $(DMLC_CORE)/libdmlc.a @@ -66,7 +66,8 @@ narray.o: src/narray/narray.cc narray_op_cpu.o: src/narray/narray_op_cpu.cc src/narray/narray_op-inl.h narray_op_gpu.o: src/narray/narray_op_gpu.cu src/narray/narray_op-inl.h operator.o: src/operator/operator.cc -operator_gpu.o: src/operator/operator.cu +operator_cpu.o: src/operator/operator_cpu.cc +operator_gpu.o: src/operator/operator_gpu.cu $(BIN) : $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) @@ -81,7 +82,7 @@ $(SLIB) : $(CXX) $(CFLAGS) -shared -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS) $(CUOBJ) : - $(NVCC) --std=c++11 -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^) + $(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^) $(CUBIN) : $(NVCC) -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" -Xlinker "$(LDFLAGS)" $(filter %.cu %.cpp %.o, $^) diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index b5ddf919caac..c0179990058d 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -1,13 +1,13 @@ /*! * Copyright (c) 2015 by Contributors * \file operator.h - * \brief operator interface of mxnet + * \brief static operator interface of mxnet */ #ifndef MXNET_OPERATOR_H_ #define MXNET_OPERATOR_H_ +// this file will be seen by cuda, no c++11 for now #include #include "./base.h" -#include "./narray.h" #include "./tensor_blob.h" namespace mxnet { @@ -38,19 +38,24 @@ class Operator { /*! \brief add to the provided space */ kAddTo = 3 }; - /*! \brief argument request type the request can have */ - enum ArgReqType { - /*! \brief weight arg*/ - kWeightArg = 0, - /*! \brief bias arg*/ - kBiasArg = 1, - /*! \brief data args */ - kDataArg = 2, + /*! \brief input argument type of the operator have */ + enum ArgType { + /*! \brief data argument */ + kDataArg = 0, + /*! \brief weight argument */ + kWeightArg = 1, + /*! \brief bias argument */ + kBiasArg = 2 }; - /*! \brief get request input arguments - * \param args empty vector of reqest argument type + /*! + * \brief get types of input argument of this oeprator + * \return a vector corresponding to type of each argument + * this order is same as the order of inputs in Forward, InferShape and Backward */ - virtual void DescribeArgs(std::vector *args) = 0; + virtual std::vector DescribeArgs() const { + // default most of layers only have one data argument + return std::vector(1, kDataArg); + } /*! * \brief set param for the operator from string * \param name parameter name @@ -58,16 +63,19 @@ class Operator { */ virtual void SetParam(const char *name, const char *val) {} /*! 
- * \brief inter the shape of output given the input data + * \brief inter the shapes of outputs and unknown input arguments * \param in_shape the shape of input arguments of the operator - * For unknown shape, left TShape size to 0, - * InferShape will try to fix a correct shape; - * For known shape, InferShape will check shape + * this should be of same length as the vector returned by DescribeArgs + * in_shape allows unknown elements, which are checked by shape.ndim() == 0. + * For unknown shapes, InferShape will try to fill in the correct Shape in in_shape + * For known shapes, InferShape will check shape consistency + * + * common practice: set the shape of data input, and usually weight's shape can be infered * * \param out_shape the shape of outputs of the operator - * InferShape will modify the vector to fill output TShape + * InferShape will modify the vector to fill output TShape */ - virtual void InferShape(std::vector &in_shape, + virtual void InferShape(std::vector *in_shape, std::vector *out_shape) = 0; /*! * \brief perform a forward operation of operator, save the output to TBlob @@ -96,6 +104,13 @@ class Operator { const std::vector &in_data, const std::vector &out_grad, const std::vector &req); + + /*! + * \brief factory unction, create a new operator + * \param type the type of operator + * \param ctx the context device type of operator + */ + static Operator *Create(const char *type, Context ctx); }; } // namespace mxnet #endif // MXNET_OPERATOR_H_ diff --git a/src/operator/activation_op-inl.h b/src/operator/activation_op-inl.h index e49ad2097b12..0e87020ea1c6 100644 --- a/src/operator/activation_op-inl.h +++ b/src/operator/activation_op-inl.h @@ -1,30 +1,28 @@ /*! * Copyright (c) 2015 by Contributors - * \file activation_op-inl.hpp + * \file activation_op-inl.h * \brief activation operator of mxnet */ -#ifndef MXNET_ACTIVATION_OP_INL_HPP_ -#define MXNET_ACTIVATION_OP_INL_HPP_ -#include +#ifndef MXNET_OPERATOR_ACTIVATION_OP_INL_H_ +#define MXNET_OPERATOR_ACTIVATION_OP_INL_H_ + #include -#include "./assign_helper.h" +#include +#include +#include "./operator_common.h" namespace mxnet { namespace op { template class ActivationOp : public Operator { public: - virtual void DescribeArgs(std::vector *args) { - args->clear(); - args->push_back(kDataArg); - } - virtual void InferShape(std::vector &in_shape, + virtual void InferShape(std::vector *in_shape, std::vector *out_shape) { - CHECK(in_shape.size() == 1) << "Only 1 input is allowed"; - CHECK(in_shape[0].Size() > 0) << "Must set input data shape"; - TShape out = in_shape[0]; - out_shape->push_back(out); + CHECK(in_shape->size() == 1) << "Only 1 input is allowed"; + CHECK((*in_shape)[0].ndim() != 0 ) << "Require data shape to be known"; + out_shape->clear(); + out_shape->push_back((*in_shape)[0]); } virtual void Forward(Option opt, RunContext ctx, @@ -53,12 +51,10 @@ class ActivationOp : public Operator { mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); mshadow::Tensor data = in_data[0].FlatTo2D(stream); mshadow::Tensor out = in_data[0].FlatTo2D(stream); - Assign(out, mshadow::expr::F(data) * grad, req[0]); + Assign(out, req[0], mshadow::expr::F(data) * grad); } }; // class ActivationOp } // namespace op } // namespace mxnet -#endif // MXNET_ACTIVATION_OP_INL_HPP_ - - +#endif // MXNET_OPERATOR_ACTIVATION_OP_INL_H_ diff --git a/src/operator/assign_helper.h b/src/operator/assign_helper.h deleted file mode 100644 index 8926add161d0..000000000000 --- a/src/operator/assign_helper.h +++ /dev/null @@ -1,29 +0,0 @@ -/*! 
- * Copyright (c) 2015 by Contributors - * \file assign_helper.h - * \brief assign gradient - * \author Bing Xu -*/ -#ifndef MXNET_OPERATOR_ASSIGN_HELPER_H_ -#define MXNET_OPERATOR_ASSIGN_HELPER_H_ -namespace mxnet { -namespace op { -template -inline void Assign(mshadow::Tensor &out, - const Exp &exp, - const Operator::GradReqType &req) { - switch (req) { - case Operator::kNullOp: - break; - case Operator::kWriteTo: - case Operator::kWriteInplace: - out = exp; - break; - case Operator::kAddTo: - out += exp; - break; - } -} -} //namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_ASSIGN_HELPER_H_ diff --git a/src/operator/fully_connect_op-inl.h b/src/operator/fully_connect_op-inl.h index e6e39010ec1e..a7f07601b374 100644 --- a/src/operator/fully_connect_op-inl.h +++ b/src/operator/fully_connect_op-inl.h @@ -1,16 +1,16 @@ /*! * Copyright (c) 2015 by Contributors - * \file fully_connect_op-inl.hpp + * \file fully_connect_op-inl.h * \brief fully connect operator * \author Bing Xu */ +#ifndef MXNET_OPERATOR_FULLY_CONNECT_OP_INL_H_ +#define MXNET_OPERATOR_FULLY_CONNECT_OP_INL_H_ -#ifndef MXNET_FULLY_CONNECT_OP_INL_HPP_ -#define MXNET_FULLY_CONNECT_OP_INL_HPP_ - -#include #include -#include "./assign_helper.h" +#include +#include +#include "./operator_common.h" #include "./param.h" namespace mxnet { @@ -18,58 +18,55 @@ namespace op { template class FullyConnectOp : public Operator { public: - virtual void DescribeArgs(std::vector *args) { - args->clear(); - args->push_back(kDataArg); - args->push_back(kWeightArg); - args->push_back(kBiasArg); + virtual std::vector DescribeArgs() const { + ArgType ret[] = {kDataArg, kWeightArg, kBiasArg}; + if (param_.no_bias == 0) { + return std::vector(ret, ret + 3); + } else { + return std::vector(ret, ret + 2); + } } virtual void SetParam(const char *name, const char *val) { param_.SetParam(name, val); } - virtual void InferShape(std::vector &in_shape, + virtual void InferShape(std::vector *in_shape, std::vector *out_shape) { - CHECK(in_shape.size() == 3) << "Input:[data, weight, bias]"; - CHECK(param_.num_input_node > 0); - CHECK(param_.num_hidden > 0); - TShape &dshape = in_shape[0]; - TShape &wshape = in_shape[1]; - TShape &bshape = in_shape[2]; - if (wshape.Size() == 0) { - mshadow::Shape<2> ws = mshadow::Shape2(param_.num_hidden, - param_.num_input_node); - wshape = ws; + using namespace mshadow; + if (param_.no_bias == 0) { + CHECK(in_shape->size() == 3) << "Input:[data, weight, bias]"; } else { - CHECK(wshape[0] == param_.num_hidden); - CHECK(wshape[1] == param_.num_input_node); + CHECK(in_shape->size() == 2) << "Input:[data, weight]"; } - if (bshape.Size() == 0) { - mshadow::Shape<1> bs = mshadow::Shape1(param_.num_hidden); - bshape = bs; - } else { - CHECK(bshape[0] == param_.num_hidden); + CHECK(param_.num_hidden > 0); + const TShape &dshape = (*in_shape)[0]; + CHECK(dshape.ndim() == 4) << \ + "Input data should be 4D in batch-1-1-hidden"; + CHECK(dshape.ndim() != 0) << "Require data shape to be known"; + ShapeAssignCheck((*in_shape)[1], Shape2(param_.num_hidden, dshape[3])); + if (param_.no_bias == 0) { + ShapeAssignCheck((*in_shape)[2], Shape1(param_.num_hidden)); } - CHECK(dshape.ndim() == 4 && dshape[3] == param_.num_input_node) << \ - "Input data should be 4D in batch-1-1-hidden"; out_shape->clear(); out_shape->push_back(dshape); - out_shape->at(0)[3] = param_.num_hidden; + (*out_shape)[0][3] = param_.num_hidden; } virtual void Forward(Option opt, RunContext ctx, const std::vector &in_data, const std::vector &out_data) { - 
CHECK(in_data.size() == 3) << "Input:[data, weight, bias]"; + using namespace mshadow; + using namespace mshadow::expr; + size_t expected = param_.no_bias == 0 ? 3 : 2; + CHECK(in_data.size() == expected); CHECK(out_data.size() == 1); - mshadow::Stream *stream = \ - static_cast *>(ctx.stream); - mshadow::Tensor wmat = in_data[0].get(stream); - mshadow::Tensor bias = in_data[1].get(stream); - mshadow::Tensor data = in_data[2].FlatTo2D(stream); - mshadow::Tensor out = out_data[0].FlatTo2D(stream); - out = mshadow::expr::dot(data, wmat.T()); - if (!param_.no_bias) { - out += mshadow::expr::repmat(bias, data.size(0)); + Stream *s = static_cast *>(ctx.stream); + Tensor data = in_data[0].FlatTo2D(s); + Tensor wmat = in_data[1].get(s); + Tensor out = out_data[0].FlatTo2D(s); + out = dot(data, wmat.T()); + if (param_.no_bias == 0) { + Tensor bias = in_data[2].get(s); + out += repmat(bias, data.size(0)); } } virtual void Backward(RunContext ctx, @@ -77,28 +74,29 @@ class FullyConnectOp : public Operator { const std::vector &in_data, const std::vector &out_grad, const std::vector &req) { + using namespace mshadow; + using namespace mshadow::expr; CHECK(grad_next.size() == 1); - CHECK(in_data.size() == 3) << "Input: [data, weight, bias]"; - CHECK(out_grad.size() == 3) << "Output: [gdata, gweight, gbias]"; + size_t expected = param_.no_bias == 0 ? 3 : 2; + CHECK(in_data.size() == expected && out_grad.size() == expected); CHECK(req.size() == 3); - mshadow::Stream *stream = \ - static_cast *>(ctx.stream); - mshadow::Tensor data = in_data[0].FlatTo2D(stream); - mshadow::Tensor wmat = in_data[1].get(stream); - mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); - mshadow::Tensor gdata = out_grad[0].FlatTo2D(stream); - mshadow::Tensor gwmat = out_grad[1].get(stream); - mshadow::Tensor gbias = out_grad[2].get(stream); + Stream *s = static_cast *>(ctx.stream); + Tensor data = in_data[0].FlatTo2D(s); + Tensor wmat = in_data[1].get(s); + Tensor grad = grad_next[0].FlatTo2D(s); // backprop - CHECK(req[0] != kWriteInplace); - Assign(gwmat, mshadow::expr::dot(grad.T(), data), req[0]); - if (!param_.no_bias) { - Assign(gbias, mshadow::expr::sum_rows(grad), req[1]); - } - if (req[0] != kNullOp) { - CHECK(req[0] != kWriteInplace); - Assign(gdata, mshadow::expr::dot(grad, wmat), req[2]); + CHECK(req[1] != kWriteInplace) << "cannot write weight inplace"; + // gradient of weight + Tensor gwmat = out_grad[1].get(s); + Assign(gwmat, req[1], dot(grad.T(), data)); + // gradient of bias + if (param_.no_bias == 0) { + Tensor gbias = out_grad[2].get(s); + Assign(gbias, req[2], sum_rows(grad)); } + // gradient of data + Tensor gdata = out_grad[0].FlatTo2D(s); + Assign(gdata, req[0], dot(grad, wmat)); } private: Param param_; @@ -106,6 +104,5 @@ class FullyConnectOp : public Operator { } // namespace op } // namespace mxnet -#endif // MXNET_FULLY_CONNECT_OP_INL_HPP - +#endif // MXNET_OPERATOR_FULLY_CONNECT_OP_INL_H_ diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index ac035b395b21..7c2f0c7b6a76 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -1,13 +1,12 @@ /*! 
* Copyright (c) 2015 by Contributors - * \file op.h + * \file mshadow_op.h * \brief extra mshadow operation for mxnet * \author Bing Xu */ #ifndef MXNET_MSHADOW_OPERATOR_OP_H_ #define MXNET_MSHADOW_OPERATOR_OP_H_ -#pragma once - +#include #include namespace mxnet { @@ -105,5 +104,3 @@ struct square_root { #endif // MXNET_MSHADOW_OPERATOR_OP_H_ - - diff --git a/src/operator/operator-inl.h b/src/operator/operator-inl.h new file mode 100644 index 000000000000..7bdd0a1b96d1 --- /dev/null +++ b/src/operator/operator-inl.h @@ -0,0 +1,35 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file operator-inl.h + * \brief device invarient code to create operators + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_INL_H_ +#define MXNET_OPERATOR_INL_H_ +#include +#include +#include "./mshadow_op.h" +#include "./activation_op-inl.h" +#include "./fully_connect_op-inl.h" + +namespace mxnet { +namespace op { +/*! + * \brief device invariant function to create operators + * \param type the type of operator + * \tparam xpu the device type we are at + */ +template +inline Operator *CreateOperator_(OpType type) { + switch (type) { + case kReLU: + return new ActivationOp(); + case kFullc: + return new FullyConnectOp(); + default: LOG(FATAL) << "unknown OpType"; + } + return NULL; +} +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_INL_H_ diff --git a/src/operator/operator.cc b/src/operator/operator.cc index 1cab17a62a67..e56d6049eca9 100644 --- a/src/operator/operator.cc +++ b/src/operator/operator.cc @@ -1,18 +1,39 @@ -/*! - * Copyright (c) 2015 by Contributors - * \file operator.cc - * \brief - * \author Bing Xu -*/ -#include "operator_helper.h" +#include +#include +#include +#include +#include "./operator_common.h" namespace mxnet { namespace op { +// declare the operator +template +Operator *CreateOperator(OpType type); -Operator *CreateOperator(OpType type) { - return OperatorFactory(type); + +OpType GetOpTpe(const char *type) { + if (!strcmp(type, "relu")) return kReLU; + if (!strcmp(type, "fullc")) return kFullc; + LOG(FATAL) << "unknown operator type " << type; + return kReLU; +} } -} // namespace op -} // namespace mxnet +// implementing the context +Operator *Operator::Create(const char *type, + Context ctx) { + op::OpType otype = op::GetOpTpe(type); + if (ctx.dev_mask == cpu::kDevMask) { + return op::CreateOperator(otype); + } + if (ctx.dev_mask == gpu::kDevMask) { +#if MXNET_USE_CUDA + return op::CreateOperator(otype); +#else + LOG(FATAL) << "GPU is not enabled"; +#endif + } + return NULL; +} +} // namespace mxnet diff --git a/src/operator/operator.cu b/src/operator/operator.cu deleted file mode 100644 index 3cc1ada28e4b..000000000000 --- a/src/operator/operator.cu +++ /dev/null @@ -1,20 +0,0 @@ -/*! - * Copyright (c) 2015 by Contributors - * \file operator.cu - * \brief - * \author Bing Xu -*/ - - -#include "operator_helper.h" - -namespace mxnet { -namespace op { - -Operator * CreateOperator(OpType type) { - return OperatorFactory(type); -} - -} // namespace op -} // namespace mxnet - diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h new file mode 100644 index 000000000000..8fb1066333b3 --- /dev/null +++ b/src/operator/operator_common.h @@ -0,0 +1,67 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file operator_common.h + * \brief common internal header of most operators + * this header includes utility functions operator can use + * common type definitions + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_OPERATOR_COMMON_H_ +#define MXNET_OPERATOR_OPERATOR_COMMON_H_ + +#include +#include + +namespace mxnet { +namespace op { +/*! + * \brief assign the expression to out according to request + * \param out the data to be assigned + * \param req the assignment request + * \param exp the expression + * \tparam OType output type + * \tparam Exp expression type + */ +template +inline void Assign(OType &out, + Operator::GradReqType req, + const Exp &exp) { + switch (req) { + case Operator::kNullOp: break; + case Operator::kWriteTo: + case Operator::kWriteInplace: out = exp; break; + case Operator::kAddTo: out += exp; break; + default: LOG(FATAL) << "not reached"; + } +} +/*! + * \brief assign shape to out if out is unknown + * otherwise check consistency + * \param out the output shape to be stored + * \param shape the infered shape + */ +template +inline void ShapeAssignCheck(TShape &out, const TS &shape) { + if (out.ndim() == 0) { + out = shape; + } else { + CHECK(out == shape) << "InferShape:: shape inconsistent"; + } +} + +/*! \brief type of operators */ +enum OpType { + kReLU = 0, + kFullc = 1 +}; + +/*! + * \brief device invariant function to create operators + * \param type the type of operator + * \tparam xpu the device type we are at + */ +template +Operator *CreateOperator(OpType type); +} //namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_COMMON_H_ diff --git a/src/operator/operator_cpu.cc b/src/operator/operator_cpu.cc new file mode 100644 index 000000000000..3d5e7c5f3248 --- /dev/null +++ b/src/operator/operator_cpu.cc @@ -0,0 +1,18 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file operator_cpu.cc + * \brief CPU specialization of operator codes + * \author Bing Xu +*/ +#include "./operator-inl.h" + +namespace mxnet { +namespace op { + +template<> +Operator *CreateOperator(OpType type) { + return CreateOperator_(type); +} + +} // namespace op +} // namespace mxnet diff --git a/src/operator/operator_gpu.cu b/src/operator/operator_gpu.cu new file mode 100644 index 000000000000..8fb3b2751f13 --- /dev/null +++ b/src/operator/operator_gpu.cu @@ -0,0 +1,21 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file operator_gpu.cu + * \brief GPU specialization of operator code + * \author Bing Xu +*/ +#include +#include +#include "operator-inl.h" + +namespace mxnet { +namespace op { + +template<> +Operator *CreateOperator(OpType type) { + return CreateOperator_(type); +} + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/operator_helper.h b/src/operator/operator_helper.h deleted file mode 100644 index 9a673b12cedb..000000000000 --- a/src/operator/operator_helper.h +++ /dev/null @@ -1,35 +0,0 @@ -/*! 
- * Copyright (c) 2015 by Contributors - * \file assign_helper.h - * \brief - * \author Bing Xu -*/ -#ifndef MXNET_OPERATOR_HELPER_H_ -#define MXNET_OPERATOR_HELPER_H_ -#include "activation_op-inl.h" -#include "fully_connect_op-inl.h" -#include "mshadow_op.h" - -namespace mxnet { -namespace op { - -enum OpType { - kReLU = 0, - kFullc = 1, -}; - -template -Operator *OperatorFactory(OpType type) { - switch (type) { - case kReLU: - return new ActivationOp(); - case kFullc: - return new FullyConnectOp(); - - }; - return NULL; -} - -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_HELPER_H_ diff --git a/src/operator/param.h b/src/operator/param.h index c17555898c0a..0d8016983c5a 100644 --- a/src/operator/param.h +++ b/src/operator/param.h @@ -6,10 +6,10 @@ */ #ifndef MXNET_OPERATOR_PARAM_H_ #define MXNET_OPERATOR_PARAM_H_ -#pragma once namespace mxnet { namespace op { +/*! \brief possible parameter for each operator */ struct Param { /*! \brief number of hidden layers */ int num_hidden; From 9c214c844720ed164b19b99b974c7cd7c7747f6b Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 21 Jun 2015 12:07:42 -0600 Subject: [PATCH 10/12] minor in act --- include/mxnet/operator.h | 7 ++++--- src/operator/activation_op-inl.h | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index c0179990058d..0dd6eb935b82 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -69,7 +69,7 @@ class Operator { * in_shape allows unknown elements, which are checked by shape.ndim() == 0. * For unknown shapes, InferShape will try to fill in the correct Shape in in_shape * For known shapes, InferShape will check shape consistency - * + * * common practice: set the shape of data input, and usually weight's shape can be infered * * \param out_shape the shape of outputs of the operator @@ -81,7 +81,7 @@ class Operator { * \brief perform a forward operation of operator, save the output to TBlob * \param opt option on Forward such as whether this is training phase * \param ctx runtime context - * \param in_data array of input data + * \param in_data array of input data, it is const * \param out_data array of output data, * the space of TBlob in out_data must be pre-allocated with InferShape */ @@ -97,6 +97,7 @@ class Operator { * \param out_grad array of output gradient, there could be three possible TBlob * in the each element in the array * \param req_types request types of the gradient saving operation + * only inplace will change input data * \sa GradReqType */ virtual void Backward(RunContext ctx, @@ -104,7 +105,7 @@ class Operator { const std::vector &in_data, const std::vector &out_grad, const std::vector &req); - + /*! 
* \brief factory unction, create a new operator * \param type the type of operator diff --git a/src/operator/activation_op-inl.h b/src/operator/activation_op-inl.h index 0e87020ea1c6..e02da0109aef 100644 --- a/src/operator/activation_op-inl.h +++ b/src/operator/activation_op-inl.h @@ -50,8 +50,9 @@ class ActivationOp : public Operator { static_cast *>(ctx.stream); mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); mshadow::Tensor data = in_data[0].FlatTo2D(stream); - mshadow::Tensor out = in_data[0].FlatTo2D(stream); - Assign(out, req[0], mshadow::expr::F(data) * grad); + mshadow::Tensor out = out_grad[0].FlatTo2D(stream); + Assign(out, req[0], mshadow::expr::F( + mshadow::expr::F(data)) * grad); } }; // class ActivationOp } // namespace op From 7a1296d5273cb7d013ead7e615ce0ffa552958ad Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 21 Jun 2015 12:12:30 -0600 Subject: [PATCH 11/12] remove inplace check --- src/operator/activation_op-inl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/operator/activation_op-inl.h b/src/operator/activation_op-inl.h index e02da0109aef..2a412ef3b2e1 100644 --- a/src/operator/activation_op-inl.h +++ b/src/operator/activation_op-inl.h @@ -45,7 +45,6 @@ class ActivationOp : public Operator { CHECK(in_data.size() == 1); CHECK(out_grad.size() == 1); CHECK(req.size() == 1); - CHECK(req[0] == kWriteInplace); mshadow::Stream *stream = \ static_cast *>(ctx.stream); mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); From 4d175e439e03634e1c8b4847ec39171abd468a82 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 21 Jun 2015 12:59:49 -0600 Subject: [PATCH 12/12] add property --- include/mxnet/operator.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index 0dd6eb935b82..a9b3c9f2b3ae 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -47,6 +47,12 @@ class Operator { /*! \brief bias argument */ kBiasArg = 2 }; + enum Property { + /*! \brief Op contains interanl state, won't influence engine schedule */ + kContainInteralState = 1, + /*! \brief Op forward require random number, will influence engine schedule */ + kForwardRequireRnd = 2, + }; /*! * \brief get types of input argument of this oeprator * \return a vector corresponding to type of each argument @@ -56,6 +62,14 @@ class Operator { // default most of layers only have one data argument return std::vector(1, kDataArg); } + /*! + * \brief describe property of op + * \return a bit map in int + */ + virtual int DescribeProperty() const { + // default most of layer only conatin internal state + return kContainInteralState; + } /*! * \brief set param for the operator from string * \param name parameter name
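
The DescribeProperty() contract added in the last patch returns a bit map, so multiple Property flags can be combined with bitwise OR and queried with bitwise AND. Below is a minimal standalone sketch of that convention; it is illustrative only, uses hypothetical names, compiles without any mxnet/mshadow dependency, and is not part of the patches above.

#include <cstdio>

// Mirror of the Property flags declared in include/mxnet/operator.h above.
enum Property {
  kContainInteralState = 1,  // op keeps internal state; does not influence engine schedule
  kForwardRequireRnd = 2     // op needs random numbers in Forward; influences engine schedule
};

// A hypothetical dropout-like operator would report both flags at once.
// (Illustrative helper, not an mxnet function.)
inline int DescribeDropoutLikeProperty() {
  return kContainInteralState | kForwardRequireRnd;
}

int main() {
  int prop = DescribeDropoutLikeProperty();
  if (prop & kForwardRequireRnd) {
    // an engine could use this bit to attach a random-number resource to Forward
    std::printf("operator requests a random number generator\n");
  }
  if (prop & kContainInteralState) {
    std::printf("operator carries internal state\n");
  }
  return 0;
}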