From fcb45cbb2038364735ca5a27a4dae4e3604016bc Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 15 Aug 2015 16:20:44 -0600 Subject: [PATCH 01/11] Add activation op --- Makefile | 7 +- src/operator/activation-inl.h | 141 ++++++++++++++++++++++++++++++++++ src/operator/activation.cc | 29 +++++++ src/operator/activation.cu | 21 +++++ 4 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 src/operator/activation-inl.h create mode 100644 src/operator/activation.cc create mode 100644 src/operator/activation.cu diff --git a/Makefile b/Makefile index 581674c784a2..da029f77ef27 100644 --- a/Makefile +++ b/Makefile @@ -58,14 +58,14 @@ endif BIN = test/api_registry_test OBJ = storage.o narray_op_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o fully_connected_cpu.o static_graph.o +OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o fully_connected_cpu.o static_graph.o activation_cpu.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a LIB_DEP = $(DMLC_CORE)/libdmlc.a ifeq ($(USE_CUDA), 1) - CUOBJ += narray_op_gpu.o fully_connected_gpu.o + CUOBJ += narray_op_gpu.o fully_connected_gpu.o activation_gpu.o endif .PHONY: clean all test lint doc @@ -87,7 +87,8 @@ c_api.o: src/c_api.cc operator.o: src/operator/static_operator_wrapper.cc fully_connected_cpu.o: src/operator/fully_connected.cc fully_connected_gpu.o: src/operator/fully_connected.cu - +activation_cpu.o: src/operator/activation.cc +activation_gpu.o: src/operator/activation.cu lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h new file mode 100644 index 000000000000..221e69ce948f --- /dev/null +++ b/src/operator/activation-inl.h @@ -0,0 +1,141 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file activation-inl.h + * \brief + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_ACTIVATION_INL_H_ +#define MXNET_OPERATOR_ACTIVATION_INL_H_ +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" + +namespace mxnet { +namespace op { +// Declare enumeration of input order to make code more intuitive. +// // These enums are only visible within this header +enum ActivationOpInputs {kData}; +enum ActivationOpOutputs {kOut}; +enum ActivationOpType {kReLU}; +/** + * \brief This is the implementation of activation operator. + * \tparam xpu The device that the op will be executed on. 
+ */ +template +class ActivationOp : public Operator { + public: + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(req[kOut], kWriteTo); + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(out_data.size(), 1); + Stream *s = static_cast *>(ctx.stream); + Tensor data = in_data[kData].FlatTo2D(s); + Tensor out = out_data[kOut].FlatTo2D(s); + out = F(data); + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(out_grad.size(), 1); + CHECK(in_data.size() == 1 && in_grad.size() == 1); + CHECK_EQ(req.size(), 1); + Stream *s = static_cast *>(ctx.stream); + Tensor out_gradient = out_grad[kData].FlatTo2D(s); + Tensor data = in_data[kData].FlatTo2D(s); + Tensor grad = out_grad[kOut].FlatTo2D(s); + Assign(grad, req[kData], F(out_gradient * F(data))); + } +}; // class ActivationOp + +// Decalre Factory function, used for dispatch specialization +template +Operator* CreateActivationOp(ActivationOpType type); + +#if DMLC_USE_CXX11 +class ActivationProp : public OperatorProperty { + public: + virtual void SetParam(const char *name, const char *val) { + if (!strcmp(name, "type")) { + if (!strcmp(val, "relu")) { + type_ = kReLU; + } + } + // TODO(bing): check optype valid + } + virtual bool InferShape(std::vector *in_shape, + std::vector *out_shape) const { + using namespace mshadow; + CHECK_EQ(in_shape->size(), 1) << "Input:[data]"; + const TShape &dshape = in_shape->at(0); + out_shape->clear(); + out_shape->push_back(dshape); + return true; + } + + virtual OperatorProperty* Copy() const { + return new ActivationProp(); + } + + virtual std::string TypeString() const { + switch (type_) { + case kReLU: return "Activation : ReLU"; + default: return "Invalid Activation"; + } + } + + // decalre dependency and inplace optimization options + virtual std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const { + return {out_grad[kOut], in_data[kData]}; + } + + virtual std::vector > BackwardInplaceOption( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const { + return {}; + } + + Operator* CreateOperator(Context ctx) const; + + private: + ActivationOpType type_; +}; +#endif // DMLC_USE_CXX11 + +namespace act { +/*! \brief Rectified Linear Operation */ +struct relu { + MSHADOW_XINLINE static real_t Map(real_t a) { + return a > 0.0f ? a : 0.0f; + } +}; +struct relu_grad { + MSHADOW_XINLINE static real_t Map(real_t a) { + return a > 0.0f ? 1.0f : 0.0f; + } +}; + +} // namespace act +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_ACTIVATION_INL_H_ + diff --git a/src/operator/activation.cc b/src/operator/activation.cc new file mode 100644 index 000000000000..b26c1e24dc53 --- /dev/null +++ b/src/operator/activation.cc @@ -0,0 +1,29 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file activation.cc + * \brief + * \author Bing Xu +*/ + +#include +#include "./activation-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator *CreateActivationOp(ActivationOpType type) { + switch (type) { + case kReLU: return new ActivationOp(); + default: return NULL; + } +} + +// DO_BIND_DISPATCH comes from operator_common.h +Operator *ActivationProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateActivationOp, type_); +} + +REGISTER_OP_PROPERTY(Activation, ActivationProp); +} // namespace op +} // namespace mxnet + diff --git a/src/operator/activation.cu b/src/operator/activation.cu new file mode 100644 index 000000000000..b6a523c003ec --- /dev/null +++ b/src/operator/activation.cu @@ -0,0 +1,21 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file activation.cu + * \brief + * \author Bing Xu +*/ + +#include "./activation-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator *CreateActivationOp(ActivationOpType type) { + switch(type) { + case kReLU: return new ActivationOp(); + default: return NULL; + } +} +} // op +} // namespace mxnet + From ee2d7f1951c82804120984bf3df2a4dfd4eb5eb5 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 15 Aug 2015 19:44:52 -0600 Subject: [PATCH 02/11] infer shape --- include/mxnet/c_api.h | 58 ++++---- include/mxnet/context.h | 2 + include/mxnet/operator.h | 30 +++-- include/mxnet/symbolic.h | 68 ++++++++-- python/mxnet/narray.py | 2 +- python/mxnet/symbol.py | 75 ++++++++++- python/mxnet/symbol_creator.py | 2 +- python/test_infer_shape.py | 19 +++ src/c_api.cc | 209 +++++++++++++++++++---------- src/operator/activation-inl.h | 4 +- src/operator/fully_connected-inl.h | 27 ++-- src/operator/operator_common.h | 39 ++++-- src/operator/param.h | 12 +- src/symbol/static_graph.cc | 92 +++++++++++-- src/symbol/symbol.cc | 96 ++++++++----- 15 files changed, 541 insertions(+), 194 deletions(-) create mode 100644 python/test_infer_shape.py diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index a9a15c4a8007..fe035b21bc7f 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -49,10 +49,9 @@ typedef void *DataIterHandle; * \return error info */ MXNET_DLL const char *MXGetLastError(); - -//-------------------------------- +//------------------------------------- // Part 1: NArray creation and deletion -//-------------------------------- +//------------------------------------- /*! * \brief create a NArray handle that is not initialized * can be used to pass in as mutate variables @@ -189,7 +188,6 @@ MXNET_DLL int MXFuncDescribe(FunctionHandle fun, mx_uint *num_scalars, mx_uint *num_mutate_vars, int *type_mask); - /*! * \brief invoke a function, the array size of passed in arguments * must match the values in the @@ -301,8 +299,8 @@ MXNET_DLL int MXSymbolListArguments(SymbolHandle symbol, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXSymbolListReturns(SymbolHandle symbol, - mx_uint *out_size, - const char ***out_str_array); + mx_uint *out_size, + const char ***out_str_array); /*! * \brief Compose the symbol on other symbols. * @@ -322,6 +320,36 @@ MXNET_DLL int MXSymbolCompose(SymbolHandle sym, mx_uint num_args, const char** keys, SymbolHandle* args); +/*! + * \brief infer shape of unknown input shapes given the known one. + * The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data + * The call will be treated as a kwargs call if key != nullptr or num_args==0, otherwise it is positional. 
+ * + * \param num_args numbe of input arguments. + * \param keys the key of keyword args (optional) + * \param arg_ind_ptr the head pointer of the rows in CSR + * \param arg_shape_data the content of the CSR + * \param in_shape_size sizeof the returning array of in_shapes + * \param in_shape_ndim returning array of shape dimensions of eachs input shape. + * \param in_shape_data returning array of pointers to head of the input shape. + * \param out_shape_size sizeof the returning array of out_shapes + * \param out_shape_ndim returning array of shape dimensions of eachs input shape. + * \param out_shape_data returning array of pointers to head of the input shape. + * \param complete whether infer shape completes or more information is needed. + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXSymbolInferShape(SymbolHandle sym, + mx_uint num_args, + const char** keys, + const mx_uint *arg_ind_ptr, + const mx_uint *arg_shape_data, + mx_uint *in_shape_size, + const mx_uint **in_shape_ndim, + const mx_uint ***in_shape_data, + mx_uint *out_shape_size, + const mx_uint **out_shape_ndim, + const mx_uint ***out_shape_data, + int *complete); //-------------------------------------------- // Part 4: operator interface on NArray //-------------------------------------------- @@ -352,24 +380,6 @@ MXNET_DLL int MXOpFree(OperatorHandle op); */ MXNET_DLL int MXOpDescribeArgs(mx_uint *out_size, int **out_array); -/*! - * \brief infer shape of unknown input shapes given the known one - * this function do not return the shape of output - * the shapes are packed into a CSR matrix represened by ind_ptr and shape_array - * - * When the function returns, it return a new CSR matrix by updating ind_ptr, - * and return the content in the return value - * - * \param ind_ptr the head pointer of the rows in CSR - * \param shape_array the content of the CSR - * \param out_nout number of output arguments of this operation - * \param out_array another content of CSR with infered shape - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXOpInferShape(mx_uint *ind_ptr, - mx_uint *shape_array, - mx_uint *out_nout, - mx_uint *out_array); /*! * \brief call forward on the operator * \param op the operator handle diff --git a/include/mxnet/context.h b/include/mxnet/context.h index 262ba2e787d4..8dfa618ca180 100644 --- a/include/mxnet/context.h +++ b/include/mxnet/context.h @@ -6,6 +6,8 @@ #ifndef MXNET_CONTEXT_H_ #define MXNET_CONTEXT_H_ +#include "./base.h" + namespace mxnet { /*! \brief Context information about the execution enviroment */ diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index 0299ef2bf167..65d6e3e92637 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -40,16 +40,18 @@ enum OpReqType { struct OpContext { /*! \brief whether it is training phase */ int is_train; - /*! \brief Stream we are running on */ - void *stream; + /*! \brief RunContext related resources */ + RunContext run_ctx; /*! \brief Resources requested by the operator */ std::vector requested; /*! - * \brief set the RunContext related parts - * \param ctx the context + * \brief get mshadow stream from Context + * \return the mshadow stream + * \tparam xpu the device type of the stream */ - inline void SetRunContext(const RunContext &ctx) { - stream = ctx.stream; + template + inline mshadow::Stream* get_stream() const { + return static_cast*>(run_ctx.stream); } }; @@ -84,13 +86,22 @@ class Operator { const std::vector &out_data) = 0; /*! 
* \brief Perform a Backward Operation, write gradient to the in_grad. + * + * Convention: + * out_grad.size() == OperatorProperty.NumVisibleReturns() + * out_data.size() == OperatorProperty.NumReturns() + * out_data can contain additional invisible returns that remembers the + * state carried from the Forward pass. For example mask in the dropout. + * + * The gradients are passed from visible returns in this function. + * * \param ctx runtime context available to this call - * \param out_grad the gradient value we get from output of the Operator + * \param out_grad the gradient value we get from of the Operator. * \param in_data the array of input data. * \param out_data the array of output data. * \param req request types of the saving operation, can be all types. * \param in_grad the array of gradient we need to write to. - * \sa OpReqType, OpContext + * \sa OpReqType, OpContext, OperatorProperty */ virtual void Backward(const OpContext &ctx, const std::vector &out_grad, @@ -166,7 +177,8 @@ class OperatorProperty { * * \param out_shape the shape of outputs of the operator * InferShape will modify the vector to fill output TShape - * \return if the shape inference is successful, return true, else return false. + * \return true if the shape inference is successful, false if there is not enough information. + * \throws dmlc::Error if the known arg_shapes are inconsistent. */ virtual bool InferShape(std::vector *in_shape, std::vector *out_shape) const = 0; diff --git a/include/mxnet/symbolic.h b/include/mxnet/symbolic.h index dc00f5a33fb6..e04f82b4f30f 100644 --- a/include/mxnet/symbolic.h +++ b/include/mxnet/symbolic.h @@ -38,7 +38,18 @@ class StaticGraph { /*! \brief index of output from the source. */ uint32_t index; }; - /*! \brief Operation Node in static graph */ + /*! + * \brief Operation Node in static graphs. + * There are two types of node, Forward and Backward Node. + * + * - Forward node corresponds to the op.Forward + * - Backward node corresponds to the Backward pass, + * where the corresponding forward node is indicated by backward_source_id. + * The op field in Backward node is nullptr + * + * The reason we explicit support Backward node is to allow special treatment + * such as shape inference and state sharing with Forward pass. + */ struct Node { /*! \brief wrapped operator property */ std::unique_ptr op; @@ -46,13 +57,36 @@ class StaticGraph { std::string name; /*! \brief inputs (node_id, index) for of the nodes*/ std::vector inputs; + /*! + * \brief If this field is nonnegative, this indicates this + * Node is corresponds to a Backward Operation of Operator. + * backward_source_id will points to the corresponding Forward Node. + * + * For normal node, this field is -1. + * When the node is a Backward node, the op field will be nullptr + */ + int32_t backward_source_id; + /*! \brief default constructor */ + Node() : backward_source_id(-1) {} + /*! \return whether the node is forward op node */ + inline bool is_forward() const { + return op != nullptr; + } + /*! \return whether the node is backward op node */ + inline bool is_backward() const { + return backward_source_id != -1; + } + /*! \return whether the node is variable node */ + inline bool is_variable() const { + return op == nullptr && !is_backward(); + } }; /*! \brief all nodes in the graph */ std::vector nodes; - /*! \brief index is nodes that correspods to arguments */ + /*! \brief index of nodes that correspods to arguments */ std::vector arg_nodes; - /*! 
\brief outputs(heads) of the graph */ - std::vector outputs; + /*! \brief heads outputs of the graph */ + std::vector heads; // funtions to help inference in static graph /*! * \brief Perform a topological sort on the graph @@ -85,8 +119,8 @@ class StaticGraph { * InferShape will modify the vector to fill output TShape * \return if the shape inference is successful, return true, else return false. */ - bool InferShape(std::vector *in_shape, - std::vector *out_shape) const; + bool InferShape(std::vector* in_shape, + std::vector* out_shape) const; }; /*! @@ -174,7 +208,7 @@ class Symbol { const std::string& name) const; /*! * \brief infer the shapes of outputs and unknown input arguments - * \param in_shape the shape of input arguments of the operator + * \param arg_shapes the shape of input arguments of the operator * this should be of same length as the vector returned by ListArguments * in_shape allows unknown elements, which are checked by shape.ndim() == 0. * For unknown shapes, InferShape will try to fill in the correct Shape in in_shape @@ -182,11 +216,23 @@ class Symbol { * * common practice: set the shape of data input, and usually weight's shape can be infered * - * \param out_shape the shape of outputs of the operator - * InferShape will modify the vector to fill output TShape - * \return if the shape inference is successful, return true, else return false. + * \param out_shapes Use to store the infered shapes of outputs. + * \return true if the shape inference is successful, false if there is not enough information. + * \throws dmlc::Error if the known arg_shapes are inconsistent. + */ + bool InferShape(std::vector *arg_shapes, + std::vector *out_shapes) const; + /*! + * \brief infer the shapes by providing shapes of known arguments. + * \param known_arg_shapes map of argument name to shape of arguments with known shapes. + * \param arg_shapes used to store infered shapes of arguments. + * \param out_shapes used to store infered shapes of outputs. + * \return true if the shape inference is successful, false if there is not enough information. + * \throws dmlc::Error if the known arg_shapes are inconsistent. */ - bool InferShape(std::vector *in_shape, std::vector *out_shape) const; + bool InferShape(const std::unordered_map &known_arg_shapes, + std::vector *arg_shapes, + std::vector *out_shapes) const; /*! * \brief get number of outputs of this symbol * \return number of outputs diff --git a/python/mxnet/narray.py b/python/mxnet/narray.py index 26a2198bd765..61839ecc0a60 100644 --- a/python/mxnet/narray.py +++ b/python/mxnet/narray.py @@ -134,7 +134,7 @@ def shape(self): pdata = ctypes.POINTER(mx_uint)() check_call(_LIB.MXNArrayGetShape( self.handle, ctypes.byref(ndim), ctypes.byref(pdata))) - return tuple(pdata[i] for i in range(ndim.value)) + return tuple(pdata[:ndim.value]) @property def context(self): diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py index 031b18ab862f..0caa4b6a0a90 100644 --- a/python/mxnet/symbol.py +++ b/python/mxnet/symbol.py @@ -5,7 +5,7 @@ import ctypes from .base import _LIB -from .base import c_array, c_str +from .base import c_array, c_str, mx_uint from .base import SymbolHandle from .base import check_call @@ -136,6 +136,77 @@ def list_returns(self): self.handle, ctypes.byref(size), ctypes.byref(sarr))) return [sarr[i] for i in range(size.value)] + def infer_shape(self, *args, **kwargs): + """Infer the shape of outputs and arguments of given known shapes of arguments. 
+ + User can either pass in the known shapes in positional way or keyword argument way. + Pair of Nones is returned if there is not enough information passed in. + An error will be raised if there is inconsistency found in the known shapes passed in. + + Parameters + ---------- + *args : + Provide shape of arguments in a positional way. + Unknown shape can be marked as None + + **kwargs : + Provide keyword arguments of known shapes. + + Returns + ------- + arg_shapes : list of tuple or None + List of shapes of arguments. + The order is in the same order as list_arguments() + out_shapes : list of tuple or None + List of shapes of outputs. + The order is in the same order as list_returns() + """ + if len(args) != 0 and len(kwargs) != 0: + raise ValueError('Can only specify known argument shapes either by positional or kwargs way.') + sdata = [] + indptr = [0] + if len(args) != 0: + keys = None + for s in args: + if s is not None: + if not isinstance(s, tuple): + raise TypeError('Argument need to be shapes(tuple)') + sdata.extend(s) + indptr.append(len(sdata)) + else: + keys = [] + for k, v in kwargs.items(): + keys.append(c_str(k)) + if not isinstance(v, tuple): + raise TypeError('Argument need to be shapes(tuple)') + sdata.extend(v) + indptr.append(len(sdata)) + arg_shape_size = mx_uint() + arg_shape_ndim = ctypes.POINTER(mx_uint)() + arg_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() + out_shape_size = mx_uint() + out_shape_ndim = ctypes.POINTER(mx_uint)() + out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() + complete = ctypes.c_int() + check_call(_LIB.MXSymbolInferShape( + self.handle, len(indptr) - 1, + c_array(ctypes.c_char_p, keys), + c_array(mx_uint, indptr), + c_array(mx_uint, sdata), + ctypes.byref(arg_shape_size), + ctypes.byref(arg_shape_ndim), + ctypes.byref(arg_shape_data), + ctypes.byref(out_shape_size), + ctypes.byref(out_shape_ndim), + ctypes.byref(out_shape_data), + ctypes.byref(complete))) + if complete.value != 0: + arg_shapes = [tuple(arg_shape_data[i][:arg_shape_ndim[i]]) for i in range(arg_shape_size.value)] + out_shapes = [tuple(out_shape_data[i][:out_shape_ndim[i]]) for i in range(out_shape_size.value)] + return (arg_shapes, out_shapes) + else: + return (None, None) + def debug_str(self): """Get a debug string. @@ -145,6 +216,6 @@ def debug_str(self): Debug string of the symbol. 
""" debug_str = ctypes.c_char_p() - check_call(_LIB.MXSymbolPrint( \ + check_call(_LIB.MXSymbolPrint( self.handle, ctypes.byref(debug_str))) return debug_str.value diff --git a/python/mxnet/symbol_creator.py b/python/mxnet/symbol_creator.py index c81deebaef11..d507a9c2871a 100644 --- a/python/mxnet/symbol_creator.py +++ b/python/mxnet/symbol_creator.py @@ -54,7 +54,7 @@ def __call__(self, *args, **kwargs): if isinstance(v, Symbol): symbol_kwargs[k] = v else: - param_keys.append(k) + param_keys.append(c_str(k)) param_vals.append(c_str(str(v))) # create atomic symbol diff --git a/python/test_infer_shape.py b/python/test_infer_shape.py new file mode 100644 index 000000000000..b94388e5546d --- /dev/null +++ b/python/test_infer_shape.py @@ -0,0 +1,19 @@ +# pylint: skip-file +import mxnet as mx + +data = mx.sym.Variable('data') + +fc1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=1000) +fc2 = mx.sym.FullyConnected(data=fc1, name='fc2', num_hidden=10) +fc3 = mx.sym.FullyConnected( name='fc2', num_hidden=10) + +print fc2.list_arguments() + +data_shape = (100, 100) +arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) +print dict(zip(fc2.list_arguments(), arg_shapes)) +print dict(zip(fc2.list_returns(), out_shapes)) + +weight_shape= (1, 100) +data_shape = (100, 100) +arg_shapes, out_shapes = fc2.infer_shape(data=data_shape, fc1_weight=weight_shape) diff --git a/src/c_api.cc b/src/c_api.cc index d5a1a67d70c6..896e0b5e5532 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -27,61 +28,76 @@ #message("Warning: Threadlocal is not enabled"); #endif -/*! \brief symbol wrapper to easily hold returning information */ -struct MXAPISymbolWrapper { - /*! \brief the actual symbol */ - mxnet::Symbol sym; +using namespace mxnet; + +/*! \brief entry to to easily hold returning information */ +struct MXAPIThreadLocalEntry { + /*! \brief holds last error message */ + std::string last_error; /*! \brief result holder for returning string */ std::string ret_str; /*! \brief result holder for returning strings */ std::vector ret_vec_str; /*! \brief result holder for returning string pointers */ std::vector ret_vec_charp; + /*! \brief result holder for returning shapes */ + std::vector arg_shapes, out_shapes; + /*! \brief result holder for returning shape dimensions */ + std::vector arg_shape_ndim, out_shape_ndim; + /*! \brief result holder for returning shape pointer */ + std::vector arg_shape_data, out_shape_data; + // helper function to setup return value of shape array + inline static void SetupShapeArrayReturn( + const std::vector &shapes, + std::vector *ndim, + std::vector *data) { + ndim->resize(shapes.size()); + data->resize(shapes.size()); + for (size_t i = 0; i < shapes.size(); ++i) { + ndim->at(i) = shapes[i].ndim(); + data->at(i) = shapes[i].data(); + } + } }; /*! - * \brief helper to store error message in threadlocal storage + * \brief A threadlocal store to store threadlocal variables. + * Will return a thread local singleton of type T + * \tparam T the type we like to store */ -class MXAPIErrorMessageHelper { +class MXAPIThreadLocalStore { public: - /*! \brief get a single instance out from */ - static MXAPIErrorMessageHelper *Get() { - static MXAPIErrorMessageHelper inst; - return &inst; - } - /*! 
- * \brief a helper function for error handling - * will set the last error to be str_set when it is not NULL - * \param str_set the error to set - * \return a pointer message to last error - */ - static const char *SetGetLastError(const char *str_set) { - // use last_error to record last error - static MX_TREAD_LOCAL std::string *last_error = NULL; - if (last_error == NULL) { - last_error = new std::string(); - Get()->RegisterDelete(last_error); + /*! \brief store return entry */ + typedef MXAPIThreadLocalEntry T; + /*! \return get a thread local singleton */ + static T* Get() { + static MX_TREAD_LOCAL T* ptr = nullptr; + if (ptr == nullptr) { + ptr = new T(); + Singleton()->RegisterDelete(ptr); } - if (str_set != NULL) { - *last_error = str_set; - } - return last_error->c_str(); + return ptr; } private: /*! \brief constructor */ - MXAPIErrorMessageHelper() {} + MXAPIThreadLocalStore() {} /*! \brief destructor */ - ~MXAPIErrorMessageHelper() { + ~MXAPIThreadLocalStore() { for (size_t i = 0; i < data_.size(); ++i) { delete data_[i]; } } + /*! \return singleton of the store */ + static MXAPIThreadLocalStore *Singleton() { + static MXAPIThreadLocalStore inst; + return &inst; + } /*! * \brief register str for internal deletion * \param str the string pointer */ - void RegisterDelete(std::string *str) { + void RegisterDelete(T *str) { std::unique_lock lock(mutex_); data_.push_back(str); lock.unlock(); @@ -89,13 +105,12 @@ class MXAPIErrorMessageHelper { /*! \brief internal mutex */ std::mutex mutex_; /*!\brief internal data */ - std::vector data_; + std::vector data_; }; // NOTE: all functions return 0 upon success // consider add try/catch block for user error // handling in the future -using namespace mxnet; /*! \brief macro to guard beginning and end section of all functions */ #define API_BEGIN() try { @@ -111,7 +126,7 @@ using namespace mxnet; /*! \brief return str message of the last error */ const char *MXGetLastError() { - return MXAPIErrorMessageHelper::SetGetLastError(NULL); + return MXAPIThreadLocalStore::Get()->last_error.c_str(); } /*! 
@@ -120,7 +135,7 @@ const char *MXGetLastError() { * \return the return value of API after exception is handled */ int MXHandleException(const dmlc::Error &e) { - MXAPIErrorMessageHelper::SetGetLastError(e.what()); + MXAPIThreadLocalStore::Get()->last_error = e.what(); return -1; } @@ -295,7 +310,7 @@ int MXSymbolCreateAtomicSymbol(AtomicSymbolCreator creator, const char **keys, const char **vals, SymbolHandle *out) { - MXAPISymbolWrapper *s = new MXAPISymbolWrapper(); + Symbol *s = new Symbol(); OperatorProperty *op = nullptr; API_BEGIN(); @@ -304,15 +319,15 @@ int MXSymbolCreateAtomicSymbol(AtomicSymbolCreator creator, for (int i = 0; i < num_param; ++i) { op->SetParam(keys[i], vals[i]); } - s->sym = Symbol::Create(op); + *s = Symbol::Create(op); *out = s; API_END_HANDLE_ERROR(delete s; delete op); } int MXSymbolCreateVariable(const char *name, SymbolHandle *out) { - MXAPISymbolWrapper *s = new MXAPISymbolWrapper(); + Symbol *s = new Symbol(); API_BEGIN(); - s->sym = Symbol::CreateVariable(name); + *s = Symbol::CreateVariable(name); *out = s; API_END_HANDLE_ERROR(delete s); } @@ -320,71 +335,72 @@ int MXSymbolCreateVariable(const char *name, SymbolHandle *out) { int MXSymbolCreateGroup(mx_uint num_symbols, SymbolHandle *symbols, SymbolHandle *out) { - MXAPISymbolWrapper *s = new MXAPISymbolWrapper(); - MXAPISymbolWrapper **sym_arr = (MXAPISymbolWrapper**)symbols; // NOLINT(*) + Symbol *s = new Symbol(); + Symbol **sym_arr = (Symbol**)symbols; // NOLINT(*) API_BEGIN(); std::vector syms; for (mx_uint i = 0; i < num_symbols; ++i) { - syms.push_back(sym_arr[i]->sym); + syms.push_back(*sym_arr[i]); } - s->sym = Symbol::CreateGroup(syms); + *s = Symbol::CreateGroup(syms); *out = s; API_END_HANDLE_ERROR(delete s); } int MXSymbolFree(SymbolHandle symbol) { API_BEGIN(); - delete static_cast(symbol); + delete static_cast(symbol); API_END(); } int MXSymbolCopy(SymbolHandle symbol, SymbolHandle *out) { - MXAPISymbolWrapper *s = new MXAPISymbolWrapper(); - + Symbol *s = new Symbol(); API_BEGIN(); - s->sym = (static_cast(symbol)->sym).Copy(); + *s = static_cast(symbol)->Copy(); *out = s; API_END_HANDLE_ERROR(delete s); } int MXSymbolPrint(SymbolHandle symbol, const char **out_str) { - MXAPISymbolWrapper *s = static_cast(symbol); - + Symbol *s = static_cast(symbol); + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); std::ostringstream os; - (s->sym).Print(os); - s->ret_str = os.str(); - *out_str = (s->ret_str).c_str(); + s->Print(os); + ret->ret_str = os.str(); + *out_str = (ret->ret_str).c_str(); API_END(); } int MXSymbolListArguments(SymbolHandle symbol, mx_uint *out_size, const char ***out_str_array) { - MXAPISymbolWrapper *s = static_cast(symbol); + Symbol *s = static_cast(symbol); + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); - s->ret_vec_str = std::move((s->sym).ListArguments()); - s->ret_vec_charp.clear(); - for (size_t i = 0; i < s->ret_vec_str.size(); ++i) { - s->ret_vec_charp.push_back(s->ret_vec_str[i].c_str()); + ret->ret_vec_str = std::move(s->ListArguments()); + ret->ret_vec_charp.clear(); + for (size_t i = 0; i < ret->ret_vec_str.size(); ++i) { + ret->ret_vec_charp.push_back(ret->ret_vec_str[i].c_str()); } - *out_size = static_cast(s->ret_vec_charp.size()); - *out_str_array = dmlc::BeginPtr(s->ret_vec_charp); + *out_size = static_cast(ret->ret_vec_charp.size()); + *out_str_array = dmlc::BeginPtr(ret->ret_vec_charp); API_END(); } int MXSymbolListReturns(SymbolHandle symbol, - mx_uint *out_size, - const char ***out_str_array) { - 
MXAPISymbolWrapper *s = static_cast(symbol); + mx_uint *out_size, + const char ***out_str_array) { + Symbol *s = static_cast(symbol); + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); - s->ret_vec_str = std::move((s->sym).ListReturns()); - s->ret_vec_charp.clear(); - for (size_t i = 0; i < s->ret_vec_str.size(); ++i) { - s->ret_vec_charp.push_back(s->ret_vec_str[i].c_str()); + ret->ret_vec_str = std::move(s->ListReturns()); + ret->ret_vec_charp.clear(); + for (size_t i = 0; i < ret->ret_vec_str.size(); ++i) { + ret->ret_vec_charp.push_back(ret->ret_vec_str[i].c_str()); } - *out_size = static_cast(s->ret_vec_charp.size()); - *out_str_array = dmlc::BeginPtr(s->ret_vec_charp); + *out_size = static_cast(ret->ret_vec_charp.size()); + *out_str_array = dmlc::BeginPtr(ret->ret_vec_charp); API_END(); } @@ -397,19 +413,68 @@ int MXSymbolCompose(SymbolHandle sym, std::string s_name; if (name != nullptr) s_name = name; - MXAPISymbolWrapper* s = static_cast(sym); + Symbol* s = static_cast(sym); if (keys == nullptr && num_args != 0) { std::vector pos_args; for (mx_uint i = 0; i < num_args; ++i) { - pos_args.push_back(((MXAPISymbolWrapper*)(args[i]))->sym); // NOLINT(*) + pos_args.push_back(*((Symbol*)args[i])); // NOLINT(*) } - (s->sym).Compose(pos_args, s_name); + s->Compose(pos_args, s_name); } else { std::unordered_map kwargs; for (mx_uint i = 0; i < num_args; ++i) { - kwargs[keys[i]] = ((MXAPISymbolWrapper*)(args[i]))->sym; // NOLINT(*) + kwargs[keys[i]] = *((Symbol*)args[i]); // NOLINT(*) + } + s->Compose(kwargs, s_name); + } + API_END(); +} + +int MXSymbolInferShape(SymbolHandle sym, + mx_uint num_args, + const char** keys, + const mx_uint *arg_ind_ptr, + const mx_uint *arg_shape_data, + mx_uint *in_shape_size, + const mx_uint **in_shape_ndim, + const mx_uint ***in_shape_data, + mx_uint *out_shape_size, + const mx_uint **out_shape_ndim, + const mx_uint ***out_shape_data, + int *complete) { + Symbol *s = static_cast(sym); + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); + bool succ; + API_BEGIN(); + if (keys == nullptr && num_args != 0) { + ret->arg_shapes.clear(); + for (mx_uint i = 0; i < num_args; ++i) { + ret->arg_shapes.push_back(TShape(arg_shape_data + arg_ind_ptr[i], + arg_shape_data + arg_ind_ptr[i+1])); } - (s->sym).Compose(kwargs, s_name); + succ = s->InferShape(&(ret->arg_shapes), &(ret->out_shapes)); + } else { + std::unordered_map kwargs; + for (mx_uint i = 0; i < num_args; ++i) { + kwargs[keys[i]] = TShape(arg_shape_data + arg_ind_ptr[i], + arg_shape_data + arg_ind_ptr[i+1]); + } + succ = s->InferShape(kwargs, &(ret->arg_shapes), &(ret->out_shapes)); + } + if (succ) { + MXAPIThreadLocalEntry::SetupShapeArrayReturn( + ret->arg_shapes, &(ret->arg_shape_ndim), &(ret->arg_shape_data)); + MXAPIThreadLocalEntry::SetupShapeArrayReturn( + ret->out_shapes, &(ret->out_shape_ndim), &(ret->out_shape_data)); + *in_shape_size = static_cast(ret->arg_shapes.size()); + *in_shape_ndim = dmlc::BeginPtr(ret->arg_shape_ndim); + *in_shape_data = dmlc::BeginPtr(ret->arg_shape_data); + *out_shape_size = static_cast(ret->out_shapes.size()); + *out_shape_ndim = dmlc::BeginPtr(ret->out_shape_ndim); + *out_shape_data = dmlc::BeginPtr(ret->out_shape_data); + *complete = 1; + } else { + *complete = 0; } API_END(); } diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index 221e69ce948f..564f81d47833 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -37,7 +37,7 @@ class ActivationOp : public Operator { CHECK_EQ(req[kOut], 
kWriteTo); CHECK_EQ(in_data.size(), 1); CHECK_EQ(out_data.size(), 1); - Stream *s = static_cast *>(ctx.stream); + Stream *s = ctx.get_stream(); Tensor data = in_data[kData].FlatTo2D(s); Tensor out = out_data[kOut].FlatTo2D(s); out = F(data); @@ -54,7 +54,7 @@ class ActivationOp : public Operator { CHECK_EQ(out_grad.size(), 1); CHECK(in_data.size() == 1 && in_grad.size() == 1); CHECK_EQ(req.size(), 1); - Stream *s = static_cast *>(ctx.stream); + Stream *s = ctx.get_stream(); Tensor out_gradient = out_grad[kData].FlatTo2D(s); Tensor data = in_data[kData].FlatTo2D(s); Tensor grad = out_grad[kOut].FlatTo2D(s); diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index 5c54d37220ee..e2913e65aba8 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -45,7 +45,7 @@ class FullyConnectedOp : public Operator { CHECK_EQ(out_data.size(), 1); // TODO(bing): check the BLAS Handle, be careful // maybe need blas handle from context - Stream *s = static_cast *>(ctx.stream); + Stream *s = ctx.get_stream(); Tensor data = in_data[kData].FlatTo2D(s); Tensor wmat = in_data[kWeight].get(s); Tensor out = out_data[kOut].FlatTo2D(s); @@ -70,7 +70,7 @@ class FullyConnectedOp : public Operator { CHECK_EQ(req.size(), expected); // TODO(bing): check the BLAS Handle, be careful // maybe need blas handle from context - Stream *s = static_cast *>(ctx.stream); + Stream *s = ctx.get_stream(); Tensor data = in_data[kData].FlatTo2D(s); Tensor wmat = in_data[kWeight].get(s); Tensor grad = out_grad[kOut].FlatTo2D(s); @@ -123,16 +123,25 @@ class FullyConnectedProp : public OperatorProperty { } CHECK_GT(param_.num_hidden, 0); const TShape &dshape = (*in_shape)[0]; - CHECK_EQ(dshape.ndim(), 4) << \ - "Input data should be 4D in batch-1-1-hidden"; - CHECK_NE(dshape.ndim(), 0) << "Require data shape to be known"; - ShapeAssignCheck((*in_shape)[kWeight], Shape2(param_.num_hidden, dshape[3])); + // require data to be known + if (dshape.ndim() == 0) return false; + + index_t num_input; + if (dshape.ndim() == 4) { + // TODO(bing) consider deprecate 4D input + CHECK(dshape[1] == 1 && dshape[2] == 1); + num_input = dshape[3]; + } else { + CHECK_EQ(dshape.ndim(), 2) + << "FullyConnecteded: Input data should be 2D in (batch, num_hidden)"; + num_input = dshape[1]; + } + SHAPE_ASSIGN_CHECK(*in_shape, kWeight, Shape2(param_.num_hidden, num_input)); if (param_.no_bias == 0) { - ShapeAssignCheck((*in_shape)[kBias], Shape1(param_.num_hidden)); + SHAPE_ASSIGN_CHECK(*in_shape, kBias, Shape1(param_.num_hidden)); } out_shape->clear(); - out_shape->push_back(dshape); - (*out_shape)[0][3] = param_.num_hidden; + out_shape->push_back(Shape2(dshape[0], param_.num_hidden)); return true; } diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index 87b581f28278..7ffa3d1456d2 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -34,20 +34,39 @@ inline void Assign(OType &out, // NOLINT(*) default: LOG(FATAL) << "not reached"; } } + +/*! \brief exception throwed by InferShape error */ +struct InferShapeError { + /*! \brief analyze message */ + std::string msg; + /*! \brief corresponding input index */ + int index; + // constructor + InferShapeError(std::string msg, int index) + : msg(msg), index(index) {} +}; + /*! 
- * \brief assign shape to out if out is unknown - * otherwise check consistency - * \param out the output shape to be stored + * \brief macro assign shape to out if out is unknown otherwise check consistency + * Use macro so we can see the error file more clearly + * \param shape_array the shape array to store the result + * \param index the index of in the array * \param shape the infered shape */ -template -inline void ShapeAssignCheck(TShape &out, const TS &shape) { // NOLINT(*) - if (out.ndim() == 0) { - out = shape; - } else { - CHECK(out == shape) << "InferShape:: shape inconsistent"; +#define SHAPE_ASSIGN_CHECK(shape_array, index, shape) \ + { \ + auto &out = (shape_array)[index]; \ + if (out.ndim() == 0) { \ + out = shape; \ + } else { \ + if (out != shape) { \ + std::ostringstream os; \ + os << "Shape inconsistent, Provided " << '='<< out << ',' \ + << " inferred shape=" << shape; \ + throw ::mxnet::op::InferShapeError(os.str(), index); \ + } \ + } \ } -} // helper macro to implement bind dispatch #if MXNET_USE_CUDA diff --git a/src/operator/param.h b/src/operator/param.h index e1f6b4ee58d8..f0ce5886e2fb 100644 --- a/src/operator/param.h +++ b/src/operator/param.h @@ -35,10 +35,6 @@ struct Param { int no_bias; /*! \brief maximum temp_col_size allowed in each layer */ int temp_col_max; - /*! \brief number of input channels */ - int num_input_channel; - /*! \brief number of input hidden nodes, used by fullc */ - int num_input_node; /*! \brief reserved fields, for future compatibility */ int reserved[64]; @@ -48,11 +44,9 @@ struct Param { } inline void SetParam(const char *name, const char* val) { - if (!strcmp(name, "nhidden")) num_hidden = atoi(val); - if (!strcmp(name, "num_input_node")) num_input_node = atoi(val); - if (!strcmp(name, "num_input_channel")) num_input_channel = atoi(val); - if (!strcmp(name, "nchannel")) num_channel = atoi(val); - if (!strcmp(name, "ngroup")) num_group = atoi(val); + if (!strcmp(name, "num_hidden")) num_hidden = atoi(val); + if (!strcmp(name, "num_channel")) num_channel = atoi(val); + if (!strcmp(name, "num_group")) num_group = atoi(val); if (!strcmp(name, "kernel_size")) { kernel_y = kernel_x = atoi(val); } diff --git a/src/symbol/static_graph.cc b/src/symbol/static_graph.cc index 5419e26afe86..62de7963638a 100644 --- a/src/symbol/static_graph.cc +++ b/src/symbol/static_graph.cc @@ -7,14 +7,18 @@ #include #include #include +#include "../operator/operator_common.h" namespace mxnet { std::vector StaticGraph::TopoSort() const { std::vector out_degree(nodes.size(), 0); - for (const Node &n : nodes) { - for (const DataEntry &e : n.inputs) { + for (const Node& n : nodes) { + for (const DataEntry& e : n.inputs) { ++out_degree[e.source_id]; } + if (n.is_backward()) { + ++out_degree[n.backward_source_id]; + } } std::vector ret(nodes.size()); auto result = ret.rbegin(); @@ -29,12 +33,17 @@ std::vector StaticGraph::TopoSort() const { queue.pop(); *result = node_id; ++result; - for (const DataEntry &e : nodes[node_id].inputs) { - out_degree[e.source_id] -= 1; - if (out_degree[e.source_id] == 0) { + const Node& n = nodes[node_id]; + for (const DataEntry& e : n.inputs) { + if (--out_degree[e.source_id] == 0) { queue.push(e.source_id); } } + if (n.is_backward()) { + if (--out_degree[n.backward_source_id] == 0) { + queue.push(n.backward_source_id); + } + } } return std::move(ret); } @@ -42,19 +51,73 @@ std::vector StaticGraph::TopoSort() const { bool StaticGraph::InferNodeShapes(const std::vector &topo_order, std::vector > *node_out_shapes) const { for (uint32_t 
nid : topo_order) { - const Node &node = nodes[nid]; - if (node.op != nullptr) { + const Node& node = nodes[nid]; + if (node.is_forward()) { std::vector in_shape; - for (const DataEntry &e : node.inputs) { + for (const DataEntry& e : node.inputs) { in_shape.push_back((*node_out_shapes)[e.source_id][e.index]); } - if (!node.op->InferShape(&in_shape, &(*node_out_shapes)[nid])) return false; + try { + if (!node.op->InferShape(&in_shape, &(*node_out_shapes)[nid])) return false; + } catch (const op::InferShapeError &err) { + // error handling + const std::string &op_name = node.name; + std::string arg_name = node.op->ListArguments()[err.index]; + std::ostringstream os; + os << "InferShape Error in " + << op_name << "\'s" << ' ' << arg_name << " argument\n"; + auto &source = nodes[node.inputs[err.index].source_id]; + if (source.is_variable()) { + os << "Corresponding keyword of symbol: " << source.name << '\n' << err.msg; + } + throw dmlc::Error(os.str()); + } for (size_t i = 0; i < node.inputs.size(); ++i) { - const DataEntry &e = node.inputs[i]; + const DataEntry& e = node.inputs[i]; (*node_out_shapes)[e.source_id][e.index] = in_shape[i]; } + } else if (node.is_backward()) { + // simply use shapes from forward pass to assign backward shape + const Node& forward = nodes[node.backward_source_id]; + CHECK(forward.is_forward()); + std::vector& in_grad_shapes = (*node_out_shapes)[nid]; + CHECK(in_grad_shapes.size() == forward.inputs.size()); + // assign the input shape to output gradients + for (size_t i = 0; i < forward.inputs.size(); ++i) { + const DataEntry &e = forward.inputs[i]; + try { + SHAPE_ASSIGN_CHECK(in_grad_shapes, i, (*node_out_shapes)[e.source_id][e.index]); + } catch (const op::InferShapeError &err) { + const std::string &op_name = forward.name; + std::string arg_name = forward.op->ListArguments()[e.index]; + std::ostringstream os; + os << "InferShape Error in " + << op_name << "\'s" << ' ' << arg_name << " gradient argument\n" + << err.msg; + throw dmlc::Error(os.str()); + } + } + // consistent check for input shapes + auto& out_data_shapes = (*node_out_shapes)[node.backward_source_id]; + // use BackwardInputs to select entries corresponding to node.inputs + auto in_shape = forward.op->BackwardInputs( + out_data_shapes, in_grad_shapes, out_data_shapes); + for (size_t i = 0; i < node.inputs.size(); ++i) { + const DataEntry& e = node.inputs[i]; + try { + SHAPE_ASSIGN_CHECK((*node_out_shapes)[e.source_id], e.index, in_shape[i]); + } catch (const op::InferShapeError &err) { + const std::string &op_name = nodes[e.source_id].name; + std::ostringstream os; + os << "InferShape Error in " + << op_name << "\'s" << " gradient values\n" + << err.msg; + throw dmlc::Error(os.str()); + } + } } } + // TODO(bing) assign shape for head gradient return true; } @@ -63,8 +126,10 @@ bool StaticGraph::InferShape(std::vector *in_shape, std::vector > node_out_shapes(nodes.size()); for (size_t i = 0; i < nodes.size(); ++i) { int nout = 1; - if (nodes[i].op != nullptr) { + if (nodes[i].is_forward()) { nout = nodes[i].op->NumReturns(); + } else if (nodes[i].is_backward()) { + nout = static_cast(nodes[nodes[i].backward_source_id].inputs.size()); } node_out_shapes[i].resize(nout); } @@ -78,8 +143,9 @@ bool StaticGraph::InferShape(std::vector *in_shape, for (size_t i = 0; i < arg_nodes.size(); ++i) { (*in_shape)[i] = node_out_shapes[arg_nodes[i]][0]; } - for (size_t i = 0; i < outputs.size(); ++i) { - DataEntry e = outputs[i]; + out_shape->resize(heads.size()); + for (size_t i = 0; i < heads.size(); ++i) { + 
const DataEntry &e = heads[i]; (*out_shape)[i] = node_out_shapes[e.source_id][e.index]; } return true; diff --git a/src/symbol/symbol.cc b/src/symbol/symbol.cc index 86cf54feabfa..54a5fe9422b2 100644 --- a/src/symbol/symbol.cc +++ b/src/symbol/symbol.cc @@ -1,7 +1,7 @@ /*! * Copyright (c) 2015 by Contributors - * \file symbol.cc - * \brief symbol of mxnet + *\file symbol.cc + *\brief symbol of mxnet */ #include #include @@ -12,13 +12,13 @@ namespace mxnet { /*! - * \brief Node is represents node of an operator in the symbolic graph. + *\brief Node is represents node of an operator in the symbolic graph. * - * It stores connection to the inputs to function represented by OperatorProperty - * NOTE on data structure: there are three types of node: - * - Normal node: contains all the necessary elements of a graph. - * - OperatorProperty: the inputs_ is empty, represents an OperatorProperty that has not been applied. - * - Variable: the sym_ is nullptr, represents an named Variable of tensors that can be composed. + *It stores connection to the inputs to function represented by OperatorProperty + *NOTE on data structure: there are three types of node: + *- Normal node: contains all the necessary elements of a graph. + *- OperatorProperty: the inputs_ is empty, represents an OperatorProperty that has not been applied. + *- Variable: the sym_ is nullptr, represents an named Variable of tensors that can be composed. */ struct Symbol::Node { /*! \brief Operator of this node */ @@ -28,11 +28,11 @@ struct Symbol::Node { /*! \brief inputs to this node */ std::vector inputs; /*! - * \brief constructor - * \param op the OperatorProperty to construct the Node - * \param name the name of the symbol + *\brief constructor + *\param op the OperatorProperty to construct the Node + *\param name the name of the symbol */ - explicit Node(OperatorProperty* op = nullptr, const std::string& name = "") + explicit Node(OperatorProperty *op = nullptr, const std::string& name = "") : op(op), name(name) { } /*! \return Whether the symbol is atomic */ @@ -63,7 +63,7 @@ inline void Symbol::DFSVisit(FVisit fvisit) const { } } while (!stack.empty()) { - Node* back = stack.back(); + Node *back = stack.back(); stack.pop_back(); fvisit(back); for (auto it = back->inputs.rbegin(); it != back->inputs.rend(); ++it) { @@ -76,6 +76,28 @@ inline void Symbol::DFSVisit(FVisit fvisit) const { } } +// helper function to handle keyword argument mismatch +// throw approperiate messages +template +inline void KeywordArgumentMismatch(const char *source, + const TMap &kwargs, + const std::vector args) { + std::unordered_set keys(args.begin(), args.end()); + std::ostringstream head, msg; + msg << "\nCandidate arguments:\n"; + for (size_t i = 0; i < args.size(); ++i) { + msg << "\t[" << i << ']' << args[i] << '\n'; + } + + for (const auto& kv : kwargs) { + if (keys.count(kv.first) == 0) { + LOG(FATAL) << source + << "Keyword argument name " << kv.first << " not found." 
+ << msg.str(); + } + } +} + int Symbol::FindDuplicateArgs(std::unordered_map *out) const { out->clear(); int max_dup = 1; @@ -328,19 +350,8 @@ void Symbol::Compose(const std::unordered_map& kwargs, } } if (nmatched != kwargs.size()) { - // Error message handling - std::vector req_args = this->ListArguments(); - std::unordered_set keys(req_args.begin(), req_args.end()); - std::ostringstream msg; - msg << "\nCandidate arguments:\n"; - for (size_t i = 0; i < req_args.size(); ++i) { - msg << "\t[" << i << ']' << req_args[i] << '\n'; - } - for (const auto& kv : kwargs) { - CHECK_NE(keys.count(kv.first), 0) - << "Keyword Argument " << kv.first << " not found in arguments." - << msg.str(); - } + KeywordArgumentMismatch( + "Symbol.Compose", kwargs, ListArguments()); } } @@ -358,11 +369,34 @@ Symbol Symbol::operator () (const std::unordered_map& kwarg return s; } -bool Symbol::InferShape(std::vector *in_shape, - std::vector *out_shape) const { +bool Symbol::InferShape(std::vector *arg_shapes, + std::vector *out_shapes) const { + StaticGraph g; + this->ToStaticGraph(&g); + return g.InferShape(arg_shapes, out_shapes); +} + +bool Symbol::InferShape(const std::unordered_map& known_arg_shapes, + std::vector *arg_shapes, + std::vector *out_shapes) const { StaticGraph g; this->ToStaticGraph(&g); - return g.InferShape(in_shape, out_shape); + arg_shapes->clear(); + arg_shapes->resize(g.arg_nodes.size(), TShape()); + size_t nmatched = 0; + for (size_t i = 0; i < g.arg_nodes.size(); ++i) { + const std::string& name = g.nodes[g.arg_nodes[i]].name; + auto it = known_arg_shapes.find(name); + if (it != known_arg_shapes.end()) { + arg_shapes->at(i) = it->second; + ++nmatched; + } + } + if (nmatched != known_arg_shapes.size()) { + KeywordArgumentMismatch( + "Symbol.InterShape", known_arg_shapes, ListArguments()); + } + return g.InferShape(arg_shapes, out_shapes); } Symbol Symbol::Create(OperatorProperty *op) { @@ -424,12 +458,12 @@ void Symbol::ToStaticGraph(StaticGraph *out_graph) const { } } // setup heads - out_graph->outputs.clear(); + out_graph->heads.clear(); for (auto &head : heads_) { StaticGraph::DataEntry e; e.source_id = node_index[head.source.get()]; e.index = head.index; - out_graph->outputs.push_back(e); + out_graph->heads.push_back(e); } } } // namespace mxnet From 8fe878e1746338c3c67dcdc2dfc0961b0d9ef6f2 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 15 Aug 2015 21:09:40 -0600 Subject: [PATCH 03/11] minor change in act --- src/operator/activation-inl.h | 52 +++++++--------- src/operator/activation.cc | 5 +- src/operator/activation.cu | 5 +- .../{static_operator => }/mshadow_op.h | 19 +++--- src/operator/operator_common.h | 1 + .../static_operator/activation_op-inl.h | 61 ------------------- 6 files changed, 40 insertions(+), 103 deletions(-) rename src/operator/{static_operator => }/mshadow_op.h (87%) delete mode 100644 src/operator/static_operator/activation_op-inl.h diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index 564f81d47833..27bde578a3b1 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -20,7 +20,7 @@ namespace op { // // These enums are only visible within this header enum ActivationOpInputs {kData}; enum ActivationOpOutputs {kOut}; -enum ActivationOpType {kReLU}; +enum ActivationOpType {kUnknown, kReLU, kSigmoid, kTanh}; /** * \brief This is the implementation of activation operator. * \tparam xpu The device that the op will be executed on. 
@@ -34,7 +34,6 @@ class ActivationOp : public Operator { const std::vector &out_data) { using namespace mshadow; using namespace mshadow::expr; - CHECK_EQ(req[kOut], kWriteTo); CHECK_EQ(in_data.size(), 1); CHECK_EQ(out_data.size(), 1); Stream *s = ctx.get_stream(); @@ -56,9 +55,9 @@ class ActivationOp : public Operator { CHECK_EQ(req.size(), 1); Stream *s = ctx.get_stream(); Tensor out_gradient = out_grad[kData].FlatTo2D(s); - Tensor data = in_data[kData].FlatTo2D(s); + Tensor output = out_data[kData].FlatTo2D(s); Tensor grad = out_grad[kOut].FlatTo2D(s); - Assign(grad, req[kData], F(out_gradient * F(data))); + Assign(grad, req[kData], F(out_gradient * output)); } }; // class ActivationOp @@ -69,33 +68,35 @@ Operator* CreateActivationOp(ActivationOpType type); #if DMLC_USE_CXX11 class ActivationProp : public OperatorProperty { public: + ActivationProp() : type_(kUnknown) {} + virtual void SetParam(const char *name, const char *val) { if (!strcmp(name, "type")) { - if (!strcmp(val, "relu")) { - type_ = kReLU; - } + if (!strcmp(val, "relu")) type_ = kReLU; + if (!strcmp(val, "sigmoid")) type_ = kSigmoid; + if (!strcmp(val, "tanh")) type_ = kTanh; } - // TODO(bing): check optype valid + CHECK(type_ >= kReLU && type_ <= kTanh) << "Invalid activation type"; } virtual bool InferShape(std::vector *in_shape, std::vector *out_shape) const { using namespace mshadow; CHECK_EQ(in_shape->size(), 1) << "Input:[data]"; const TShape &dshape = in_shape->at(0); + if (dshape.ndim() == 0) return false; out_shape->clear(); out_shape->push_back(dshape); return true; } virtual OperatorProperty* Copy() const { - return new ActivationProp(); + auto ptr = new ActivationProp(); + ptr->type_ = this->type_; + return ptr; } virtual std::string TypeString() const { - switch (type_) { - case kReLU: return "Activation : ReLU"; - default: return "Invalid Activation"; - } + return "Activation"; } // decalre dependency and inplace optimization options @@ -103,7 +104,7 @@ class ActivationProp : public OperatorProperty { const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data) const { - return {out_grad[kOut], in_data[kData]}; + return {out_grad[kOut], out_data[kData]}; } virtual std::vector > BackwardInplaceOption( @@ -111,7 +112,13 @@ class ActivationProp : public OperatorProperty { const std::vector &in_data, const std::vector &out_data, const std::vector &in_grad) const { - return {}; + return {{out_grad[kData], in_grad[kData]}}; + } + + virtual std::vector > ForwardInplaceOption( + const std::vector &in_data, + const std::vector &out_data) const { + return {{in_data[kData], out_data[kData]}}; } Operator* CreateOperator(Context ctx) const; @@ -120,21 +127,6 @@ class ActivationProp : public OperatorProperty { ActivationOpType type_; }; #endif // DMLC_USE_CXX11 - -namespace act { -/*! \brief Rectified Linear Operation */ -struct relu { - MSHADOW_XINLINE static real_t Map(real_t a) { - return a > 0.0f ? a : 0.0f; - } -}; -struct relu_grad { - MSHADOW_XINLINE static real_t Map(real_t a) { - return a > 0.0f ? 
1.0f : 0.0f; - } -}; - -} // namespace act } // namespace op } // namespace mxnet #endif // MXNET_OPERATOR_ACTIVATION_INL_H_ diff --git a/src/operator/activation.cc b/src/operator/activation.cc index b26c1e24dc53..6b822a68c8e5 100644 --- a/src/operator/activation.cc +++ b/src/operator/activation.cc @@ -7,13 +7,16 @@ #include #include "./activation-inl.h" +#include "./mshadow_op.h" namespace mxnet { namespace op { template<> Operator *CreateActivationOp(ActivationOpType type) { switch (type) { - case kReLU: return new ActivationOp(); + case kReLU: return new ActivationOp(); + case kSigmoid: return new ActivationOp(); + case kTanh: return new ActivationOp(); default: return NULL; } } diff --git a/src/operator/activation.cu b/src/operator/activation.cu index b6a523c003ec..b7c771d653f3 100644 --- a/src/operator/activation.cu +++ b/src/operator/activation.cu @@ -6,13 +6,16 @@ */ #include "./activation-inl.h" +#include "./mshadow_op.h" namespace mxnet { namespace op { template<> Operator *CreateActivationOp(ActivationOpType type) { switch(type) { - case kReLU: return new ActivationOp(); + case kReLU: return new ActivationOp(); + case kSigmoid: return new ActivationOp(); + case kTanh: return new ActivationOp(); default: return NULL; } } diff --git a/src/operator/static_operator/mshadow_op.h b/src/operator/mshadow_op.h similarity index 87% rename from src/operator/static_operator/mshadow_op.h rename to src/operator/mshadow_op.h index bb33471f168a..010cf0ce7cc9 100644 --- a/src/operator/static_operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -1,17 +1,18 @@ /*! * Copyright (c) 2015 by Contributors * \file mshadow_op.h - * \brief extra mshadow operation for mxnet + * \brief * \author Bing Xu - */ -#ifndef MXNET_OPERATOR_STATIC_OPERATOR_MSHADOW_OP_H_ -#define MXNET_OPERATOR_STATIC_OPERATOR_MSHADOW_OP_H_ +*/ +#ifndef MXNET_OPERATOR_MSHADOW_OP_H_ +#define MXNET_OPERATOR_MSHADOW_OP_H_ + #include -#include namespace mxnet { -/*! \brief operations for ActivationLayer */ namespace op { +namespace mshadow_op { +/*! \brief identity Operation */ struct identity { MSHADOW_XINLINE static real_t Map(real_t a) { return a; @@ -98,9 +99,7 @@ struct square_root { return sqrt(a); } }; - +} // namespace mshadow_op } // namespace op } // namespace mxnet - -#endif // MXNET_OPERATOR_STATIC_OPERATOR_MSHADOW_OP_H_ - +#endif // MXNET_OPERATOR_MSHADOW_OP_H_ diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index 7ffa3d1456d2..eea731c8fbe6 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace mxnet { namespace op { diff --git a/src/operator/static_operator/activation_op-inl.h b/src/operator/static_operator/activation_op-inl.h deleted file mode 100644 index cfb0b7cec8b5..000000000000 --- a/src/operator/static_operator/activation_op-inl.h +++ /dev/null @@ -1,61 +0,0 @@ -/*! 
- * Copyright (c) 2015 by Contributors - * \file activation_op-inl.h - * \brief activation operator of mxnet - */ - -#ifndef MXNET_OPERATOR_STATIC_OPERATOR_ACTIVATION_OP_INL_H_ -#define MXNET_OPERATOR_STATIC_OPERATOR_ACTIVATION_OP_INL_H_ - -#include -#include -#include -#include "./static_operator_common.h" - -namespace mxnet { -namespace op { -template -class ActivationOp : public StaticOperator { - public: - virtual void InferShape(std::vector *in_shape, - std::vector *out_shape) { - CHECK_EQ(in_shape->size(), 1) << "Only 1 input is allowed"; - CHECK_NE((*in_shape)[0].ndim(), 0) << "Require data shape to be known"; - out_shape->clear(); - out_shape->push_back((*in_shape)[0]); - } - virtual void Forward(Option opt, - RunContext ctx, - const std::vector &in_data, - const std::vector &out_data) { - CHECK_EQ(out_data.size(), 1); - CHECK_EQ(in_data.size(), 1); - mshadow::Stream *stream = \ - static_cast *>(ctx.stream); - mshadow::Tensor in = in_data[0].FlatTo2D(stream); - mshadow::Tensor out = out_data[0].FlatTo2D(stream); - out = mshadow::expr::F(in); - } - virtual void Backward(RunContext ctx, - const std::vector &grad_next, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &out_grad, - const std::vector &req) { - CHECK_EQ(grad_next.size(), 1); - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(req.size(), 1); - mshadow::Stream *stream = \ - static_cast *>(ctx.stream); - mshadow::Tensor grad = grad_next[0].FlatTo2D(stream); - mshadow::Tensor data = in_data[0].FlatTo2D(stream); - mshadow::Tensor out = out_grad[0].FlatTo2D(stream); - Assign(out, req[0], mshadow::expr::F( - mshadow::expr::F(data)) * grad); - } -}; // class ActivationOp -} // namespace op -} // namespace mxnet - -#endif // MXNET_OPERATOR_STATIC_OPERATOR_ACTIVATION_OP_INL_H_ From a3cde4ac8136ef2e1ff3f1b85a6dd6ac6740b315 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 15 Aug 2015 23:21:17 -0600 Subject: [PATCH 04/11] tmp save --- src/operator/activation-inl.h | 9 +- src/operator/elem_plus-inl.h | 202 ++++++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+), 4 deletions(-) create mode 100644 src/operator/elem_plus-inl.h diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index 27bde578a3b1..15ded9761b27 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -39,7 +39,7 @@ class ActivationOp : public Operator { Stream *s = ctx.get_stream(); Tensor data = in_data[kData].FlatTo2D(s); Tensor out = out_data[kOut].FlatTo2D(s); - out = F(data); + Assign(out, req[kOut], F(data)); } virtual void Backward(const OpContext &ctx, @@ -68,7 +68,9 @@ Operator* CreateActivationOp(ActivationOpType type); #if DMLC_USE_CXX11 class ActivationProp : public OperatorProperty { public: - ActivationProp() : type_(kUnknown) {} + explicit ActivationProp() : type_(kUnknown) {} + + explicit ActivationProp(ActivationOpType type) : type_(type) {} virtual void SetParam(const char *name, const char *val) { if (!strcmp(name, "type")) { @@ -90,8 +92,7 @@ class ActivationProp : public OperatorProperty { } virtual OperatorProperty* Copy() const { - auto ptr = new ActivationProp(); - ptr->type_ = this->type_; + auto ptr = new ActivationProp(type_); return ptr; } diff --git a/src/operator/elem_plus-inl.h b/src/operator/elem_plus-inl.h new file mode 100644 index 000000000000..df2f50fbb508 --- /dev/null +++ b/src/operator/elem_plus-inl.h @@ -0,0 +1,202 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file elem_plus-inl.h + * \brief + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_ELEM_PLUS_INL_H_ +#define MXNET_OPERATOR_ELEM_PLUS_INL_H_ +namespace mxnet { +namespace op { +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" + +enum ElemsPlusOpInputs {kData0, kData1, kData2, kData3}; +enum ElemsPlusOpOutputs {kOut}; + +template +class ElemPlusOp : public Operator { + public: + explicit ElemPlusOp(uint32_t size) : size_(size) {} + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(in_data.size(), cnt_) << "Invalid Input TBlobs"; + CHECK_EQ(out_data.size(), 1); + Stream *s = ctx.get_stream(); + Tensor out = out_data[kOut].FlatTo2D(s); + switch (size_) { + case 1: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Assign(out, req[kOut], in_0); + break; + } + case 2: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Assign(out, req[kOut], in_0 + in_1); + break; + } + case 3: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Tensor in_2 = in_data[kData2].FlatTo2D(s); + Assign(out, req[kOut], in_0 + in_1 + in_3); + break; + } + case 4: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Tensor in_2 = in_data[kData2].FlatTo2D(s); + Tensor in_3 = in_data[kData3].FlatTo2D(s); + Assign(out, req[kOut], in_0 + in_1 + in_3 + in_4); + break; + } + default: { + LOG_FATAL; + } + } + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(in_data.size(), size_); + CHECK_EQ(out_data.size(), size_); + switch (size_) { + case 1: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor out_0 = out_data[kData0].FlatTo2D(s); + Assign(out, req[kData0], F(in_0)); + break; + } + case 2: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor out_0 = out_data[kData0].FlatTo2D(s); + Assign(out_0, req[kData0], F(in_0)); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Tensor out_1 = out_data[kData1].FlatTo2D(s); + Assign(out_1, req[kData1], F(in_1)); + break; + } + case 3: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor out_0 = out_data[kData0].FlatTo2D(s); + Assign(out_0, req[kData0], F(in_0)); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Tensor out_1 = out_data[kData1].FlatTo2D(s); + Assign(out_1, req[kData1], F(in_1)); + Tensor in_2 = in_data[kData2].FlatTo2D(s); + Tensor out_2 = out_data[kData2].FlatTo2D(s); + Assign(out_2, req[kData2], F(in_2)); + break; + } + case 4: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor out_0 = out_data[kData0].FlatTo2D(s); + Assign(out_0, req[kData0], F(in_0)); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Tensor out_1 = out_data[kData1].FlatTo2D(s); + Assign(out_1, req[kData1], F(in_1)); + Tensor in_2 = in_data[kData2].FlatTo2D(s); + Tensor out_2 = out_data[kData2].FlatTo2D(s); + Assign(out_2, req[kData2], F(in_2)); + Tensor in_3 = in_data[kData3].FlatTo2D(s); + Tensor out_3 = out_data[kData3].FlatTo2D(s); + Assign(out_3, req[kData3], F(in_3)); + break; + } + default: { + LOG_FATAL; + } + } + } + + private: + uint32_t size_; +}; // class ElemPlusOp + +template +Operator* CreateElemPlusOp(uint32_t 
size); + +#if DMLC_USE_CXX11 +class ElemPlusProp : public OperatorProperty { + public: + explicit ElemPlusProp() : size_(0) {} + + explicit ElemPlusProp(uint32_t sz) : size_(sz) {} + + virtual void SetParam(const char *name, const char *val) { + if (!strcmp(name, "size")) size_ = static_cast(atoi(val)); + CHECK_GE(size_, 0); + } + + virtual bool InferShape(std::vector *in_shape, + std::vector *out_shape) const { + using namespace mshadow; + CHECK_GE(size_, 0); + CHECK_EQ(in_shape->size(), size_) << "Input should be: " << size_ << \ + "(Given: " << in_shape->size() << ")"; + const TShape &dshape = in_shape->at(0); + if (dshape.ndim() == 0) return false; + for (auto i : size_) { + CHECK_EQ(dshape, in_shape->at(i)) << "Input at " << i << " has different shape"; + } + out_shape->clear(); + out_shape->push_back(dshape); + return true; + } + + virtual OperatorProperty* Copy() const { + auto ptr = new ElemPlusProp(size_); + return ptr; + } + + virtual std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const { + std::vector ret(size_); + for (auto i : size_) { + ret[i] = in_data[i]; + } + return ret; + } + + virtual std::vector > BackwardInplaceOption( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const { + std::vector > ret; + for (auto i : size_) { + ret.emplace_back(in_data[i], in_grad[i]); + } + return ret; + } + + Operator* CreateOperator(Context ctx) const; + + private: + uint32_t size_; +}; // class ElemPlusProp + +#endif // DMLC_USE_CXX11 + +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_ELEM_PLUS_INL_H_ From a2a6795ed72597eac20ec7275919ef8f9798a109 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 16 Aug 2015 18:29:01 -0600 Subject: [PATCH 05/11] adapt things to parameter --- Makefile | 6 +- include/mxnet/operator.h | 12 +- include/mxnet/symbolic.h | 14 ++ src/c_api.cc | 4 +- src/operator/activation-inl.h | 53 ++++---- src/operator/activation.cc | 14 +- src/operator/activation.cu | 9 +- src/operator/elem_plus-inl.h | 202 ----------------------------- src/operator/elementwise_sum-inl.h | 172 ++++++++++++++++++++++++ src/operator/elementwise_sum.cc | 24 ++++ src/operator/elementwise_sum.cu | 14 ++ src/operator/fully_connected-inl.h | 41 ++++-- src/operator/fully_connected.cc | 6 +- src/operator/fully_connected.cu | 2 +- src/registry.cc | 10 +- 15 files changed, 321 insertions(+), 262 deletions(-) delete mode 100644 src/operator/elem_plus-inl.h create mode 100644 src/operator/elementwise_sum-inl.h create mode 100644 src/operator/elementwise_sum.cc create mode 100644 src/operator/elementwise_sum.cu diff --git a/Makefile b/Makefile index da029f77ef27..e8a0a60844a1 100644 --- a/Makefile +++ b/Makefile @@ -58,14 +58,14 @@ endif BIN = test/api_registry_test OBJ = storage.o narray_op_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o fully_connected_cpu.o static_graph.o activation_cpu.o +OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o fully_connected_cpu.o static_graph.o activation_cpu.o elementwise_sum_cpu.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a LIB_DEP = $(DMLC_CORE)/libdmlc.a ifeq ($(USE_CUDA), 1) - CUOBJ += narray_op_gpu.o fully_connected_gpu.o activation_gpu.o + CUOBJ += narray_op_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o endif .PHONY: clean all test lint doc @@ -89,6 +89,8 @@ fully_connected_cpu.o: 
src/operator/fully_connected.cc fully_connected_gpu.o: src/operator/fully_connected.cu activation_cpu.o: src/operator/activation.cc activation_gpu.o: src/operator/activation.cu +elementwise_sum_cpu.o: src/operator/elementwise_sum.cc +elementwise_sum_gpu.o: src/operator/elementwise_sum.cu lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index 65d6e3e92637..1c6bd860ab27 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -126,6 +126,12 @@ class OperatorProperty { * \brief virtual destructor */ virtual ~OperatorProperty() {} + /*! + * \brief Initialize the Operator by setting the parameters + * This function need to be called before all other functions. + * \param kwargs the keyword arguments parameters + */ + virtual void Init(const std::vector >& kwargs) = 0; /*! * \brief Get input arguments of the Operator. * \return vector of arguments. @@ -159,12 +165,6 @@ class OperatorProperty { virtual int NumVisibleReturns() const { return NumReturns(); } - /*! - * \brief Set the parameters of the Operator. - * \param name parameter name - * \param val string for the configuration - */ - virtual void SetParam(const char *name, const char *val) {} /*! * \brief infer the shapes of outputs and unknown input arguments * \param in_shape the shape of input arguments of the operator diff --git a/include/mxnet/symbolic.h b/include/mxnet/symbolic.h index e04f82b4f30f..106fd31923c7 100644 --- a/include/mxnet/symbolic.h +++ b/include/mxnet/symbolic.h @@ -121,6 +121,20 @@ class StaticGraph { */ bool InferShape(std::vector* in_shape, std::vector* out_shape) const; + /*! + * \brief Add a full backward pass in the static graph. + * This function will add gradient nodes for each heads, + * and add the backward pass to backprop the gradients all + * the way to the arguments. + * + * This will change the nodes field in the StaticGraph, but will not change other fields. + * The head and input of Backward pass will be returned by head_grad_nodes and arg_grads. + * + * \param head_grad_nodes used to store the created head gradient inputs for backward pass. + * \param arg_grads used to store the gradient nodes + */ + void MakeBackwardPass(std::vector *head_grad_nodes, + std::vector *arg_grads) const; }; /*! diff --git a/src/c_api.cc b/src/c_api.cc index 896e0b5e5532..ed5446fc816a 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -316,9 +316,11 @@ int MXSymbolCreateAtomicSymbol(AtomicSymbolCreator creator, API_BEGIN(); OperatorPropertyEntry *e = static_cast(creator); op = (*e)(); + std::vector > kwargs; for (int i = 0; i < num_param; ++i) { - op->SetParam(keys[i], vals[i]); + kwargs.push_back({std::string(keys[i]), std::string(vals[i])}); } + op->Init(kwargs); *s = Symbol::Create(op); *out = s; API_END_HANDLE_ERROR(delete s; delete op); diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index 15ded9761b27..6374d02cc53b 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -1,12 +1,14 @@ /*! 
* Copyright (c) 2015 by Contributors * \file activation-inl.h - * \brief + * \brief Activation operator * \author Bing Xu */ #ifndef MXNET_OPERATOR_ACTIVATION_INL_H_ #define MXNET_OPERATOR_ACTIVATION_INL_H_ + #include +#include #include #include #include @@ -20,7 +22,17 @@ namespace op { // // These enums are only visible within this header enum ActivationOpInputs {kData}; enum ActivationOpOutputs {kOut}; -enum ActivationOpType {kUnknown, kReLU, kSigmoid, kTanh}; +enum ActivationOpType {kReLU, kSigmoid, kTanh}; + +struct ActivationParam : public dmlc::Parameter { + // use int for enumeration + int type; + DMLC_DECLARE_PARAMETER(ActivationParam) { + // TODO(bing) support enum, str->int mapping + DMLC_DECLARE_FIELD(type).set_default(kReLU); + } +}; + /** * \brief This is the implementation of activation operator. * \tparam xpu The device that the op will be executed on. @@ -54,32 +66,26 @@ class ActivationOp : public Operator { CHECK(in_data.size() == 1 && in_grad.size() == 1); CHECK_EQ(req.size(), 1); Stream *s = ctx.get_stream(); - Tensor out_gradient = out_grad[kData].FlatTo2D(s); - Tensor output = out_data[kData].FlatTo2D(s); - Tensor grad = out_grad[kOut].FlatTo2D(s); - Assign(grad, req[kData], F(out_gradient * output)); + Tensor m_out_grad = out_grad[kOut].FlatTo2D(s); + Tensor m_out_data = out_data[kOut].FlatTo2D(s); + Tensor m_in_grad = in_grad[kData].FlatTo2D(s); + Assign(m_in_grad, req[kData], F(m_out_data) * m_out_grad); } }; // class ActivationOp // Decalre Factory function, used for dispatch specialization template -Operator* CreateActivationOp(ActivationOpType type); +Operator* CreateOp(ActivationParam type); #if DMLC_USE_CXX11 class ActivationProp : public OperatorProperty { public: - explicit ActivationProp() : type_(kUnknown) {} - - explicit ActivationProp(ActivationOpType type) : type_(type) {} - - virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "type")) { - if (!strcmp(val, "relu")) type_ = kReLU; - if (!strcmp(val, "sigmoid")) type_ = kSigmoid; - if (!strcmp(val, "tanh")) type_ = kTanh; - } - CHECK(type_ >= kReLU && type_ <= kTanh) << "Invalid activation type"; + virtual void Init(const std::vector >& kwargs) { + // TODO(bing) change directly to vector of pairs begin end + std::map kmap(kwargs.begin(), kwargs.end()); + param_.Init(kmap); } + virtual bool InferShape(std::vector *in_shape, std::vector *out_shape) const { using namespace mshadow; @@ -92,7 +98,8 @@ class ActivationProp : public OperatorProperty { } virtual OperatorProperty* Copy() const { - auto ptr = new ActivationProp(type_); + auto ptr = new ActivationProp(); + ptr->param_ = param_; return ptr; } @@ -105,7 +112,7 @@ class ActivationProp : public OperatorProperty { const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data) const { - return {out_grad[kOut], out_data[kData]}; + return {out_grad[kOut], out_data[kOut]}; } virtual std::vector > BackwardInplaceOption( @@ -113,19 +120,19 @@ class ActivationProp : public OperatorProperty { const std::vector &in_data, const std::vector &out_data, const std::vector &in_grad) const { - return {{out_grad[kData], in_grad[kData]}}; + return {{out_grad[kOut], in_grad[kData]}}; } virtual std::vector > ForwardInplaceOption( const std::vector &in_data, const std::vector &out_data) const { - return {{in_data[kData], out_data[kData]}}; + return {{in_data[kData], out_data[kOut]}}; } Operator* CreateOperator(Context ctx) const; private: - ActivationOpType type_; + ActivationParam param_; }; #endif // DMLC_USE_CXX11 } // 
namespace op diff --git a/src/operator/activation.cc b/src/operator/activation.cc index 6b822a68c8e5..275588e099af 100644 --- a/src/operator/activation.cc +++ b/src/operator/activation.cc @@ -1,7 +1,7 @@ /*! * Copyright (c) 2015 by Contributors * \file activation.cc - * \brief + * \brief activation op * \author Bing Xu */ @@ -12,20 +12,24 @@ namespace mxnet { namespace op { template<> -Operator *CreateActivationOp(ActivationOpType type) { - switch (type) { +Operator *CreateOp(ActivationParam param) { + switch (param.type) { case kReLU: return new ActivationOp(); case kSigmoid: return new ActivationOp(); case kTanh: return new ActivationOp(); - default: return NULL; + default: + LOG(FATAL) << "unknown activation type"; + return NULL; } } // DO_BIND_DISPATCH comes from operator_common.h Operator *ActivationProp::CreateOperator(Context ctx) const { - DO_BIND_DISPATCH(CreateActivationOp, type_); + DO_BIND_DISPATCH(CreateOp, param_); } +DMLC_REGISTER_PARAMETER(ActivationParam); + REGISTER_OP_PROPERTY(Activation, ActivationProp); } // namespace op } // namespace mxnet diff --git a/src/operator/activation.cu b/src/operator/activation.cu index b7c771d653f3..5b7b576e59d7 100644 --- a/src/operator/activation.cu +++ b/src/operator/activation.cu @@ -4,19 +4,20 @@ * \brief * \author Bing Xu */ - #include "./activation-inl.h" #include "./mshadow_op.h" namespace mxnet { namespace op { template<> -Operator *CreateActivationOp(ActivationOpType type) { - switch(type) { +Operator *CreateOp(ActivationParam param) { + switch(param.type) { case kReLU: return new ActivationOp(); case kSigmoid: return new ActivationOp(); case kTanh: return new ActivationOp(); - default: return NULL; + default: + LOG(FATAL) << "unknown activation"; + return NULL; } } } // op diff --git a/src/operator/elem_plus-inl.h b/src/operator/elem_plus-inl.h deleted file mode 100644 index df2f50fbb508..000000000000 --- a/src/operator/elem_plus-inl.h +++ /dev/null @@ -1,202 +0,0 @@ -/*! 
- * Copyright (c) 2015 by Contributors - * \file elem_plus-inl.h - * \brief - * \author Bing Xu -*/ -#ifndef MXNET_OPERATOR_ELEM_PLUS_INL_H_ -#define MXNET_OPERATOR_ELEM_PLUS_INL_H_ -namespace mxnet { -namespace op { -#include -#include -#include -#include -#include -#include -#include "./operator_common.h" - -enum ElemsPlusOpInputs {kData0, kData1, kData2, kData3}; -enum ElemsPlusOpOutputs {kOut}; - -template -class ElemPlusOp : public Operator { - public: - explicit ElemPlusOp(uint32_t size) : size_(size) {} - - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(in_data.size(), cnt_) << "Invalid Input TBlobs"; - CHECK_EQ(out_data.size(), 1); - Stream *s = ctx.get_stream(); - Tensor out = out_data[kOut].FlatTo2D(s); - switch (size_) { - case 1: { - Tensor in_0 = in_data[kData0].FlatTo2D(s); - Assign(out, req[kOut], in_0); - break; - } - case 2: { - Tensor in_0 = in_data[kData0].FlatTo2D(s); - Tensor in_1 = in_data[kData1].FlatTo2D(s); - Assign(out, req[kOut], in_0 + in_1); - break; - } - case 3: { - Tensor in_0 = in_data[kData0].FlatTo2D(s); - Tensor in_1 = in_data[kData1].FlatTo2D(s); - Tensor in_2 = in_data[kData2].FlatTo2D(s); - Assign(out, req[kOut], in_0 + in_1 + in_3); - break; - } - case 4: { - Tensor in_0 = in_data[kData0].FlatTo2D(s); - Tensor in_1 = in_data[kData1].FlatTo2D(s); - Tensor in_2 = in_data[kData2].FlatTo2D(s); - Tensor in_3 = in_data[kData3].FlatTo2D(s); - Assign(out, req[kOut], in_0 + in_1 + in_3 + in_4); - break; - } - default: { - LOG_FATAL; - } - } - } - - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(in_data.size(), size_); - CHECK_EQ(out_data.size(), size_); - switch (size_) { - case 1: { - Tensor in_0 = in_data[kData0].FlatTo2D(s); - Tensor out_0 = out_data[kData0].FlatTo2D(s); - Assign(out, req[kData0], F(in_0)); - break; - } - case 2: { - Tensor in_0 = in_data[kData0].FlatTo2D(s); - Tensor out_0 = out_data[kData0].FlatTo2D(s); - Assign(out_0, req[kData0], F(in_0)); - Tensor in_1 = in_data[kData1].FlatTo2D(s); - Tensor out_1 = out_data[kData1].FlatTo2D(s); - Assign(out_1, req[kData1], F(in_1)); - break; - } - case 3: { - Tensor in_0 = in_data[kData0].FlatTo2D(s); - Tensor out_0 = out_data[kData0].FlatTo2D(s); - Assign(out_0, req[kData0], F(in_0)); - Tensor in_1 = in_data[kData1].FlatTo2D(s); - Tensor out_1 = out_data[kData1].FlatTo2D(s); - Assign(out_1, req[kData1], F(in_1)); - Tensor in_2 = in_data[kData2].FlatTo2D(s); - Tensor out_2 = out_data[kData2].FlatTo2D(s); - Assign(out_2, req[kData2], F(in_2)); - break; - } - case 4: { - Tensor in_0 = in_data[kData0].FlatTo2D(s); - Tensor out_0 = out_data[kData0].FlatTo2D(s); - Assign(out_0, req[kData0], F(in_0)); - Tensor in_1 = in_data[kData1].FlatTo2D(s); - Tensor out_1 = out_data[kData1].FlatTo2D(s); - Assign(out_1, req[kData1], F(in_1)); - Tensor in_2 = in_data[kData2].FlatTo2D(s); - Tensor out_2 = out_data[kData2].FlatTo2D(s); - Assign(out_2, req[kData2], F(in_2)); - Tensor in_3 = in_data[kData3].FlatTo2D(s); - Tensor out_3 = out_data[kData3].FlatTo2D(s); - Assign(out_3, req[kData3], F(in_3)); - break; - } - default: { - LOG_FATAL; - } - } - } - - private: - uint32_t size_; -}; // class ElemPlusOp - -template -Operator* CreateElemPlusOp(uint32_t 
size); - -#if DMLC_USE_CXX11 -class ElemPlusProp : public OperatorProperty { - public: - explicit ElemPlusProp() : size_(0) {} - - explicit ElemPlusProp(uint32_t sz) : size_(sz) {} - - virtual void SetParam(const char *name, const char *val) { - if (!strcmp(name, "size")) size_ = static_cast(atoi(val)); - CHECK_GE(size_, 0); - } - - virtual bool InferShape(std::vector *in_shape, - std::vector *out_shape) const { - using namespace mshadow; - CHECK_GE(size_, 0); - CHECK_EQ(in_shape->size(), size_) << "Input should be: " << size_ << \ - "(Given: " << in_shape->size() << ")"; - const TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; - for (auto i : size_) { - CHECK_EQ(dshape, in_shape->at(i)) << "Input at " << i << " has different shape"; - } - out_shape->clear(); - out_shape->push_back(dshape); - return true; - } - - virtual OperatorProperty* Copy() const { - auto ptr = new ElemPlusProp(size_); - return ptr; - } - - virtual std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const { - std::vector ret(size_); - for (auto i : size_) { - ret[i] = in_data[i]; - } - return ret; - } - - virtual std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const { - std::vector > ret; - for (auto i : size_) { - ret.emplace_back(in_data[i], in_grad[i]); - } - return ret; - } - - Operator* CreateOperator(Context ctx) const; - - private: - uint32_t size_; -}; // class ElemPlusProp - -#endif // DMLC_USE_CXX11 - -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_ELEM_PLUS_INL_H_ diff --git a/src/operator/elementwise_sum-inl.h b/src/operator/elementwise_sum-inl.h new file mode 100644 index 000000000000..f0a558b3b0cc --- /dev/null +++ b/src/operator/elementwise_sum-inl.h @@ -0,0 +1,172 @@ +/*! 
+ * Copyright (c) 2015 by Contributors + * \file elemementwise_sum-inl.h + * \brief elementwise sum + * \author Bing Xu +*/ +#ifndef MXNET_OPERATOR_ELEMENTWISE_SUM_INL_H_ +#define MXNET_OPERATOR_ELEMENTWISE_SUM_INL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" + +namespace mxnet { +namespace op { + +enum ElementWiseSumOpInputs {kData0, kData1, kData2, kData3}; +enum ElementWiseSumOpOutputs {kOut}; + +struct ElementWiseSumParam : public dmlc::Parameter { + int size; + DMLC_DECLARE_PARAMETER(ElementWiseSumParam) { + DMLC_DECLARE_FIELD(size).set_range(1, 100); + } +}; + +template +class ElementWiseSumOp : public Operator { + public: + explicit ElementWiseSumOp(ElementWiseSumParam param) + : size_(param.size) {} + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(static_cast(in_data.size()), size_); + CHECK_EQ(out_data.size(), 1); + if (req[kOut] == kNullOp) return; + + Stream *s = ctx.get_stream(); + Tensor out = out_data[kOut].FlatTo2D(s); + switch (size_) { + case 2: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Assign(out, req[kOut], in_0 + in_1); + break; + } + case 3: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Tensor in_2 = in_data[kData2].FlatTo2D(s); + Assign(out, req[kOut], in_0 + in_1 + in_2); + break; + } + case 4: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Tensor in_1 = in_data[kData1].FlatTo2D(s); + Tensor in_2 = in_data[kData2].FlatTo2D(s); + Tensor in_3 = in_data[kData3].FlatTo2D(s); + Assign(out, req[kOut], in_0 + in_1 + in_2 + in_3); + break; + } + default: { + Tensor in_0 = in_data[kData0].FlatTo2D(s); + Assign(out, req[kOut], in_0); + for (int i = 0; i < size_; ++i) { + out += in_data[i].FlatTo2D(s); + } + } + } + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(out_grad.size(), static_cast(size_)); + Stream *s = ctx.get_stream(); + Tensor ograd = out_grad[kOut].FlatTo2D(s); + + for (int i = 0; i < size_; ++i) { + if (req[i] == kNullOp || req[i] == kWriteInplace) continue; + Tensor igrad = in_grad[i].FlatTo2D(s); + Assign(igrad, req[i], ograd); + } + } + + private: + int size_; +}; // class ElementWiseSumOp + +template +Operator* CreateOp(ElementWiseSumParam param); + +#if DMLC_USE_CXX11 +class ElementWiseSumProp : public OperatorProperty { + public: + virtual void Init(const std::vector >& kwargs) { + // TODO(bing) change directly to vector of pairs begin end + std::map kmap(kwargs.begin(), kwargs.end()); + param_.Init(kmap); + } + + virtual bool InferShape(std::vector *in_shape, + std::vector *out_shape) const { + using namespace mshadow; + CHECK_EQ(in_shape->size(), static_cast(param_.size)); + const TShape &dshape = in_shape->at(0); + if (dshape.ndim() == 0) return false; + for (int i = 1; i < param_.size; ++i) { + SHAPE_ASSIGN_CHECK(*in_shape, i, dshape); + } + out_shape->clear(); + out_shape->push_back(dshape); + return true; + } + + virtual OperatorProperty* Copy() const { + auto ptr = new ElementWiseSumProp(); + ptr->param_ = param_; + return ptr; + } + + virtual std::string TypeString() const { + return "ElementWiseSum"; + } + + 
virtual std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const { + return out_grad; + } + + virtual std::vector > BackwardInplaceOption( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const { + return {{out_grad[0], in_grad[0]}}; + } + + virtual std::vector > ForwardInplaceOption( + const std::vector &in_data, + const std::vector &out_data) const { + return {{in_data[0], out_data[0]}}; + } + + Operator* CreateOperator(Context ctx) const; + + private: + ElementWiseSumParam param_; +}; // class ElementWiseSumProp + +#endif // DMLC_USE_CXX11 + +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_ELEMENTWISE_SUM_INL_H_ diff --git a/src/operator/elementwise_sum.cc b/src/operator/elementwise_sum.cc new file mode 100644 index 000000000000..38e29141c7b3 --- /dev/null +++ b/src/operator/elementwise_sum.cc @@ -0,0 +1,24 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file elementwise_sum.cc + * \brief elementwise sum operator +*/ +#include +#include "./elementwise_sum-inl.h" +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(ElementWiseSumParam param) { + return new ElementWiseSumOp(param); +} + +// DO_BIND_DISPATCH comes from static_operator_common.h +Operator* ElementWiseSumProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp, param_); +} + +DMLC_REGISTER_PARAMETER(ElementWiseSumParam); + +REGISTER_OP_PROPERTY(ElementWiseSum, ElementWiseSumProp); +} // namespace op +} // namespace mxnet diff --git a/src/operator/elementwise_sum.cu b/src/operator/elementwise_sum.cu new file mode 100644 index 000000000000..7a9b443dad82 --- /dev/null +++ b/src/operator/elementwise_sum.cu @@ -0,0 +1,14 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file elementwise_sum.cu + * \brief elementwise sum operator +*/ +#include "./elementwise_sum-inl.h" +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(ElementWiseSumParam param) { + return new ElementWiseSumOp(param); +} +} // namespace op +} // namespace mxnet diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index e2913e65aba8..9dbb9bda8649 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -7,6 +7,7 @@ #define MXNET_OPERATOR_FULLY_CONNECTED_INL_H_ #include +#include #include #include #include @@ -22,6 +23,17 @@ namespace op { enum FullyConnectedOpInputs {kData, kWeight, kBias}; enum FullyConnectedOpOutputs {kOut}; +struct FullyConnectedParam : public dmlc::Parameter { + int num_hidden; + bool no_bias; + DMLC_DECLARE_PARAMETER(FullyConnectedParam) { + // TODO(bing) change to only set lower bound + // add support for boolean + DMLC_DECLARE_FIELD(num_hidden).set_range(1, 100000); + DMLC_DECLARE_FIELD(no_bias).set_default(false); + } +}; + /** * \brief This is the implementation of fully connected operator. * \tparam xpu The device that the op will be executed on. @@ -29,7 +41,7 @@ enum FullyConnectedOpOutputs {kOut}; template class FullyConnectedOp : public Operator { public: - explicit FullyConnectedOp(Param p) { + explicit FullyConnectedOp(FullyConnectedParam p) { this->param_ = p; } @@ -40,7 +52,7 @@ class FullyConnectedOp : public Operator { using namespace mshadow; using namespace mshadow::expr; CHECK_EQ(req[kOut], kWriteTo); - size_t expected = param_.no_bias == 0 ? 3 : 2; + size_t expected = param_.no_bias ? 
2 : 3; CHECK_EQ(in_data.size(), expected); CHECK_EQ(out_data.size(), 1); // TODO(bing): check the BLAS Handle, be careful @@ -50,7 +62,7 @@ class FullyConnectedOp : public Operator { Tensor wmat = in_data[kWeight].get(s); Tensor out = out_data[kOut].FlatTo2D(s); out = dot(data, wmat.T()); - if (param_.no_bias == 0) { + if (!param_.no_bias) { Tensor bias = in_data[kBias].get(s); out += repmat(bias, data.size(0)); } @@ -65,7 +77,7 @@ class FullyConnectedOp : public Operator { using namespace mshadow; using namespace mshadow::expr; CHECK_EQ(out_grad.size(), 1); - size_t expected = param_.no_bias == 0 ? 3 : 2; + size_t expected = param_.no_bias ? 2 : 3; CHECK(in_data.size() == expected && in_grad.size() == expected); CHECK_EQ(req.size(), expected); // TODO(bing): check the BLAS Handle, be careful @@ -80,7 +92,7 @@ class FullyConnectedOp : public Operator { Tensor gwmat = in_grad[kWeight].get(s); Assign(gwmat, req[kWeight], dot(grad.T(), data)); // gradient of bias - if (param_.no_bias == 0) { + if (!param_.no_bias) { Tensor gbias = in_grad[kBias].get(s); Assign(gbias, req[kBias], sum_rows(grad)); } @@ -90,33 +102,34 @@ class FullyConnectedOp : public Operator { } private: - /** The param of the fully connected layer.*/ - Param param_; + FullyConnectedParam param_; }; // class FullyConnectedOp // Decalre Factory function, used for dispatch specialization template -Operator* CreateFullyConnectedOp(Param param); +Operator* CreateOp(FullyConnectedParam param); #if DMLC_USE_CXX11 class FullyConnectedProp : public OperatorProperty { public: virtual std::vector ListArguments() const { - if (param_.no_bias == 0) { + if (!param_.no_bias) { return {"data", "weight", "bias"}; } else { return {"data", "weight"}; } } - virtual void SetParam(const char *name, const char *val) { - param_.SetParam(name, val); + virtual void Init(const std::vector >& kwargs) { + // TODO(bing) change directly to vector of pairs begin end + std::map kmap(kwargs.begin(), kwargs.end()); + param_.Init(kmap); } virtual bool InferShape(std::vector *in_shape, std::vector *out_shape) const { using namespace mshadow; - if (param_.no_bias == 0) { + if (!param_.no_bias) { CHECK_EQ(in_shape->size(), 3) << "Input:[data, weight, bias]"; } else { CHECK_EQ(in_shape->size(), 2) << "Input:[data, weight]"; @@ -137,7 +150,7 @@ class FullyConnectedProp : public OperatorProperty { num_input = dshape[1]; } SHAPE_ASSIGN_CHECK(*in_shape, kWeight, Shape2(param_.num_hidden, num_input)); - if (param_.no_bias == 0) { + if (!param_.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, kBias, Shape1(param_.num_hidden)); } out_shape->clear(); @@ -173,7 +186,7 @@ class FullyConnectedProp : public OperatorProperty { Operator* CreateOperator(Context ctx) const; private: - Param param_; + FullyConnectedParam param_; }; // class FullyConnectedSymbol #endif } // namespace op diff --git a/src/operator/fully_connected.cc b/src/operator/fully_connected.cc index 362d3c5698aa..7d529cb3ed64 100644 --- a/src/operator/fully_connected.cc +++ b/src/operator/fully_connected.cc @@ -8,15 +8,17 @@ namespace mxnet { namespace op { template<> -Operator* CreateFullyConnectedOp(Param param) { +Operator* CreateOp(FullyConnectedParam param) { return new FullyConnectedOp(param); } // DO_BIND_DISPATCH comes from static_operator_common.h Operator* FullyConnectedProp::CreateOperator(Context ctx) const { - DO_BIND_DISPATCH(CreateFullyConnectedOp, param_); + DO_BIND_DISPATCH(CreateOp, param_); } +DMLC_REGISTER_PARAMETER(FullyConnectedParam); + REGISTER_OP_PROPERTY(FullyConnected, FullyConnectedProp); } 
// namespace op } // namespace mxnet diff --git a/src/operator/fully_connected.cu b/src/operator/fully_connected.cu index 223ef5166cc9..b97df8afb44c 100644 --- a/src/operator/fully_connected.cu +++ b/src/operator/fully_connected.cu @@ -7,7 +7,7 @@ namespace mxnet { namespace op { template<> -Operator* CreateFullyConnectedOp(Param param) { +Operator* CreateOp(FullyConnectedParam param) { return new FullyConnectedOp(param); } } // namespace op diff --git a/src/registry.cc b/src/registry.cc index 42fef1df3423..f64980d8bacc 100644 --- a/src/registry.cc +++ b/src/registry.cc @@ -25,12 +25,18 @@ Registry *Registry::Get() { return &instance; } -#if DMLC_USE_CXX11 + template NArrayFunctionEntry &Registry::Register(const std::string& name); template Registry *Registry::Get(); -#endif template OperatorPropertyEntry &Registry::Register(const std::string& name); template Registry *Registry::Get(); +// implementation of all factory functions +OperatorProperty *OperatorProperty::Create(const char* type_name) { + auto *creator = Registry::Find(type_name); + CHECK_NE(creator, nullptr) + << "Cannot find Operator " << type_name << " in registry"; + return (*creator)(); +} } // namespace mxnet From c78775a753736e4de7efd6dbc5dc4cecf84780e2 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 16 Aug 2015 23:17:53 -0600 Subject: [PATCH 06/11] implement backward --- include/mxnet/symbolic.h | 32 +++++++++++- src/symbol/static_graph.cc | 102 ++++++++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 3 deletions(-) diff --git a/include/mxnet/symbolic.h b/include/mxnet/symbolic.h index 106fd31923c7..e24c03a0cd0b 100644 --- a/include/mxnet/symbolic.h +++ b/include/mxnet/symbolic.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "./base.h" @@ -37,6 +38,32 @@ class StaticGraph { uint32_t source_id; /*! \brief index of output from the source. */ uint32_t index; + /*! \brief default constructor */ + DataEntry() {} + /*! + * \brief constructor with source and index + * \param source_id source id + * \param index node index + */ + DataEntry(uint32_t source_id, uint32_t index) + : source_id(source_id), index(index) {} + /*! + * \brief compare equality + * \param other the other entry to compare + * \return whether two entries equals to each other + */ + inline bool operator==(const DataEntry &other) const { + return source_id == other.source_id && index == other.index; + } + /*! + * \brief comparator, allows to use map + * \param other the other entry to compare + * \return whether two entries is smaller than the other + */ + inline bool operator<(const DataEntry &other) const { + if (source_id == other.source_id) return index < other.index; + return source_id < other.source_id; + } }; /*! * \brief Operation Node in static graphs. @@ -131,10 +158,11 @@ class StaticGraph { * The head and input of Backward pass will be returned by head_grad_nodes and arg_grads. * * \param head_grad_nodes used to store the created head gradient inputs for backward pass. - * \param arg_grads used to store the gradient nodes +<<<<<<< HEAD + * \param arg_grads used to store gradients to args, can be multiple one if an argument is used by operator */ void MakeBackwardPass(std::vector *head_grad_nodes, - std::vector *arg_grads) const; + std::vector > *arg_grads); }; /*! 
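As a rough orientation for how the MakeBackwardPass entry point declared above is meant to be consumed, the sketch below calls it on an already-built graph and inspects the per-argument gradient lists. Everything except the MakeBackwardPass call itself is illustrative: the function name SketchBackward, the graph pointer g, and the logging are placeholders, and the element types (uint32_t node ids, lists of StaticGraph::DataEntry) are inferred from the implementation in static_graph.cc that follows, so treat this as a sketch rather than code from the patch series.

#include <vector>
#include <dmlc/logging.h>
#include <mxnet/symbolic.h>

// Illustrative only: `g` is assumed to already hold the forward nodes.
void SketchBackward(mxnet::StaticGraph *g) {
  std::vector<uint32_t> head_grad_nodes;  // ids of the created head-gradient variable nodes
  std::vector<std::vector<mxnet::StaticGraph::DataEntry> > arg_grads;  // gradient entries per argument
  g->MakeBackwardPass(&head_grad_nodes, &arg_grads);
  for (size_t i = 0; i < arg_grads.size(); ++i) {
    // An argument that feeds more than one operator may get several entries here.
    LOG(INFO) << "argument " << i << " has "
              << arg_grads[i].size() << " gradient source(s)";
  }
}

Inside MakeBackwardPass itself, when one output feeds several consumers, the implementation that follows aggregates the incoming gradients by inserting an ElementWiseSum node before building the backward node.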
diff --git a/src/symbol/static_graph.cc b/src/symbol/static_graph.cc index 62de7963638a..3bec3427fbb3 100644 --- a/src/symbol/static_graph.cc +++ b/src/symbol/static_graph.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include "../operator/operator_common.h" namespace mxnet { @@ -76,7 +77,7 @@ bool StaticGraph::InferNodeShapes(const std::vector &topo_order, const DataEntry& e = node.inputs[i]; (*node_out_shapes)[e.source_id][e.index] = in_shape[i]; } - } else if (node.is_backward()) { + } else if (nodes[nid].is_backward()) { // simply use shapes from forward pass to assign backward shape const Node& forward = nodes[node.backward_source_id]; CHECK(forward.is_forward()); @@ -150,4 +151,103 @@ bool StaticGraph::InferShape(std::vector *in_shape, } return true; } + +void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, + std::vector > *arg_grads) { + arg_grads->clear(); + head_grad_nodes->clear(); + // get topo order of nodes, before new nodes are added + std::vector topo_order = TopoSort(); + // map out_data entry to out_grad + std::map > grad_map; + // allocate head gradient nodes + for (DataEntry head : heads) { + uint32_t nid = static_cast(nodes.size()); + // create a variable node for gradient input + nodes.push_back(Node()); + Node &node = nodes[nid]; + std::ostringstream os; + os << nodes[head.source_id].name << '_' << head.index << "_grad"; + // TODO(bing): add index to name + node.name = os.str(); + DataEntry igrad(nid, 0); + head_grad_nodes->push_back(nid); + // update gradient map + auto it = grad_map.find(head); + if (it == grad_map.end()) { + grad_map[head] = {igrad}; + } else { + it->second.push_back(igrad); + } + } + // do backward pass traverse + for (auto it = topo_order.rbegin(); it != topo_order.rend(); ++it) { + uint32_t nid = *it; + // skip variables + if (nodes[nid].is_variable()) continue; + CHECK(nodes[nid].is_forward()) << "Do not support Backward of Backward"; + // get out_grad and out_data entry + std::vector out_grad, out_data; + // nvisible is out_grad.size() + int nvisible = nodes[nid].op->NumVisibleReturns(); + // ntotal is out_data.size() + int ntotal = nodes[nid].op->NumReturns(); + // check all outpus + for (int i = 0; i < ntotal; ++i) { + DataEntry odata(nid, static_cast(i)); + out_data.push_back(odata); + if (i >= nvisible) continue; + // get out_grad + auto it = grad_map.find(odata); + CHECK(it != grad_map.end()) << "bad graph"; + std::vector &gnodes = it->second; + if (gnodes.size() == 1) { + out_grad.push_back(gnodes[0]); + } else { + // find multiple gradients, need aggregate + std::ostringstream os_size, os_name; + uint32_t agg_node_id = static_cast(nodes.size()); + nodes.push_back(Node()); + Node &agg_node = nodes[agg_node_id]; + agg_node.op.reset(OperatorProperty::Create("ElementWiseSum")); + os_size << gnodes.size(); + agg_node.op->Init({{"size", os_size.str()}}); + os_name << nodes[nid].name << '_' << i << "_out_grad_agg"; + agg_node.name = os_name.str(); + agg_node.inputs = gnodes; + out_grad.push_back(DataEntry(agg_node_id, 0)); + } + } + // Create a gradient backward node + nodes.push_back(Node()); + uint32_t grad_node_id = static_cast(nodes.size()); + Node &grad_node = nodes[grad_node_id]; + // Point to the corresponding source + grad_node.backward_source_id = nid; + // select out the dependent inputs + grad_node.inputs = nodes[nid].op->BackwardInputs( + out_grad, nodes[nid].inputs, out_data); + grad_node.name = nodes[nid].name + "_backward"; + + // update gradient map + for (size_t i = 0; i < nodes[nid].inputs.size(); ++i) { 
+ DataEntry idata = nodes[nid].inputs[i]; + DataEntry igrad(grad_node_id, static_cast(i)); + auto it = grad_map.find(idata); + if (it == grad_map.end()) { + grad_map[idata] = {igrad}; + } else { + it->second.push_back(igrad); + } + } + } + // create return values of arg_grads + arg_grads->resize(arg_nodes.size()); + for (size_t i = 0; i < arg_nodes.size(); ++i) { + DataEntry odata(arg_nodes[i], 0); + auto it = grad_map.find(odata); + CHECK(it != grad_map.end()) << "bad graph"; + arg_grads->at(i) = it->second; + } +} } // namespace mxnet From d9748f11d5a0edc57a3fd22fb7e441c3a69f00c6 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Mon, 17 Aug 2015 00:33:40 -0600 Subject: [PATCH 07/11] add enum to activation --- Makefile | 2 +- include/mxnet/c_api.h | 67 ++++++++++++- include/mxnet/symbolic.h | 2 +- python/mxnet/base.py | 2 +- python/mxnet/symbol.py | 132 +++++++++++++++++++++---- python/test_mnist.py | 131 +++++++++++++++++++++++++ src/c_api.cc | 67 +++++++++++++ src/operator/activation-inl.h | 5 +- src/operator/elementwise_sum-inl.h | 1 + src/operator/fully_connected-inl.h | 1 + windows/mxnet.sln | 28 ------ windows/mxnet.vcxproj | 148 ----------------------------- windows/mxnet.vcxproj.filters | 48 ---------- windows/mxnet.vcxproj.user | 4 - 14 files changed, 386 insertions(+), 252 deletions(-) create mode 100644 python/test_mnist.py delete mode 100755 windows/mxnet.sln delete mode 100755 windows/mxnet.vcxproj delete mode 100755 windows/mxnet.vcxproj.filters delete mode 100755 windows/mxnet.vcxproj.user diff --git a/Makefile b/Makefile index d13688e6f0dc..b763a406da23 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ endif BIN = test/api_registry_test test/test_storage OBJ = narray_op_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o fully_connected_cpu.o static_graph.o activation_cpu.o elementwise_sum_cpu.o +OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o elementwise_sum_cpu.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index fe035b21bc7f..cd0b6b2206c1 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -38,8 +38,9 @@ typedef void *AtomicSymbolHandle; typedef void *OperatorHandle; /*! \brief handle to a DataIterator */ typedef void *DataIterHandle; - -/*! +/*! \brief handle to an Executor */ +typedef void *ExecutorHandle; +/* * \brief return str message of the last error * all function in this file will return 0 when success * and -1 when an error occured, @@ -325,6 +326,7 @@ MXNET_DLL int MXSymbolCompose(SymbolHandle sym, * The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data * The call will be treated as a kwargs call if key != nullptr or num_args==0, otherwise it is positional. * + * \param sym symbol handle * \param num_args numbe of input arguments. * \param keys the key of keyword args (optional) * \param arg_ind_ptr the head pointer of the rows in CSR @@ -458,4 +460,65 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, MXNET_DLL int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out); +//-------------------------------------------- +// Part 56: Executor +//-------------------------------------------- +/*! 
+ * \brief Executor forward method + * + * \param handle executor handle + * \param len length of narray handles + * \param input input NArray handles + * + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXExecutorForward(ExecutorHandle handle, + mx_uint len, + NArrayHandle *input); + +/** + * \brief Excecutor run backward + * + * \param handle execute handle + * \param len lenth + * \param head_grads NArray handle for heads' gradient + * + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXExecutorBackward(ExecutorHandle handle, + mx_uint len, + NArrayHandle *head_grads); + +/** + * \brief Get executor's head NArray + * + * \param handle executor handle + * \param out_size output narray vector size + * \param out out put narray handles + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXExecutorHeads(ExecutorHandle handle, + mx_uint *out_size, + NArrayHandle **out); + +/** + * \brief Generate Executor from symbol + * + * \param handle executor hanlde (to be generated) + * \param symbol_handle symbol handle + * \param len length + * \param in_args in args array + * \param arg_grad_store arg grads handle array + * \param grad_req_type grad req array + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXExecutorBind(ExecutorHandle handle, + SymbolHandle symbol_handle, + int dev_mask, + int dev_id, + mx_uint len, + NArrayHandle *in_args, + NArrayHandle *arg_grad_store, + mx_uint *grad_req_type); + #endif // MXNET_C_API_H_ diff --git a/include/mxnet/symbolic.h b/include/mxnet/symbolic.h index e24c03a0cd0b..bee0e921117a 100644 --- a/include/mxnet/symbolic.h +++ b/include/mxnet/symbolic.h @@ -365,7 +365,7 @@ class Executor { * \brief Perform a Forward operation of Operator * After this operation, user can get the result by using function head. */ - virtual void Forward() = 0; + virtual void Forward(const std::vector &inputs) = 0; /*! * \brief Perform a Backward operation of the Operator. * This must be called after Forward. 
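To make the intended call order of the executor C API additions above concrete, here is a minimal hypothetical driver. The symbol handle, NArray handle arrays, counts, and the dev_mask/dev_id values are placeholders assumed to have been prepared through the existing symbol/NArray C API, error codes are ignored, and since MXExecutorBind as declared in this commit takes the handle by value rather than through an out parameter, the sketch only illustrates the sequence of calls, not a finished workflow.

#include <mxnet/c_api.h>

// Illustrative only: every argument is assumed to be supplied by the caller.
void SketchExecutorRoundTrip(SymbolHandle sym,
                             mx_uint num_args, NArrayHandle *in_args,
                             NArrayHandle *arg_grads, mx_uint *grad_req,
                             mx_uint num_inputs, NArrayHandle *inputs,
                             mx_uint num_heads, NArrayHandle *head_grads) {
  ExecutorHandle exec = NULL;
  // dev_mask = 1, dev_id = 0 are taken here to mean "CPU 0" (an assumption).
  MXExecutorBind(exec, sym, 1, 0, num_args, in_args, arg_grads, grad_req);
  MXExecutorForward(exec, num_inputs, inputs);      // run the forward pass
  MXExecutorBackward(exec, num_heads, head_grads);  // backprop the head gradients
  mx_uint num_out = 0;
  NArrayHandle *outs = NULL;
  MXExecutorHeads(exec, &num_out, &outs);           // fetch the head output handles
}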
diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 8cb698aa8219..e30c77d382a3 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -69,7 +69,7 @@ def _load_lib(): FunctionHandle = ctypes.c_void_p SymbolCreatorHandle = ctypes.c_void_p SymbolHandle = ctypes.c_void_p - +ExecutorHandle = ctypes.c_void_p #---------------------------- # helper function definition #---------------------------- diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py index 0caa4b6a0a90..c491eacb1ac4 100644 --- a/python/mxnet/symbol.py +++ b/python/mxnet/symbol.py @@ -1,13 +1,14 @@ # coding: utf-8 -# pylint: disable=invalid-name, protected-access +# pylint: disable=invalid-name, protected-access, too-many-locals """Symbol support of mxnet""" from __future__ import absolute_import import ctypes from .base import _LIB -from .base import c_array, c_str, mx_uint +from .base import c_array, c_str, mx_uint, NArrayHandle, ExecutorHandle from .base import SymbolHandle from .base import check_call +from .narray import NArray class Symbol(object): """SymbolCreator is a function that takes Param and return symbol""" @@ -162,7 +163,8 @@ def infer_shape(self, *args, **kwargs): The order is in the same order as list_returns() """ if len(args) != 0 and len(kwargs) != 0: - raise ValueError('Can only specify known argument shapes either by positional or kwargs way.') + raise ValueError('Can only specify known argument \ + shapes either by positional or kwargs way.') sdata = [] indptr = [0] if len(args) != 0: @@ -188,21 +190,23 @@ def infer_shape(self, *args, **kwargs): out_shape_ndim = ctypes.POINTER(mx_uint)() out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() complete = ctypes.c_int() - check_call(_LIB.MXSymbolInferShape( - self.handle, len(indptr) - 1, - c_array(ctypes.c_char_p, keys), - c_array(mx_uint, indptr), - c_array(mx_uint, sdata), - ctypes.byref(arg_shape_size), - ctypes.byref(arg_shape_ndim), - ctypes.byref(arg_shape_data), - ctypes.byref(out_shape_size), - ctypes.byref(out_shape_ndim), - ctypes.byref(out_shape_data), + check_call(_LIB.MXSymbolInferShape( \ + self.handle, len(indptr) - 1, \ + c_array(ctypes.c_char_p, keys), \ + c_array(mx_uint, indptr), \ + c_array(mx_uint, sdata), \ + ctypes.byref(arg_shape_size), \ + ctypes.byref(arg_shape_ndim), \ + ctypes.byref(arg_shape_data), \ + ctypes.byref(out_shape_size), \ + ctypes.byref(out_shape_ndim), \ + ctypes.byref(out_shape_data), \ ctypes.byref(complete))) if complete.value != 0: - arg_shapes = [tuple(arg_shape_data[i][:arg_shape_ndim[i]]) for i in range(arg_shape_size.value)] - out_shapes = [tuple(out_shape_data[i][:out_shape_ndim[i]]) for i in range(out_shape_size.value)] + arg_shapes = [tuple(arg_shape_data[i][:arg_shape_ndim[i]]) \ + for i in range(arg_shape_size.value)] + out_shapes = [tuple(out_shape_data[i][:out_shape_ndim[i]]) \ + for i in range(out_shape_size.value)] return (arg_shapes, out_shapes) else: return (None, None) @@ -216,6 +220,100 @@ def debug_str(self): Debug string of the symbol. 
""" debug_str = ctypes.c_char_p() - check_call(_LIB.MXSymbolPrint( + check_call(_LIB.MXSymbolPrint( \ self.handle, ctypes.byref(debug_str))) return debug_str.value + +class Executor(object): + """handle of executor""" + handle = None + def __init__(self, handle): + """Init an executor from handle + + Parameters + ---------- + handle: ExecutorHandle + ExecutorHandle generated by calling Bind + """ + if not isinstance(ExecutorHandle): + raise TypeError("Handle type error") + self.handle = handle + + def forward(self, inputs): + """do forward on inputs data + + Parameters + ---------- + inputs: Array of NArray + inputs narray to executor + """ + if self.handle == None: + raise Exception("Bind symbol before use executor") + for obj in inputs: + if not isinstance(obj, NArray): + raise TypeError("inputs must be NArray") + narray = c_array([item.handle for item in inputs]) + check_call(_LIB.MXExecutorForward (self.hanlde, mx_uint(len(inputs), narray)) + + def backward(self, grads): + """do backward on heads' grads + + Parameters + ---------- + grads: Array of NArray + heads' gradient + """ + if self.handle == None: + raise Exception("Bind symbol before use executor") + for obj in grads: + if not isinstance(obj, NArray): + raise TypeError("inputs must be NArray") + narray = c_array(NArrayHandle, [item.handle for item in grads]) + check_call(_LIB.MXExecutorForward (self.hanlde, mx_uint(len(grads), narray)) + + def heads(self): + """list all heads' output narray + + Returns + ------- + a list of narray binded to the heads of executor + """ + if self.handle == None: + raise Exception("Bind symbol before use executor") + out_size = mx_uint() + handles = ctypes.POINTER(ctypes.POINTER(NArrayHandle))() + check_call(_LIB.MXExecutorHeads(self.handle, ctypes.byref(out_szie), narrays)) + return [NArray(handle[i]) for i in xrange(out_size)] + + +def Bind(sym, ctx, args, args_grad, reqs): + """Bind a symbol to get an executor + + Parameters + ---------- + sym: Symbol + symbol to be binded + ctx: Context + context executor to run on + args: Array of NArray + input args to the symbol + args_grad: Array of NArray + input args' gradient + reqs: Array of enum + graident requirements + """ + """gradient requirements enum""" + enum = {"null" : 0, "write_to" : 1, "in_place":2, "add_to" : 3} + + if not isinstance(sym, Symbol): + raise TypeError("Symbol type error") + if not isinstance(ctx, Context): + raise TypeError("Context type error") + args_handle = c_array(NArrayHandle, [item.handle for item in args]) + args_grad_handle = c_array(NArrayHandle, [item.handle for item in args_grad]) + reqs_array = c_array(mx_uint, mx_uint(enum[item]) for item in req) + handle = ExecutorHandle() + check_call(_LIB.MXExecutorBind(handle, sym.handle, \ + mx_uint(ctx.device_mask), mx_uint(ctx.device_id), \ + args_handle, args_grad_handle, reqs_array) + return Executor(handle); \ No newline at end of file diff --git a/python/test_mnist.py b/python/test_mnist.py new file mode 100644 index 000000000000..71d79dd607e6 --- /dev/null +++ b/python/test_mnist.py @@ -0,0 +1,131 @@ +# pylint: skip-file +import mxnet as mx +import numpy as np +import os, cPickle, gzip + +def Softmax(x): + maxes = np.max(x, axis=1) + x -= maxes.reshape(maxes.shape[0], 1) + e = np.exp(x) + return e / np.sum(e, axis=1) + +def CalAcc(out, label): + pred = np.argmax(out, axis=1) + return np.sum(pred == label) * 1.0 / out.shape[0] + +def SetGradient(out_grad, label): + assert(out_grad.shape[0] == label.shape[0]) + for i in xrange(label.shape[0]): + k = label[i] + 
out_grad[i][k] -= 1.0 + +# load data +class MNISTIter(object): + def __init__(self, which_set, batch_size=100): + if not os.path.exists('mnist.pkl.gz'): + os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz") + f = gzip.open('mnist.pkl.gz', 'rb') + train_set, valid_set, test_set = cPickle.load(f) + f.close() + if which_set == 'train': + self.data = train_set[0] + self.label = np.asarray(train_set[1]) + elif which_set == 'valid': + self.data = valid_set[0] + self.label = np.asarray(valid_set[1]) + else: + self.data = test_set[0] + self.data = np.asarray(test_set[1]) + self.batch_size = batch_size + self.nbatch = self.data.shape[0] / batch_size + assert(self.data.shape[0] % batch_size == 0) # I am lazy + self.now_idx = -1 + def BeforeFirst(self): + self.now_idx = -1 + def Next(self): + self.now_idx += 1 + if self.now_idx == self.nbatch: + return False + return True + def Get(self): + if self.now_idx < 0: + raise Exception("Iterator is at head") + elif self.now_idx >= self.nbatch: + raise Exception("Iterator is at end") + start = self.now_idx * self.batch_size + end = (self.now_idx + 1) * self.batch_size + return (self.data[start:end, :], self.label[start:end]) + + + +# symbol net +batch_size = 100 +data = mx.sym.Variable('data') +fc1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=160) +act1 = mx.sym.Activation(data = fc1, name='relu1', type="relu") +fc2 = mx.sym.FullyConnected(data=act1, name='fc2', num_hidden=10) +args_list = fc2.list_arguments() + +# infer shape +data_shape = (batch_size, 784) +arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) +arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] +grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] +mom_narrays = [mx.narray.create(shape) for shape in arg_shapes] +out_narray = mx.narray.create(out_shapes[0]) +inputs = dict(zip(args_list, arg_narrays)) + +# set random weight +for name, narray in inputs.items(): + if "weight" in name: + narray.numpy[:, :] = np.random.uniform(-0.01, 0.01, narray.numpy.shape) + + +# bind executer +# exec = bind(fc2, args_narray, grad_narray, req) +# update + +epoch = 10 +momentum = 0.9 +lr = 0.01 +wd = 0.0004 + +def Update(mom, grad, weight): + if len(mom.numpy.shape) == 1: + mom.numpy[:] = mom.numpy * momentum - lr * (grad.numpy + wd * weight.numpy) + else: + mom.numpy[:, :] = mom.numpy * momentum - lr * (grad.numpy + wd * weight.numpy) + weight += mom + +block = zip(mom_narrays, grad_narrays, arg_narrays) + + +train = MNISTIter("train") +valid = MNISTIter("valid") +for i in xrange(epoch): + # train + print "Epoch %d" % i + train_acc = 0.0 + val_acc = 0.0 + while train.Next(): + data, label = train.Get() + inputs["data"].numpy[:,:] = data + # exec.Forward(args_narray) + train_acc += CalAcc(out_narray.numpy, label) + SetGradient(out_narray.numpy, label) + # exec.Backward(out_narray) + for mom, grad, weight in block: + Update(mom, grad, weight) + # evaluate + while valid.Next(): + data, label = valid.Get() + inputs["data"].numpy[:,:] = data + # exec.Forward([ inputs["data"] ]) + val_acc += CalAcc(out_narray.numpy, label) + print "Train Acc: ", train_acc / train.nbatch + print "Valid Acc: ", val_acc / valid.nbatch + train.BeforeFirst() + valid.BeforeFirst() + + + diff --git a/src/c_api.cc b/src/c_api.cc index ed5446fc816a..2e97b916af9b 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -480,3 +480,70 @@ int MXSymbolInferShape(SymbolHandle sym, } API_END(); } + +MXNET_DLL int MXExecutorForward(ExecutorHandle handle, + mx_uint len, + NArrayHandle *args) { + 
API_BEGIN(); + Executor *exec = static_cast(handle); + NArray **args_ptr = reinterpret_cast(args); + std::vector narrays; + for (mx_uint i = 0; i < len; ++i) { + narrays.emplace_back(*(args_ptr[i])); + } + exec->Forward(narrays); + API_END(); +} + + +MXNET_DLL int MXExecutorBackward(ExecutorHandle handle, + mx_uint len, + NArrayHandle *head_grads) { + API_BEGIN(); + Executor *exec = static_cast(handle); + std::vector narrays; + NArray **args_ptr = reinterpret_cast(head_grads); + for (mx_uint i = 0; i < len; ++i) { + narrays.push_back(*(args_ptr[i])); + } + exec->Backward(narrays); + API_END(); +} + + +MXNET_DLL int MXExecutorHeads(ExecutorHandle handle, + mx_uint *out_size, + NArrayHandle **out) { + API_BEGIN(); + Executor *exec = static_cast(handle); + std::vector ret = exec->heads(); + + API_END(); +} + +MXNET_DLL int MXExecutorBind(ExecutorHandle handle, + SymbolHandle symbol_handle, + int dev_mask, + int dev_id, + mx_uint len, + NArrayHandle *in_args, + NArrayHandle *arg_grad_store, + mx_uint *grad_req_type) { + API_BEGIN(); + Executor *exec = static_cast(handle); + Symbol *symb = static_cast(symbol_handle); + Context ctx = Context(dev_mask, dev_id); + NArray **in_args_ptr = reinterpret_cast(in_args); + NArray **arg_grad_ptr = reinterpret_cast(arg_grad_store); + std::vector in_args_vec; + std::vector arg_grad_vec; + std::vector grad_req_vec; + for (mx_uint i = 0; i < len; ++i) { + in_args_vec.push_back(*(in_args_ptr[i])); + arg_grad_vec.push_back(*(arg_grad_ptr[i])); + grad_req_vec.push_back(static_cast(grad_req_type[i])); + } + handle = exec->Bind(*symb, ctx, in_args_vec, arg_grad_vec, grad_req_vec); + API_END(); +} + diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index 6374d02cc53b..fd643a6405da 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -28,8 +29,8 @@ struct ActivationParam : public dmlc::Parameter { // use int for enumeration int type; DMLC_DECLARE_PARAMETER(ActivationParam) { - // TODO(bing) support enum, str->int mapping - DMLC_DECLARE_FIELD(type).set_default(kReLU); + DMLC_DECLARE_FIELD(type).set_default(kReLU).add_enum("relu", kReLU).\ + add_enum("sigmoid", kSigmoid).add_enum("tanh", kTanh); } }; diff --git a/src/operator/elementwise_sum-inl.h b/src/operator/elementwise_sum-inl.h index f0a558b3b0cc..65a6ba1d5c99 100644 --- a/src/operator/elementwise_sum-inl.h +++ b/src/operator/elementwise_sum-inl.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index 9dbb9bda8649..e92c9f1f66dd 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/windows/mxnet.sln b/windows/mxnet.sln deleted file mode 100755 index 16f82f6b6fb1..000000000000 --- a/windows/mxnet.sln +++ /dev/null @@ -1,28 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 2013 -VisualStudioVersion = 12.0.21005.1 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mxnet", "mxnet.vcxproj", "{2DA41CBC-B8B2-4696-86CD-9AFBAB029661}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Debug|x64 = Debug|x64 - Release|Win32 = Release|Win32 - Release|x64 = Release|x64 - EndGlobalSection - 
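Each of these executor entry points takes a length plus a flat array of `NArrayHandle`s, which the Python wrapper builds with `c_array`. A self-contained sketch of that packing pattern with plain ctypes, shown here on the gradient-request enum from `Bind` (the `c_array` helper below is a stand-in with the same `(ctype, values)` shape as the one used in the Python wrapper):

```python
import ctypes

def c_array(ctype, values):
    """Pack a Python list into a contiguous C array suitable for ctypes calls."""
    return (ctype * len(values))(*values)

grad_req = {"null": 0, "write_to": 1, "in_place": 2, "add_to": 3}
reqs = c_array(ctypes.c_uint, [grad_req[r] for r in ("write_to", "write_to", "null")])
print(len(reqs), list(reqs))  # 3 [1, 1, 0]
```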
GlobalSection(ProjectConfigurationPlatforms) = postSolution - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661}.Debug|Win32.ActiveCfg = Debug|Win32 - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661}.Debug|Win32.Build.0 = Debug|Win32 - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661}.Debug|x64.ActiveCfg = Debug|x64 - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661}.Debug|x64.Build.0 = Debug|x64 - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661}.Release|Win32.ActiveCfg = Release|Win32 - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661}.Release|Win32.Build.0 = Release|Win32 - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661}.Release|x64.ActiveCfg = Release|x64 - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal diff --git a/windows/mxnet.vcxproj b/windows/mxnet.vcxproj deleted file mode 100755 index 2823478cc51f..000000000000 --- a/windows/mxnet.vcxproj +++ /dev/null @@ -1,148 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {2DA41CBC-B8B2-4696-86CD-9AFBAB029661} - Win32Proj - - - - Application - true - v120 - - - Application - true - v120 - - - Application - false - v120 - - - Application - false - v120 - - - - - - - - - - - - - - - - - - - true - - - true - - - true - - - true - - - - WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) - MultiThreadedDebugDLL - Level3 - ProgramDatabase - Disabled - - - MachineX86 - true - Windows - - - - - WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) - MultiThreadedDebugDLL - Level3 - ProgramDatabase - Disabled - $(solutionDir)\..\src - - - true - Console - - - - - WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) - MultiThreadedDLL - Level3 - ProgramDatabase - - - MachineX86 - true - Windows - true - true - - - - - WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) - MultiThreadedDLL - Level3 - ProgramDatabase - $(solutionDir)\..\src - - - true - Console - true - true - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/windows/mxnet.vcxproj.filters b/windows/mxnet.vcxproj.filters deleted file mode 100755 index 1ff068b088be..000000000000 --- a/windows/mxnet.vcxproj.filters +++ /dev/null @@ -1,48 +0,0 @@ - - - - - {4FC737F1-C7A5-4376-A066-2A32D752A2FF} - cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hh;hpp;hxx;hm;inl;inc;xsd - - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - - - - Source Files - - - Source Files - - - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - \ No newline at end of file diff --git a/windows/mxnet.vcxproj.user b/windows/mxnet.vcxproj.user deleted file mode 100755 index ef5ff2a1fae6..000000000000 --- a/windows/mxnet.vcxproj.user +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file From f5c6d6793d8a976df212f5f5b878e0c381be6a05 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Tue, 18 Aug 2015 18:56:03 -0600 Subject: [PATCH 08/11] add pooling op --- Makefile | 7 +- include/mxnet/narray.h | 6 +- src/operator/activation-inl.h | 4 +- src/operator/fully_connected-inl.h | 8 +- src/operator/pooling-inl.h | 201 ++++++++++++++++++ src/operator/pooling.cc | 34 +++ src/operator/pooling.cu | 26 +++ src/operator/static_operator/pooling_op-inl.h | 153 ------------- 8 files changed, 275 insertions(+), 164 deletions(-) create mode 100644 src/operator/pooling-inl.h create mode 100644 
src/operator/pooling.cc create mode 100644 src/operator/pooling.cu delete mode 100644 src/operator/static_operator/pooling_op-inl.h diff --git a/Makefile b/Makefile index b763a406da23..8be91fb4886b 100644 --- a/Makefile +++ b/Makefile @@ -58,14 +58,14 @@ endif BIN = test/api_registry_test test/test_storage OBJ = narray_op_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o elementwise_sum_cpu.o +OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o elementwise_sum_cpu.o pooling_cpu.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a LIB_DEP = $(DMLC_CORE)/libdmlc.a ifeq ($(USE_CUDA), 1) - CUOBJ += narray_op_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o + CUOBJ += narray_op_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o pooling_gpu.o endif .PHONY: clean all test lint doc @@ -91,6 +91,9 @@ activation_cpu.o: src/operator/activation.cc activation_gpu.o: src/operator/activation.cu elementwise_sum_cpu.o: src/operator/elementwise_sum.cc elementwise_sum_gpu.o: src/operator/elementwise_sum.cu +pooling_cpu.o: src/operator/pooling.cc +pooling_gpu.o: src/operator/pooling.cu + lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ) lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ) diff --git a/include/mxnet/narray.h b/include/mxnet/narray.h index 92257b3f0269..798b71627378 100644 --- a/include/mxnet/narray.h +++ b/include/mxnet/narray.h @@ -25,6 +25,7 @@ namespace mxnet { */ class NArray { public: + typedef std::pair ChunkSkin; /*! \brief default cosntructor */ NArray() {} /*! @@ -35,7 +36,8 @@ class NArray { */ NArray(const TShape &shape, Context ctx, bool delay_alloc = false) - : ptr_(new Chunk(shape, ctx, delay_alloc)) { + : ptr_(std::make_shared(shape, ctx, delay_alloc)) { + // Change to std::make_shared } /*! * \brief constructing a static NArray that shares data with TBlob @@ -45,7 +47,7 @@ class NArray { * \param dev_id the device id this tensor sits at */ NArray(const TBlob &data, int dev_id) - : ptr_(new Chunk(data, dev_id)) { + : ptr_(std::make_shared(data, dev_id)) { } /*! 
* \return the shape of current NArray diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index fd643a6405da..e78eecfbeddc 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -29,8 +29,8 @@ struct ActivationParam : public dmlc::Parameter { // use int for enumeration int type; DMLC_DECLARE_PARAMETER(ActivationParam) { - DMLC_DECLARE_FIELD(type).set_default(kReLU).add_enum("relu", kReLU).\ - add_enum("sigmoid", kSigmoid).add_enum("tanh", kTanh); + DMLC_DECLARE_FIELD(type).set_default(kReLU).add_enum("relu", kReLU)\ + .add_enum("sigmoid", kSigmoid).add_enum("tanh", kTanh); } }; diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index e92c9f1f66dd..f129a27b228d 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -14,7 +14,7 @@ #include #include #include "./operator_common.h" -#include "./param.h" + namespace mxnet { namespace op { @@ -122,9 +122,7 @@ class FullyConnectedProp : public OperatorProperty { } virtual void Init(const std::vector >& kwargs) { - // TODO(bing) change directly to vector of pairs begin end - std::map kmap(kwargs.begin(), kwargs.end()); - param_.Init(kmap); + param_.Init(kwargs); } virtual bool InferShape(std::vector *in_shape, @@ -181,7 +179,7 @@ class FullyConnectedProp : public OperatorProperty { const std::vector &in_data, const std::vector &out_data, const std::vector &in_grad) const { - return {{in_grad[kData], in_data[kData]}}; + return {{in_data[kData], in_grad[kData]}}; } Operator* CreateOperator(Context ctx) const; diff --git a/src/operator/pooling-inl.h b/src/operator/pooling-inl.h new file mode 100644 index 000000000000..8b223e2476a2 --- /dev/null +++ b/src/operator/pooling-inl.h @@ -0,0 +1,201 @@ +/*! 
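The `type` field now maps the strings "relu", "sigmoid" and "tanh" onto the activation enum. All three activations have gradients that can be written purely in terms of the activation output, which is what lets the activation backward pass depend only on `out_grad` and `out_data`; an illustrative NumPy check:

```python
import numpy as np

x = np.linspace(-2, 2, 5)
y_relu, y_sig, y_tanh = np.maximum(x, 0), 1.0 / (1.0 + np.exp(-x)), np.tanh(x)

# gradients expressed through the outputs y, not the inputs x
g_relu = (y_relu > 0).astype(x.dtype)   # 1 where the output is positive
g_sig  = y_sig * (1.0 - y_sig)
g_tanh = 1.0 - y_tanh ** 2
print(g_relu, g_sig, g_tanh)
```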
+ * Copyright (c) 2015 by Contributors + * \file pooling-inl.h + * \brief + * \author Bing Xu +*/ + +#ifndef MXNET_OPERATOR_POOLING_INL_H_ +#define MXNET_OPERATOR_POOLING_INL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" + +namespace mxnet { +namespace op { +enum PoolingOpInputs {kData}; +enum PoolingOpOutputs {kOut}; +enum PoolingOpType {kMaxPooling, kAvgPooling, kSumPooling}; + +struct PoolingParam : public dmlc::Parameter { + int kernel_x; + int kernel_y; + int stride_x; + int stride_y; + int pad_x; + int pad_y; + int type; + DMLC_DECLARE_PARAMETER(PoolingParam) { + // TODO(bing) change to only set lower bound + DMLC_DECLARE_FIELD(kernel_x).set_range(1, 10000); + DMLC_DECLARE_FIELD(kernel_y).set_range(1, 10000); + DMLC_DECLARE_FIELD(stride_x).set_range(1, 10000); + DMLC_DECLARE_FIELD(stride_y).set_range(1, 10000); + DMLC_DECLARE_FIELD(pad_x).set_default(0).set_range(0, 10000); + DMLC_DECLARE_FIELD(pad_y).set_default(0).set_range(0, 10000); + DMLC_DECLARE_FIELD(type).set_default(kMaxPooling)\ + .add_enum("max", kMaxPooling).add_enum("avg", kAvgPooling)\ + .add_enum("sum", kSumPooling); + } +}; + +template +class PoolingOp : public Operator { + public: + explicit PoolingOp(PoolingParam p) { + this->param_ = p; + } + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(req[kOut], kWriteTo); + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(out_data.size(), 1); + Stream *s = ctx.get_stream(); + Tensor data = in_data[kData].get(s); + Tensor out = out_data[kOut].get(s); + mshadow::Shape<2> out_shape = Shape2(out.shape_[2], out.shape_[3]); + // TODO(bing): dual stride in mshadow + if (param_.type == kMaxPooling || param_.type == kSumPooling) { + out = pool(pad(data, param_.pad_y, param_.pad_x), + out_shape, + param_.kernel_y, + param_.kernel_x, + param_.kernel_y); + } else if (param_.type == kAvgPooling) { + out = (1.0f / (param_.kernel_y * param_.kernel_x)) * \ + pool(pad(data, param_.pad_y, param_.pad_x), + out_shape, + param_.kernel_y, + param_.kernel_x, + param_.kernel_y); + } + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(out_grad.size(), 1); + CHECK_EQ(in_data.size(), 1); + CHECK_EQ(out_data.size(), 1); + CHECK_EQ(req.size(), 1); + CHECK_EQ(in_grad.size(), 1); + // TODO(bing): remove pad (0,0) + Stream *s = ctx.get_stream(); + Tensor grad = out_grad[kOut].get(s); + Tensor data = in_data[kData].get(s); + Tensor output_data = out_data[kOut].get(s); + Tensor input_grad = in_grad[kData].get(s); + + mshadow::Shape<2> in_shape = Shape2(data.shape_[2], data.shape_[3]); + + if (param_.type == kMaxPooling || param_.type == kSumPooling) { + Assign(input_grad, req[kData], + crop(unpool(pad(data, param_.pad_y, param_.pad_x), + pad(output_data, 0, 0), + pad(grad, 0, 0), + param_.kernel_y, + param_.kernel_x, + param_.stride_y), + in_shape, + param_.pad_y, + param_.pad_x)); + } else if (param_.type == kAvgPooling) { + Assign(input_grad, req[kData], + (1.0f / param_.kernel_y / param_.kernel_x) *\ + crop(unpool(pad(data, param_.pad_y, param_.pad_x), + pad(output_data, 0, 0), + pad(grad, 0, 0), + param_.kernel_y, + param_.kernel_x, + param_.stride_y), + in_shape, + 
param_.pad_y, + param_.pad_x)); + } + } + + private: + PoolingParam param_; +}; // class PoolingOp + +template +Operator* CreateOp(PoolingParam param); + + +#if DMLC_USE_CXX11 +class PoolingProp : public OperatorProperty { + public: + virtual void Init(const std::vector >& kwargs) { + param_.Init(kwargs); + } + + virtual bool InferShape(std::vector *in_shape, + std::vector *out_shape) const { + CHECK_EQ(in_shape->size(), 1); + const TShape &dshape = (*in_shape)[0]; + CHECK_EQ(dshape.ndim(), 4) << \ + "Pooling: Input data should be 4D in (batch, channel, y, x)"; + TShape oshape = dshape; + if (dshape.ndim() == 0) return false; + oshape[2] = std::min(dshape[2] + 2 * param_.pad_y - param_.kernel_y + param_.stride_y - 1, + dshape[2] + 2 * param_.pad_y - 1) / param_.stride_y + 1; + oshape[3] = std::min(dshape[3] + 2 * param_.pad_x - param_.kernel_x + param_.stride_x - 1, + dshape[3] + 2 * param_.pad_x - 1) / param_.stride_x + 1; + CHECK(oshape[2] > 0 && oshape[3] > 0) << "Pooling: kernel size exceed input"; + out_shape->clear(); + out_shape->push_back(oshape); + return true; + } + + virtual OperatorProperty* Copy() const { + PoolingProp *prop_sym = new PoolingProp(); + prop_sym->param_ = this->param_; + return prop_sym; + } + + virtual std::string TypeString() const { + return "Pooling"; + } + + virtual std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const { + return {out_grad[kOut], in_data[kData], out_data[kOut]}; + } + + virtual std::vector > BackwardInplaceOption( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const { + return {{in_data[kData], in_grad[kData]}}; + } + + Operator* CreateOperator(Context ctx) const; + + private: + PoolingParam param_; +}; // class PoolingProp +#endif // DMLC_USE_CXX11 +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_POOLING_INL_H_ diff --git a/src/operator/pooling.cc b/src/operator/pooling.cc new file mode 100644 index 000000000000..a6ebc91e0873 --- /dev/null +++ b/src/operator/pooling.cc @@ -0,0 +1,34 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file pooling.cc + * \brief + * \author Bing Xu +*/ + +#include +#include "./pooling-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator *CreateOp(PoolingParam param) { + switch (param.type) { + case kMaxPooling: return new PoolingOp(param); + case kAvgPooling: return new PoolingOp(param); + case kSumPooling: return new PoolingOp(param); + default: + LOG(FATAL) << "unknown activation type"; + return NULL; + } +} + +Operator* PoolingProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp, param_); +} + +DMLC_REGISTER_PARAMETER(PoolingParam); + +REGISTER_OP_PROPERTY(Pooling, PoolingProp); +} // namespace op +} // namespace mxnet + diff --git a/src/operator/pooling.cu b/src/operator/pooling.cu new file mode 100644 index 000000000000..2db6d9ea549a --- /dev/null +++ b/src/operator/pooling.cu @@ -0,0 +1,26 @@ +/*! 
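`InferShape` clamps the pooled extent so that a window hanging over the input border still contributes one output row or column. The same integer arithmetic in Python, assuming truncating division as in the C++ code; `pooled_extent` is a hypothetical helper name:

```python
def pooled_extent(in_size, kernel, stride, pad):
    # mirrors: min(in + 2*pad - kernel + stride - 1, in + 2*pad - 1) / stride + 1
    return min(in_size + 2 * pad - kernel + stride - 1,
               in_size + 2 * pad - 1) // stride + 1

print(pooled_extent(28, kernel=2, stride=2, pad=0))  # 14
print(pooled_extent(5,  kernel=3, stride=2, pad=0))  # 2
```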
+ * Copyright (c) 2015 by Contributors + * \file pooling.cu + * \brief + * \author Bing Xu +*/ + +#include "./pooling-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator *CreateOp(PoolingParam param) { + switch (param.type) { + case kMaxPooling: return new PoolingOp(param); + case kAvgPooling: return new PoolingOp(param); + case kSumPooling: return new PoolingOp(param); + default: + LOG(FATAL) << "unknown activation type"; + return NULL; + } +} + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/static_operator/pooling_op-inl.h b/src/operator/static_operator/pooling_op-inl.h deleted file mode 100644 index 8c6014a8c2cf..000000000000 --- a/src/operator/static_operator/pooling_op-inl.h +++ /dev/null @@ -1,153 +0,0 @@ -/*! - * Copyright (c) 2015 by Contributors - * \file pooling_op-inl.h - * \brief pooling operator - * \author Bing Xu -*/ -#ifndef MXNET_OPERATOR_STATIC_OPERATOR_POOLING_OP_INL_H_ -#define MXNET_OPERATOR_STATIC_OPERATOR_POOLING_OP_INL_H_ - -#include -#include -#include -#include "./param.h" -#include "./static_operator_common.h" - - -namespace mxnet { -namespace op { -template -class PoolingOp : public StaticOperator { - public: - virtual void SetParam(const char *name, const char *val) { - param_.SetParam(name, val); - } - virtual void InferShape(std::vector *in_shape, - std::vector *out_shape) { - CHECK_EQ(in_shape->size(), 1) << "Input: [data]"; - CHECK_GT(param_.kernel_y, 0); - CHECK_GT(param_.kernel_x, 0); - const int ksize_y = static_cast(param_.kernel_y); - const int ksize_x = static_cast(param_.kernel_x); - const int pad_y = static_cast(param_.pad_y); - const int pad_x = static_cast(param_.pad_x); - // TODO(bing): dual stride - const int kstride = static_cast(param_.stride_y); - mshadow::Shape<4> ishape = (*in_shape)[0].get<4>(); - oshape_ = ishape; - fea_shape_ = mshadow::Shape2(ishape[2], ishape[3]); - oshape_[2] = std::min(ishape[2] + 2 * pad_y - ksize_y + kstride - 1, - ishape[2] + 2 * pad_y - 1) / kstride + 1; - oshape_[3] = std::min(ishape[3] + 2 * pad_x - ksize_x + kstride - 1, - ishape[3] + 2 * pad_x - 1) / kstride + 1; - CHECK(oshape_[2] > 0 && oshape_[3] > 0) << "kernel size exceed input"; - out_shape->clear(); - out_shape->push_back((*in_shape)[0]); - (*out_shape)[0][2] = oshape_[2]; - (*out_shape)[0][3] = oshape_[3]; - } - virtual void Forward(Option opt, - RunContext ctx, - const std::vector &in_data, - const std::vector &out_data) { - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 0); - if (!(temp_.shape_ == oshape_)) { - temp_.Resize(oshape_); - } - const int ksize_y = param_.kernel_y; - const int ksize_x = param_.kernel_x; - const int pad_y = param_.pad_y; - const int pad_x = param_.pad_x; - // TODO(bing): dual stride - const int kstride = param_.stride_y; - using namespace mshadow; - using namespace mshadow::expr; - Stream *s = static_cast *>(ctx.stream); - Tensor data = in_data[0].get(s); - Tensor out = out_data[0].get(s); - mshadow::Shape<2> pshape = Shape2(out.shape_[2], out.shape_[3]); - if (mode == kMaxPooling || mode == kSumPooling) { - temp_ = pool(pad(data, pad_y, pad_x), - pshape, - ksize_y, - ksize_x, - kstride); - } else if (mode == kAvgPooling) { - temp_ = (1.0f / (ksize_y * ksize_x)) * \ - pool(pad(data, pad_y, pad_x), - pshape, - ksize_y, - ksize_x, - kstride); - } else { - LOG(FATAL) << "Unknown pooling mode"; - } - Copy(out, temp_, s); - } - virtual void Backward(RunContext ctx, - const std::vector &grad_next, - const std::vector &in_data, - const std::vector &out_data, - const std::vector 
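Both `CreateOp` specializations dispatch on the pooling type, and average pooling is implemented as sum pooling scaled by `1/(kernel_y*kernel_x)`. A toy NumPy sketch of non-overlapping 2x2 pooling that mirrors this structure (single 2-D map, no stride or padding; the real operator works on 4-D batches through mshadow expressions and `pool2d` is a hypothetical helper):

```python
import numpy as np

def pool2d(x, k, reducer):
    """Non-overlapping k x k pooling of a 2-D array with the given reducer."""
    h, w = x.shape[0] // k, x.shape[1] // k
    blocks = x.reshape(h, k, w, k).transpose(0, 2, 1, 3).reshape(h, w, k * k)
    return reducer(blocks, axis=-1)

x = np.arange(16.0).reshape(4, 4)
print(pool2d(x, 2, np.max))            # max pooling
print(pool2d(x, 2, np.sum) / (2 * 2))  # avg pooling as scaled sum pooling
```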
&out_grad, - const std::vector &req) { - CHECK_EQ(grad_next.size(), 1); - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(req.size(), 1); - const int ksize_y = param_.kernel_y; - const int ksize_x = param_.kernel_x; - const int pad_y = param_.pad_y; - const int pad_x = param_.pad_x; - // TODO(bing): dual stride - const int kstride = param_.stride_y; - using namespace mshadow; - using namespace mshadow::expr; - Stream *s = static_cast *>(ctx.stream); - Tensor grad = grad_next[0].get(s); - Tensor data = in_data[0].get(s); - Tensor out = out_grad[0].get(s); - if (mode == kMaxPooling || mode == kSumPooling) { - Assign(out, - req[0], - crop(unpool(pad(data, pad_y, pad_x), - pad(temp_, 0, 0), - pad(grad, 0, 0), - ksize_y, - ksize_x, - kstride), - fea_shape_, - pad_y, - pad_x)); - } else if (mode == kAvgPooling) { - Assign(out, - req[0], - (1.0f / (ksize_y * ksize_x)) * \ - crop(unpool(pad(data, pad_y, pad_x), - pad(temp_, 0, 0), - pad(grad, 0, 0), - ksize_y, - ksize_x, - kstride), - fea_shape_, - pad_y, - pad_x)); - } else { - LOG(FATAL) << "Unknown pooling mode"; - } - } - - private: - /*! \brief parameters that potentially be useful */ - Param param_; - /*! \brief temp space to save pooled result */ - mshadow::TensorContainer temp_; - /*! \brief pooled output shape */ - mshadow::Shape<4> oshape_; - /*! \brief input feature map shape */ - mshadow::Shape<2> fea_shape_; -}; // class PoolingOp - -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_STATIC_OPERATOR_POOLING_OP_INL_H_ From 0448d6743f2cced56f1a8cee8c1129fed68ed37e Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Tue, 18 Aug 2015 23:47:46 -0600 Subject: [PATCH 09/11] graph exec as discussed --- Makefile | 3 +- include/mxnet/context.h | 8 + include/mxnet/narray.h | 6 +- include/mxnet/operator.h | 83 +++-- include/mxnet/symbolic.h | 10 +- src/c_api.cc | 10 +- src/operator/activation-inl.h | 12 +- src/operator/elementwise_sum-inl.h | 8 +- src/operator/fully_connected-inl.h | 4 +- src/symbol/graph_executor.cc | 473 ++++++++++++++++++++++++++++ src/symbol/graph_executor.h | 179 +++++++++++ src/symbol/graph_memory_allocator.h | 145 +++++++++ src/symbol/static_graph.cc | 41 ++- 13 files changed, 906 insertions(+), 76 deletions(-) create mode 100644 src/symbol/graph_executor.cc create mode 100644 src/symbol/graph_executor.h create mode 100644 src/symbol/graph_memory_allocator.h diff --git a/Makefile b/Makefile index 8be91fb4886b..50e9a21c50e8 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ endif BIN = test/api_registry_test test/test_storage OBJ = narray_op_cpu.o # add threaded engine after it is done -OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o elementwise_sum_cpu.o pooling_cpu.o +OBJCXX11 = engine.o narray.o c_api.o registry.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o elementwise_sum_cpu.o graph_executor.o pooling_cpu.o CUOBJ = SLIB = lib/libmxnet.so ALIB = lib/libmxnet.a @@ -81,6 +81,7 @@ narray.o: src/narray/narray.cc narray_op_cpu.o: src/narray/narray_op_cpu.cc src/narray/narray_op-inl.h narray_op_gpu.o: src/narray/narray_op_gpu.cu src/narray/narray_op-inl.h symbol.o: src/symbol/symbol.cc +graph_executor.o: src/symbol/graph_executor.cc static_graph.o : src/symbol/static_graph.cc registry.o: src/registry.cc c_api.o: src/c_api.cc diff --git a/include/mxnet/context.h b/include/mxnet/context.h index 8dfa618ca180..700bb36f0abb 100644 --- a/include/mxnet/context.h +++ b/include/mxnet/context.h @@ 
-33,6 +33,14 @@ struct Context { inline bool operator==(const Context &b) const { return dev_mask == b.dev_mask && dev_id == b.dev_id; } + /*! + * \brief check if current context not equals another one + * \param b another context to compare + * \return whether they are not the same + */ + inline bool operator!=(const Context &b) const { + return !(*this == b); + } }; /*! diff --git a/include/mxnet/narray.h b/include/mxnet/narray.h index 798b71627378..92257b3f0269 100644 --- a/include/mxnet/narray.h +++ b/include/mxnet/narray.h @@ -25,7 +25,6 @@ namespace mxnet { */ class NArray { public: - typedef std::pair ChunkSkin; /*! \brief default cosntructor */ NArray() {} /*! @@ -36,8 +35,7 @@ class NArray { */ NArray(const TShape &shape, Context ctx, bool delay_alloc = false) - : ptr_(std::make_shared(shape, ctx, delay_alloc)) { - // Change to std::make_shared + : ptr_(new Chunk(shape, ctx, delay_alloc)) { } /*! * \brief constructing a static NArray that shares data with TBlob @@ -47,7 +45,7 @@ class NArray { * \param dev_id the device id this tensor sits at */ NArray(const TBlob &data, int dev_id) - : ptr_(std::make_shared(data, dev_id)) { + : ptr_(new Chunk(data, dev_id)) { } /*! * \return the shape of current NArray diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index 0fa1fb6a0571..bc1d79b20b38 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -8,6 +8,7 @@ #define MXNET_OPERATOR_H_ #include +#include #include #include #include @@ -108,7 +109,9 @@ class Operator { const std::vector &in_data, const std::vector &out_data, const std::vector &req, - const std::vector &in_grad) = 0; + const std::vector &in_grad) { + LOG(FATAL) << "Backward is not implemented"; + } }; #if DMLC_USE_CXX11 @@ -255,28 +258,36 @@ class OperatorProperty { * This function enables optimization to reuse memory of inputs in output. * Only override when necessary, by default in-place is disabled. * + * The reason for void* type in the out_data is to distinguish the order + * of mappings between the two, compiler will report error when + * in_data and out_data's order in the pair get reversed. + * * \code * // The following code says out_data[0] can share data with in_data[0] - * vector > ForwardInplaceOption(const vector &in_data, - * const vector &out_data) const { - * return {{out_data[0], in_data[0]}}; + * vector > ForwardInplaceOption(const vector &in_data, + * const vector &out_data) const { + * return {{in_data[0], out_data[0]}}; * } * \endcode * \param in_data The input data in forward pass. * \param out_data The output data in forward pass. - * \return list of pair of integers taken from the inputs vector, + * \return list of pair of that maps input->output, * indicating possible in place operations. */ - virtual std::vector > ForwardInplaceOption( + virtual std::vector > ForwardInplaceOption( const std::vector &in_data, - const std::vector &out_data) const { - return std::vector >(); + const std::vector &out_data) const { + return std::vector >(); } /*! * \brief Get possible backward inplace options. * This function enables optimization to reuse memory of inputs in output. * Only override when necessary, by default in-place is disabled. * + * The reason for void* type in the in_grad is to distinguish the order + * of mappings between the two, compiler will report error when + * in_data and out_data's order in the pair get reversed. 
+ * * \code * // The following code says in_grad[0] can share data with in_data[0] * vector > BackwardInplaceOption( @@ -284,22 +295,22 @@ class OperatorProperty { * const std::vector &in_data, * const std::vector &out_data, * const std::vector &in_grad) const { - * return {in_grad[0], in_data[0]}}; + * return {in_data[0], in_grad[0]}}; * } * \endcode * \param in_data The input data in forward pass. * \param out_data The output data in forward pass. * \param in_grad Gradient of inputs in backward pass. * \param out_grad Gradient of outputs in backward pass. - * \return list of pair of integers taken from the inputs vector, + * \return list of pair of that maps input->output, * indicating possible in place operations. */ - virtual std::vector > BackwardInplaceOption( + virtual std::vector > BackwardInplaceOption( const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data, - const std::vector &in_grad) const { - return std::vector >(); + const std::vector &in_grad) const { + return std::vector >(); } /*! * \brief Get Backward Input Dependency for generic types of data. @@ -314,31 +325,35 @@ class OperatorProperty { * \sa DeclareBackwardDependency */ template - inline std::vector BackwardInputs(const std::vector &in_data, - const std::vector &out_data, - const std::vector &out_grad) const { - int cnt = 0; - std::vector all_vec; - std::vector in_data_idx, out_data_idx, out_grad_idx; - for (size_t i = 0; i < in_data.size(); ++i) { - in_data_idx.push_back(cnt++); - all_vec.push_back(in_data[i]); + inline std::vector BackwardInputs(const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const { + int counter = 0; + std::vector out_grad_index(out_grad.size()); + std::vector in_data_index(out_data.size()); + std::vector out_data_index(out_data.size()); + for (size_t i = 0; i < out_grad_index.size(); ++i) { + out_grad_index[i] = counter++; } - for (size_t i = 0; i < out_data.size(); ++i) { - out_data_idx.push_back(cnt++); - all_vec.push_back(out_data[i]); + for (size_t i = 0; i < in_data_index.size(); ++i) { + in_data_index[i] = counter++; } - for (size_t i = 0; i < out_grad.size(); ++i) { - out_grad_idx.push_back(cnt++); - all_vec.push_back(out_data[i]); + for (size_t i = 0; i < out_data_index.size(); ++i) { + out_data_index[i] = counter++; } - std::vector ret_idx = this->DeclareBackwardDependency( - in_data_idx, out_data_idx, out_grad_idx); - std::vector ret; - for (size_t i = 0; i < ret_idx.size(); ++i) { - ret.push_back(all_vec[ret_idx[i]]); + std::vector all_data; + all_data.insert(all_data.end(), out_grad.begin(), out_grad.end()); + all_data.insert(all_data.end(), in_data.begin(), in_data.end()); + all_data.insert(all_data.end(), out_data.begin(), out_data.end()); + + std::vector ret_index = this->DeclareBackwardDependency( + out_grad_index, in_data_index, out_data_index); + + std::vector ret(ret_index.size()); + for (size_t i = 0; i < ret_index.size(); ++i) { + ret[i] = all_data[ret_index[i]]; } - return ret; + return std::move(ret); } /*! * \brief create OperatorProperty diff --git a/include/mxnet/symbolic.h b/include/mxnet/symbolic.h index bee0e921117a..df06c4913de8 100644 --- a/include/mxnet/symbolic.h +++ b/include/mxnet/symbolic.h @@ -158,11 +158,17 @@ class StaticGraph { * The head and input of Backward pass will be returned by head_grad_nodes and arg_grads. * * \param head_grad_nodes used to store the created head gradient inputs for backward pass. 
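The reworked `BackwardInputs` concatenates `out_grad`, `in_data` and `out_data` in that fixed order, asks `DeclareBackwardDependency` for the indices it needs, and returns only those entries. A small Python model of the bookkeeping; `backward_inputs` and `fc_dep` are stand-ins, not the real operator interface, and the fully-connected dependency list mirrors the one declared later in this patch:

```python
def backward_inputs(out_grad, in_data, out_data, declare_dependency):
    all_data = list(out_grad) + list(in_data) + list(out_data)
    out_grad_idx = list(range(0, len(out_grad)))
    in_data_idx = list(range(len(out_grad), len(out_grad) + len(in_data)))
    out_data_idx = list(range(len(out_grad) + len(in_data), len(all_data)))
    need = declare_dependency(out_grad_idx, in_data_idx, out_data_idx)
    return [all_data[i] for i in need]

# e.g. a fully-connected layer needs out_grad, data and weight, but not bias or outputs
fc_dep = lambda og, ind, outd: [og[0], ind[0], ind[1]]
print(backward_inputs(["dy"], ["x", "w", "b"], ["y"], fc_dep))  # ['dy', 'x', 'w']
```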
-<<<<<<< HEAD * \param arg_grads used to store gradients to args, can be multiple one if an argument is used by operator */ void MakeBackwardPass(std::vector *head_grad_nodes, std::vector > *arg_grads); + + /*! + * \brief create a sum node that aggregates gradient together + * \param grad_source the source of the inputs. + * \return a created ElementWiseSum node + */ + static Node CreateSumNode(const std::vector &grad_source); }; /*! @@ -365,7 +371,7 @@ class Executor { * \brief Perform a Forward operation of Operator * After this operation, user can get the result by using function head. */ - virtual void Forward(const std::vector &inputs) = 0; + virtual void Forward() = 0; /*! * \brief Perform a Backward operation of the Operator. * This must be called after Forward. diff --git a/src/c_api.cc b/src/c_api.cc index 2e97b916af9b..ab7899767555 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -486,12 +486,10 @@ MXNET_DLL int MXExecutorForward(ExecutorHandle handle, NArrayHandle *args) { API_BEGIN(); Executor *exec = static_cast(handle); - NArray **args_ptr = reinterpret_cast(args); - std::vector narrays; - for (mx_uint i = 0; i < len; ++i) { - narrays.emplace_back(*(args_ptr[i])); - } - exec->Forward(narrays); + CHECK_EQ(len, 0) + << "forward do not take narray for now"; + // TODO(bing): remove args for now + exec->Forward(); API_END(); } diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index e78eecfbeddc..3d57d6a88102 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -29,8 +29,8 @@ struct ActivationParam : public dmlc::Parameter { // use int for enumeration int type; DMLC_DECLARE_PARAMETER(ActivationParam) { - DMLC_DECLARE_FIELD(type).set_default(kReLU).add_enum("relu", kReLU)\ - .add_enum("sigmoid", kSigmoid).add_enum("tanh", kTanh); + DMLC_DECLARE_FIELD(type).set_default(kReLU).add_enum("relu", kReLU).\ + add_enum("sigmoid", kSigmoid).add_enum("tanh", kTanh); } }; @@ -116,17 +116,17 @@ class ActivationProp : public OperatorProperty { return {out_grad[kOut], out_data[kOut]}; } - virtual std::vector > BackwardInplaceOption( + virtual std::vector > BackwardInplaceOption( const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data, - const std::vector &in_grad) const { + const std::vector &in_grad) const { return {{out_grad[kOut], in_grad[kData]}}; } - virtual std::vector > ForwardInplaceOption( + virtual std::vector > ForwardInplaceOption( const std::vector &in_data, - const std::vector &out_data) const { + const std::vector &out_data) const { return {{in_data[kData], out_data[kOut]}}; } diff --git a/src/operator/elementwise_sum-inl.h b/src/operator/elementwise_sum-inl.h index 65a6ba1d5c99..4a0d6e3fdd57 100644 --- a/src/operator/elementwise_sum-inl.h +++ b/src/operator/elementwise_sum-inl.h @@ -146,17 +146,17 @@ class ElementWiseSumProp : public OperatorProperty { return out_grad; } - virtual std::vector > BackwardInplaceOption( + virtual std::vector > BackwardInplaceOption( const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data, - const std::vector &in_grad) const { + const std::vector &in_grad) const { return {{out_grad[0], in_grad[0]}}; } - virtual std::vector > ForwardInplaceOption( + virtual std::vector > ForwardInplaceOption( const std::vector &in_data, - const std::vector &out_data) const { + const std::vector &out_data) const { return {{in_data[0], out_data[0]}}; } diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index 
f129a27b228d..b49e5c422739 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -174,11 +174,11 @@ class FullyConnectedProp : public OperatorProperty { return {out_grad[kOut], in_data[kData], in_data[kWeight]}; } - virtual std::vector > BackwardInplaceOption( + virtual std::vector > BackwardInplaceOption( const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data, - const std::vector &in_grad) const { + const std::vector &in_grad) const { return {{in_data[kData], in_grad[kData]}}; } diff --git a/src/symbol/graph_executor.cc b/src/symbol/graph_executor.cc new file mode 100644 index 000000000000..8cf50541959e --- /dev/null +++ b/src/symbol/graph_executor.cc @@ -0,0 +1,473 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file graph_executor.cc + * \brief Executor to execute the Graph. +*/ +#include +#include +#include +#include "./graph_executor.h" + +namespace mxnet { +/*! + * \brief wrapper class that wraps Backward operation as Forward. + */ +class GraphExecutor::BackwardOpWrapper : public Operator { + public: + /*! + * \brief create a backward Operator wrapper given forward op. + * \param prop pointer to the property of forward wrapper + * \param forward_op the shared ptr to Forward operator + * \return the created wrapper. + */ + explicit BackwardOpWrapper(const OperatorProperty *prop, + std::shared_ptr forward_op) + : op_(forward_op) { + out_grad_.resize(prop->NumReturns()); + in_data_.resize(prop->ListArguments().size()); + out_data_.resize(prop->NumVisibleReturns()); + + std::vector out_grad_ptr(out_grad_.size()); + for (size_t i = 0; i < out_grad_.size(); ++i) { + out_grad_ptr[i] = &out_grad_[i]; + } + std::vector in_data_ptr(in_data_.size()); + for (size_t i = 0; i < in_data_.size(); ++i) { + in_data_ptr[i] = &in_data_[i]; + } + std::vector out_data_ptr(out_data_.size()); + for (size_t i = 0; i < out_data_.size(); ++i) { + out_data_ptr[i] = &out_data_[i]; + } + arg_data_ptr_ = prop->BackwardInputs( + out_grad_ptr, out_data_ptr, in_data_ptr); + } + // implement forward + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data) { + // set things correctly + CHECK(arg_data_ptr_.size() == in_data.size()); + for (size_t i = 0; i < in_data.size(); ++i) { + *(arg_data_ptr_[i]) = in_data[i]; + } + // redirect internally + op_->Backward(ctx, out_grad_, in_data_, out_data_, req, out_data); + } + + private: + /*! \brief internal forward operator */ + std::shared_ptr op_; + /*! \brief internal space for out_grad */ + std::vector out_grad_; + /*! \brief internal space for in_data */ + std::vector in_data_; + /*! \brief internal space for out_data */ + std::vector out_data_; + /*! + * \brief pointer to places in the internal space. + * arg_data_ptr_ maps in_data in Forward to the internal space. 
+ */ + std::vector arg_data_ptr_; +}; + +// get resource +inline std::vector +GraphExecutor::GetResource(uint32_t node_id) const { + const StaticGraph::Node &node = graph_.nodes[node_id]; + if (node.is_forward()) { + return node.op->ForwardResource(); + } else { + CHECK(node.is_backward()); + return graph_.nodes[node.backward_source_id].op->BackwardResource(); + } +} + +inline int GraphExecutor::GetNumOutputs(uint32_t node_id) const { + const StaticGraph::Node &node = graph_.nodes[node_id]; + if (node.is_forward()) { + return node.op->NumReturns(); + } else if (node.is_backward()) { + return static_cast( + graph_.nodes[node.backward_source_id].op->ListArguments().size()); + } else { + CHECK(node.is_variable()); + return 1; + } +} + +// implement get input option +template +inline std::vector > GraphExecutor::GetInplaceOption( + uint32_t node_id, + const std::vector &in_data, + const std::vector &out_data) const { + // get the node + const StaticGraph::Node &node = graph_.nodes[node_id]; + + if (node.is_forward()) { + std::vector in_data_index(in_data.size()); + for (size_t i = 0; i < in_data.size(); ++i) { + in_data_index[i] = static_cast(i); + } + std::vector out_data_ptr(out_data.size()); + for (size_t i = 0; i < out_data.size(); ++i) { + out_data_ptr[i] = (void*)&out_data[i]; // NOLINT(*) + } + auto rmap_index = node.op->ForwardInplaceOption(in_data_index, out_data_ptr); + std::vector > remap(rmap_index.size()); + for (size_t i = 0; i < remap.size(); ++i) { + remap[i].first = in_data[rmap_index[i].first]; + remap[i].second = *static_cast(rmap_index[i].second); + } + return std::move(remap); + } else { + CHECK(node.is_backward()); + // forward property + const OperatorProperty *fwd = graph_.nodes[node.backward_source_id].op.get(); + + std::vector out_grad_index(fwd->NumReturns()); + std::vector out_data_index(fwd->NumVisibleReturns()); + std::vector in_data_index(fwd->ListArguments().size()); + CHECK_EQ(in_data_index.size(), out_data.size()); + int counter = 0; + for (size_t i = 0; i < out_grad_index.size(); ++i) { + out_grad_index[i] = counter++; + } + for (size_t i = 0; i < in_data_index.size(); ++i) { + in_data_index[i] = counter++; + } + for (size_t i = 0; i < out_data_index.size(); ++i) { + out_data_index[i] = counter++; + } + auto args_index = fwd->DeclareBackwardDependency( + out_grad_index, in_data_index, out_data_index); + std::vector args_array(counter, nullptr); + CHECK_EQ(args_index.size(), in_data.size()); + for (size_t i = 0; i < in_data.size(); ++i) { + args_array[args_index[i]] = &in_data[i]; + } + std::vector in_grad_ptr(out_data.size()); + for (size_t i = 0; i < in_grad_ptr.size(); ++i) { + in_grad_ptr[i] = (void*)&out_data[i]; // NOLINT(*) + } + auto remap_index = fwd->BackwardInplaceOption( + out_grad_index, in_data_index, out_data_index, in_grad_ptr); + std::vector > remap(remap_index.size()); + for (size_t i = 0; i < remap_index.size(); ++i) { + CHECK_NE(args_array[remap_index[i].first], nullptr) + << "BackwardInplaceOption uses input that is returned by DeclareBackwardDependency"; + remap[i].first = *args_array[remap_index[i].first]; + remap[i].second = *static_cast(remap_index[i].second); + } + return std::move(remap); + } +} + +inline GraphExecutor::OpExecEntry +GraphExecutor::GetOpExecEntry(uint32_t nid) { + OpNode& op_node = op_nodes_[nid]; + Operator *op = op_node.op.get(); + std::vector req; + std::vector in_data, out_data; + in_data.reserve(graph_.nodes[nid].inputs.size()); + out_data.reserve(op_node.outputs.size()); + req.reserve(op_node.outputs.size()); + 
+ OpExecEntry exec; + for (const DataEntryInfo& out : op_node.outputs) { + out_data.push_back(out.data.data()); + exec.mutate_vars.push_back(out.data.var()); + req.push_back(out.op_req); + } + + for (StaticGraph::DataEntry e : graph_.nodes[nid].inputs) { + const DataEntryInfo &info = op_nodes_[e.source_id].outputs[e.index]; + in_data.push_back(info.data.data()); + // skip inplace since they already appear in mutate vars + if (info.inplace_op_id != static_cast(nid)) { + exec.use_vars.push_back(info.data.var()); + } + } + + OpContext* op_ctx_ptr = &op_node.op_ctx; + exec.exec_fun = [op, op_ctx_ptr, in_data, req, out_data] (RunContext ctx) { + op_ctx_ptr->run_ctx = ctx; + op->Forward(*op_ctx_ptr, in_data, req, out_data); + }; + return std::move(exec); +} + +void GraphExecutor::InitGraph(Symbol symbol, Context ctx, bool need_backward) { + // initialize all internal daa structures + symbol.ToStaticGraph(&graph_); + num_forward_nodes_ = graph_.nodes.size(); + if (need_backward) { + graph_.MakeBackwardPass(&head_grad_nodes_, &arg_grads_); + } + // reorganize so backward node always follow forward + // note that this may not be the case, because existence of head_grad_nodes + std::vector topo = graph_.TopoSort(); + std::vector backward; + for (uint32_t nid : topo) { + if (nid < num_forward_nodes_) { + topo_order_.push_back(nid); + } else { + backward.push_back(nid); + } + } + topo_order_.insert(topo_order_.end(), backward.begin(), backward.end()); + // setup all the operator nodes data structure + op_nodes_.resize(graph_.nodes.size()); + for (size_t i = 0; i < graph_.nodes.size(); ++i) { + op_nodes_[i].ctx = ctx; + op_nodes_[i].outputs.resize(GetNumOutputs(i)); + } +} + +void GraphExecutor::InitDataEntryInfo(const std::vector &in_args, + const std::vector &arg_grad_store, + const std::vector &grad_req_type) { + CHECK_EQ(arg_grad_store.size(), grad_req_type.size()); + CHECK_EQ(in_args.size(), graph_.arg_nodes.size()); + // bind inputs + for (size_t i = 0; i < graph_.arg_nodes.size(); ++i) { + DataEntryInfo &info = op_nodes_[graph_.arg_nodes[i]].outputs[0]; + info.type = kBindByExternal; + info.data = in_args[i]; + } + // setup ref for head nodes + for (StaticGraph::DataEntry e : graph_.heads) { + DataEntryInfo &info = op_nodes_[e.source_id].outputs[e.index]; + ++info.ref_count; + op_nodes_[e.source_id].activated = true; + } + // need Backward pass + if (arg_grads_.size() != 0) { + CHECK_EQ(arg_grads_.size(), arg_grad_store.size()); + CHECK_EQ(arg_grads_.size(), grad_req_type.size()); + // setup gradient placeholders + for (size_t i = 0; i < arg_grads_.size(); ++i) { + if (grad_req_type[i] == kNullOp) continue; + CHECK_NE(grad_req_type[i], kWriteInplace) + << "Gradient request can only be nullop, add, write"; + std::vector &grad_source = arg_grads_[i]; + CHECK_GE(grad_source.size(), 1); + // TODO(bing) add a aggregation node here + if (grad_source.size() > 1) { + CHECK_EQ(grad_req_type[i], kAddTo) + << "The gradient contains multiple variables,"; + } + for (StaticGraph::DataEntry e : grad_source) { + DataEntryInfo &info = op_nodes_[e.source_id].outputs[e.index]; + info.type = kBindByExternal; + info.op_req = grad_req_type[i]; + info.data = arg_grad_store[i]; + ++info.ref_count; + op_nodes_[e.source_id].activated = true; + } + } + // setup head gradient + for (uint32_t nid : head_grad_nodes_) { + DataEntryInfo &info = op_nodes_[nid].outputs[0]; + info.type = kTobeBindByExternal; + } + } + // update ref counters for all other nodes, in reverse topo order + for (auto it = topo_order_.rbegin(); it != 
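`InitGraph` takes one topological order of the combined graph and then stably moves every backward node (id at or above the number of forward nodes) behind all forward nodes. A sketch of just that reordering step, with made-up node ids:

```python
def reorder(topo_order, num_forward_nodes):
    forward = [nid for nid in topo_order if nid < num_forward_nodes]
    backward = [nid for nid in topo_order if nid >= num_forward_nodes]
    return forward + backward

print(reorder([0, 1, 5, 2, 4, 3], num_forward_nodes=4))  # [0, 1, 2, 3, 5, 4]
```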
topo_order_.rend(); ++it) { + uint32_t nid = *it; + if (op_nodes_[nid].activated) { + for (StaticGraph::DataEntry e : graph_.nodes[nid].inputs) { + DataEntryInfo &info = op_nodes_[e.source_id].outputs[e.index]; + ++info.ref_count; + op_nodes_[e.source_id].activated = true; + } + } + } + + // shape inference + std::vector > out_shapes(op_nodes_.size()); + for (size_t i = 0; i < out_shapes.size(); ++i) { + out_shapes[i].resize(op_nodes_[i].outputs.size()); + } + for (size_t i = 0; i < graph_.arg_nodes.size(); ++i) { + out_shapes[graph_.arg_nodes[i]][0] = in_args[i].shape(); + } + CHECK(graph_.InferNodeShapes(topo_order_, &out_shapes)) + << "Shape inference cannot be complete in bind"; + for (size_t i = 0; i < out_shapes.size(); ++i) { + for (size_t j = 0; j < out_shapes[i].size(); ++j) { + op_nodes_[i].outputs[j].shape = out_shapes[i][j]; + } + } +} + +void GraphExecutor::InitDataEntryMemory() { + // use allocator to allocate memory. + GraphStorageAllocator allocator(&graph_); + + for (size_t i = 0; i < topo_order_.size(); ++i) { + uint32_t nid = topo_order_[i]; + if (!op_nodes_[nid].activated) continue; + if (graph_.nodes[nid].is_variable()) continue; + + // check inplace option + std::vector in_data; + in_data.reserve(graph_.nodes[nid].inputs.size()); + // check inputs are ready. + for (StaticGraph::DataEntry e : graph_.nodes[nid].inputs) { + DataEntryInfo &info = op_nodes_[e.source_id].outputs[e.index]; + CHECK_NE(info.type, kNotInitialized); + CHECK_NE(info.ref_count, 0); + in_data.push_back(&info); + } + std::vector out_data(op_nodes_[nid].outputs.size()); + for (size_t i = 0; i < op_nodes_[nid].outputs.size(); ++i) { + out_data[i] = &op_nodes_[nid].outputs[i]; + CHECK_NE(out_data[i]->type, kInternalAllocated); + } + auto inplace = GetInplaceOption(nid, in_data, out_data); + for (std::pair kv : inplace) { + DataEntryInfo* in = kv.first; + DataEntryInfo* out = kv.second; + if (in->ref_count == 1 && + in->type == kInternalAllocated && + out->type == kNotInitialized) { + // we can only do inplace if we are last user of in + // and out is not initialized. 
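The allocation pass applies a simple in-place rule: an output may take over its input's storage only when the current node is the last reader of that input (`ref_count == 1`) and the output has no storage yet. A condensed Python sketch of the decision, using dicts as stand-ins for `DataEntryInfo`:

```python
def try_inplace(node_id, inp, out):
    """inp/out are dicts with 'ref_count', 'type' and 'storage_id' keys."""
    if inp["ref_count"] == 1 and inp["type"] == "allocated" and out["type"] == "uninit":
        out.update(type="allocated", storage_id=inp["storage_id"], op_req="write_inplace")
        inp["ref_count"] = 0          # consumed: no later node may read it
        inp["inplace_op_id"] = node_id
        return True
    return False

a = {"ref_count": 1, "type": "allocated", "storage_id": 7}
b = {"ref_count": 2, "type": "uninit", "storage_id": None}
print(try_inplace(3, a, b), b["storage_id"])  # True 7
```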
+ out->type = kInternalAllocated; + out->op_req = kWriteInplace; + out->storage_id = in->storage_id; + // set inplace op id + in->ref_count = 0; + in->inplace_op_id = static_cast(nid); + } + } + // allocate output, + for (DataEntryInfo *out : out_data) { + if (out->op_req == kNullOp && out->ref_count != 0) { + out->op_req = kWriteTo; + } + if (out->type == kNotInitialized) { + out->storage_id = allocator.Request( + op_nodes_[nid].ctx, out->shape, nid); + out->type = kInternalAllocated; + } + } + // then free inputs + for (DataEntryInfo *in : in_data) { + // ref_count == 0 means it is taken by inplace op + if (in->ref_count == 0) { + CHECK_EQ(in->inplace_op_id, static_cast(nid)); + continue; + } + // if we decrease it to zero, means we are ready to relase + if (--in->ref_count == 0) { + allocator.Release(in->storage_id, nid); + } + } + // check out again, if there is ref_count == 0, release it + for (DataEntryInfo *out : out_data) { + if (out->ref_count == 0) { + allocator.Release(out->storage_id, nid); + } + } + } + // one pass complete, allocate real memory + allocator.InitStorages(); + // get the real data NArray into the DataEntryInfo + for (size_t i = 0; i < topo_order_.size(); ++i) { + uint32_t nid = topo_order_[i]; + if (!op_nodes_[nid].activated) continue; + for (DataEntryInfo &out : op_nodes_[nid].outputs) { + CHECK_NE(out.type, kNotInitialized); + if (out.type == kInternalAllocated) { + out.data = allocator.Get(out.storage_id, out.shape); + } + } + } + for (StaticGraph::DataEntry e : graph_.heads) { + DataEntryInfo &info = op_nodes_[e.source_id].outputs[e.index]; + CHECK_EQ(info.type, kInternalAllocated); + heads_narray_.push_back(info.data); + } +} + +void GraphExecutor::InitOpNodes() { + for (size_t i = 0; i < topo_order_.size(); ++i) { + uint32_t nid = topo_order_[i]; + if (!op_nodes_[nid].activated) continue; + if (graph_.nodes[nid].is_variable()) continue; + OpNode& op_node = op_nodes_[nid]; + if (graph_.nodes[nid].is_forward()) { + op_node.op.reset(graph_.nodes[nid].op->CreateOperator(op_node.ctx)); + } else { + CHECK(graph_.nodes[nid].is_backward()); + op_node.op.reset(new BackwardOpWrapper( + graph_.nodes[graph_.nodes[nid].backward_source_id].op.get(), + op_nodes_[graph_.nodes[nid].backward_source_id].op)); + } + bool allow_cache = true; + for (StaticGraph::DataEntry e : graph_.nodes[nid].inputs) { + DataEntryInfo& info = op_nodes_[e.source_id].outputs[e.index]; + if (info.type == kTobeBindByExternal) allow_cache = false; + } + for (DataEntryInfo& info : op_node.outputs) { + if (info.type == kTobeBindByExternal) allow_cache = false; + } + if (allow_cache) { + op_node.cached_exec = GetOpExecEntry(nid); + } + } +} + +void GraphExecutor::RunOps(size_t topo_start, size_t topo_end) { + for (size_t i = topo_start; i < topo_end; ++i) { + uint32_t nid = topo_order_[i]; + if (!op_nodes_[nid].activated) continue; + if (graph_.nodes[nid].is_variable()) continue; + OpNode& opnode = op_nodes_[nid]; + if (opnode.cached_exec.exec_fun != nullptr) { + DAGEngine::Get()->Push( + opnode.cached_exec.exec_fun, + opnode.ctx, + opnode.cached_exec.use_vars, + opnode.cached_exec.mutate_vars); + } else { + auto exec = GetOpExecEntry(nid); + DAGEngine::Get()->Push( + exec.exec_fun, + opnode.ctx, + exec.use_vars, + exec.mutate_vars); + } + } +} + +void GraphExecutor::Forward() { + RunOps(0, num_forward_nodes_); +} + +void GraphExecutor::Backward(const std::vector &head_grads) { + CHECK_EQ(head_grad_nodes_.size(), head_grads.size()); + for (size_t i = 0; i < head_grad_nodes_.size(); ++i) { + uint32_t 
nid = head_grad_nodes_[i]; + CHECK(graph_.nodes[nid].is_variable()); + DataEntryInfo &info = op_nodes_[nid].outputs[0]; + CHECK_EQ(info.type, kTobeBindByExternal); + info.data = head_grads[i]; + } + RunOps(num_forward_nodes_, topo_order_.size()); +} + +Executor *Executor::Bind(Symbol symbol, + Context ctx, + const std::vector &in_args, + const std::vector &arg_grad_store, + const std::vector &grad_req_type) { + GraphExecutor *exec = new GraphExecutor(); + exec->Init(symbol, ctx, in_args, arg_grad_store, grad_req_type); + return exec; +} +} // namespace mxnet diff --git a/src/symbol/graph_executor.h b/src/symbol/graph_executor.h new file mode 100644 index 000000000000..ccc4e64a904f --- /dev/null +++ b/src/symbol/graph_executor.h @@ -0,0 +1,179 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file graph_executor.h + * \brief Executor to execute the Forward and Backward on Composition Graph. +*/ +#ifndef MXNET_SYMBOL_GRAPH_EXECUTOR_H_ +#define MXNET_SYMBOL_GRAPH_EXECUTOR_H_ + +#include +#include +#include +#include +#include "./graph_memory_allocator.h" + +namespace mxnet { +/*! + * \brief Executor of a computation graph. + */ +class GraphExecutor : public Executor { + public: + virtual ~GraphExecutor() {} + virtual void Forward(); + virtual void Backward(const std::vector &head_grads); + virtual const std::vector &heads() const { + return heads_narray_; + } + // implement Executor::Bind, only call it once. + inline void Init(Symbol symbol, + Context ctx, + const std::vector &in_args, + const std::vector &arg_grad_store, + const std::vector &grad_req_type) { + CHECK_EQ(grad_req_type.size(), arg_grad_store.size()); + bool need_backward = false; + for (auto req : grad_req_type) { + if (req != kNullOp) need_backward = true; + } + this->InitGraph(symbol, ctx, need_backward); + this->InitDataEntryInfo(in_args, arg_grad_store, grad_req_type); + this->InitDataEntryMemory(); + this->InitOpNodes(); + } + + protected: + // internal class of wrapping BackwardOp as ForwardOp + class BackwardOpWrapper; + // type of data entry + enum DataEntryType { + // memory is binded by external NArray in Bind + kBindByExternal, + // to be binded by external NArray in Forward and Backward + kTobeBindByExternal, + // internal memory, allocated + kInternalAllocated, + // internal memory, to be allocated + kNotInitialized + }; + // Additional information about each data entry + struct DataEntryInfo { + // the actual data for the entry + NArray data; + // write request to this entry + OpReqType op_req; + // the operatio node that will take + // this DataEntry as inplace input + int inplace_op_id; + // data entry type + DataEntryType type; + // shape of this entry + TShape shape; + // storage id from allocator if it is internal allocation. + GraphStorageAllocator::StorageID storage_id; + // reference count on how many times this entry is being used. + // That is how many operators and heads need this DataEntry + // this is a temporal variable that is used during initialization. 
+ uint32_t ref_count; + // constructor + DataEntryInfo() + : op_req(kNullOp), + inplace_op_id(-1), + type(kNotInitialized), + ref_count(0) {} + }; + // all the information needed to push the op to engine + struct OpExecEntry { + // execution function for + DAGEngine::Op exec_fun; + // variables to read from + std::vector use_vars; + // variables to mutate + std::vector mutate_vars; + // constructor + OpExecEntry() : exec_fun(nullptr) {} + }; + // Information about operational node + struct OpNode { + // whether this op node is activated + bool activated; + // the context of the node + Context ctx; + // data entry information about outputs of op + std::vector outputs; + // The following parts are constructed in InitOpNodes + // the real operator + std::shared_ptr op; + // op context, that is defined for this op. + OpContext op_ctx; + // executor, this is only allocated for nodes + // whose inputs, outputs are pre-defined. + // otherwise cached_exec.exec_fun == nullptr + OpExecEntry cached_exec; + // constructor + OpNode() : activated(false) {} + }; + /*! + * \brief Get input option of a node. + * This function is overriden for both Forward and Backward node. + * + * \param node_id node index of node in StaticGraph + * \param in_data the input data entry to the node + * \param out_data the output data entry in the graph + * \return the paired inplace option. + */ + template + inline std::vector > GetInplaceOption( + uint32_t node_id, + const std::vector &in_data, + const std::vector &out_data) const; + /*! + * \brief Get resource requirement of a node. + * This function is overriden for both Forward and Backward node. + * \param node_id node index of node in StaticGraph + * \return the desired resource request. + */ + inline std::vector GetResource(uint32_t node_id) const; + /*! + * \brief Get number of outputs of a node. + * This function is overriden for both Forward and Backward node. + * \param node_id node index of node in StaticGraph + * \return the number of outputs of the node. + */ + inline int GetNumOutputs(uint32_t node_id) const; + /*! + * \brief get execution entry for an OpNode. + * This function can only be called after initialization is done. + * \param node_id the id of operational node. + * \return the execution entry. 
+ */ + inline OpExecEntry GetOpExecEntry(uint32_t node_id); + // initialize the internal graph structure + void InitGraph(Symbol symbol, Context ctx, bool need_backward); + // initialize internal DataEntryInfo, reference counting + void InitDataEntryInfo(const std::vector &in_args, + const std::vector &arg_grad_store, + const std::vector &grad_req_type); + // initialize internal data entries NArray + void InitDataEntryMemory(); + // initialize OpNode data structure + void InitOpNodes(); + // run ops from topo order start to end + void RunOps(size_t topo_start, size_t topo_end); + // internal computational graph + StaticGraph graph_; + // topological order of nodes in computation graph + // backward nodes always follow forward nodes + std::vector topo_order_; + // number of forward nodes in the graph + size_t num_forward_nodes_; + // head gradient node in the graph, if there is backward pass + std::vector head_grad_nodes_; + // argument node in the graph, if there is backward pass + std::vector > arg_grads_; + // operational nodes + std::vector op_nodes_; + // head NArrays + std::vector heads_narray_; +}; // class GraphExecutor +} // namespace mxnet +#endif // MXNET_SYMBOL_GRAPH_EXECUTOR_H_ diff --git a/src/symbol/graph_memory_allocator.h b/src/symbol/graph_memory_allocator.h new file mode 100644 index 000000000000..4c047040a041 --- /dev/null +++ b/src/symbol/graph_memory_allocator.h @@ -0,0 +1,145 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file graph_memory_allocator.h + * \brief Memory allocator for graph executor. +*/ +#ifndef MXNET_SYMBOL_GRAPH_MEMORY_ALLOCATOR_H_ +#define MXNET_SYMBOL_GRAPH_MEMORY_ALLOCATOR_H_ + +#include +#include +#include +#include + +namespace mxnet { +/*! + * \brief Memory allocators for the GraphExecutor. + * This class is intended to be used by GraphExecutor + * to allocate the memory for each DataEntryInfo. + * + * The class algorithm works in two phase: + * (1) Planning Phase: GraphExecutor call Request and Release + * to request and release resources according to dependency. + * - Each call to Request will get a ResourceID that is used to + * identify the memory block assigned to each DataEntryInfo. + * (2) Allocating phase: GraphExecutor call InitMemory. + * - Then each DataEntry will call Get to get the real NArray. + * (3) All the memory will be freed up when reference to all the related NArray ends. + */ +class GraphStorageAllocator { + public: + /*! \brief resource index */ + typedef uint64_t StorageID; + /*! \brief constructor to the graph memory allocator */ + explicit GraphStorageAllocator(StaticGraph *graph); + /*! + * \brief Request a memory. + * \param ctx the context of the graph + * \param shape shape of the NArray we want + * \param node_id the node that is requesting the memory, used as hint. + */ + StorageID Request(Context ctx, TShape shape, uint32_t node_id); + /*! + * \brief Release a memory. + * \param id the storage ID of the memory. + * \param node_id the node id in the graph that is releasing the memory. + */ + void Release(StorageID id, uint32_t node_id); + /*! \brief Initialize all the memories requested */ + void InitStorages(); + /*! + * \brief Get the the memory allocated in planning phase. + * \param id the storage id allocated in planning phase. + * \param shape the shape of the NArray requested. + */ + NArray Get(StorageID id, TShape shape); + + private: + /*! \brief internal storage entry */ + struct StorageEntry { + /*! \brief id of the storage */ + StorageID id; + /*! 
\brief the context of the storage */ + Context ctx; + /*! \brief maximum size of the storage that is requested */ + size_t max_size; + /*! \brief the actual NArray to hold the data */ + NArray data; + /*! \brief constructor */ + StorageEntry() : max_size(0) {} + }; + /*! + * \brief Allocate a StorageID when Request cannot found existing ones. + * \param ctx the context of the graph + * \param shape shape of the NArray we want + */ + StorageID Alloc(Context ctx, size_t size); + + /*! \brief reference to the computation graph */ + StaticGraph *graph_; + /*! \brief all the resources available */ + std::vector > data_; + /*! + * \brief free list of storage entries, maps size to free list + */ + std::multimap free_; +}; + +// put implementation in header files for now +GraphStorageAllocator::GraphStorageAllocator(StaticGraph *graph) + : graph_(graph) {} + +GraphStorageAllocator::StorageID +GraphStorageAllocator::Alloc(Context ctx, size_t size) { + StorageID id = static_cast(data_.size()); + std::unique_ptr ptr(new StorageEntry()); + ptr->id = id; + ptr->ctx = ctx; + ptr->max_size = size; + data_.push_back(std::move(ptr)); + return id; +} + +GraphStorageAllocator::StorageID +GraphStorageAllocator::Request(Context ctx, TShape shape, uint32_t node_id) { + size_t size = shape.Size(); + if (free_.count(size) != 0) { + auto begin = free_.lower_bound(size); + auto end = free_.upper_bound(size); + // vector of possible candidates + for (auto it = begin; it != end; ++it) { + StorageEntry *e = it->second; + if (e->ctx != ctx) continue; + // Use exect matching strategy + // TODO(bing): think of other strategies, for example, rough match. + if (e->max_size != size) continue; + // find a exact match, erase from map and return + free_.erase(it); + return e->id; + } + } + // cannot find anything return a new one. + return this->Alloc(ctx, size); +} + +void GraphStorageAllocator::Release(StorageID id, uint32_t node_id) { + StorageEntry *e = data_[id].get(); + free_.insert({e->max_size, e}); +} + +void GraphStorageAllocator::InitStorages() { + for (size_t i = 0; i < data_.size(); ++i) { + StorageEntry *e = data_[i].get(); + TShape shape = mshadow::Shape1(e->max_size); + e->data = NArray(shape, e->ctx); + } +} + +NArray GraphStorageAllocator::Get(StorageID id, TShape shape) { + StorageEntry *e = data_[id].get(); + // TODO(bing): change to return e->data.Slice(0, shape.Size()).Reshape(shape); + // once we are able to get NArray that shares memory from a big chunk. 
+ return NArray(shape, e->ctx); +} +} // namespace mxnet +#endif // MXNET_SYMBOL_GRAPH_MEMORY_ALLOCATOR_H_ diff --git a/src/symbol/static_graph.cc b/src/symbol/static_graph.cc index 3bec3427fbb3..5eb0ad14a282 100644 --- a/src/symbol/static_graph.cc +++ b/src/symbol/static_graph.cc @@ -152,6 +152,18 @@ bool StaticGraph::InferShape(std::vector *in_shape, return true; } +StaticGraph::Node StaticGraph::CreateSumNode( + const std::vector &grad_source) { + // find multiple gradients, need aggregate + std::ostringstream os_size; + Node agg_node; + agg_node.op.reset(OperatorProperty::Create("ElementWiseSum")); + os_size << grad_source.size(); + agg_node.op->Init({{"size", os_size.str()}}); + agg_node.inputs = grad_source; + return std::move(agg_node); +} + void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, std::vector > *arg_grads) { arg_grads->clear(); @@ -162,14 +174,15 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, std::map > grad_map; // allocate head gradient nodes for (DataEntry head : heads) { - uint32_t nid = static_cast(nodes.size()); - // create a variable node for gradient input - nodes.push_back(Node()); - Node &node = nodes[nid]; + Node node; std::ostringstream os; os << nodes[head.source_id].name << '_' << head.index << "_grad"; // TODO(bing): add index to name node.name = os.str(); + // node id + uint32_t nid = static_cast(nodes.size()); + nodes.push_back(std::move(node)); + // create a variable node for gradient input DataEntry igrad(nid, 0); head_grad_nodes->push_back(nid); // update gradient map @@ -204,31 +217,25 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, if (gnodes.size() == 1) { out_grad.push_back(gnodes[0]); } else { - // find multiple gradients, need aggregate - std::ostringstream os_size, os_name; - uint32_t agg_node_id = static_cast(nodes.size()); - nodes.push_back(Node()); - Node &agg_node = nodes[agg_node_id]; - agg_node.op.reset(OperatorProperty::Create("ElementWiseSum")); - os_size << gnodes.size(); - agg_node.op->Init({{"size", os_size.str()}}); + std::ostringstream os_name; + Node agg_node = StaticGraph::CreateSumNode(gnodes); os_name << nodes[nid].name << '_' << i << "_out_grad_agg"; agg_node.name = os_name.str(); - agg_node.inputs = gnodes; + uint32_t agg_node_id = static_cast(nodes.size()); + nodes.push_back(std::move(agg_node)); out_grad.push_back(DataEntry(agg_node_id, 0)); } } // Create a gradient backward node - nodes.push_back(Node()); - uint32_t grad_node_id = static_cast(nodes.size()); - Node &grad_node = nodes[grad_node_id]; + Node grad_node; // Point to the corresponding source grad_node.backward_source_id = nid; // select out the dependent inputs grad_node.inputs = nodes[nid].op->BackwardInputs( out_grad, nodes[nid].inputs, out_data); grad_node.name = nodes[nid].name + "_backward"; - + uint32_t grad_node_id = static_cast(nodes.size()); + nodes.push_back(std::move(grad_node)); // update gradient map for (size_t i = 0; i < nodes[nid].inputs.size(); ++i) { DataEntry idata = nodes[nid].inputs[i]; From 7a1832cb719fa412c85f9c3bc5de22a7f2634c51 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Wed, 19 Aug 2015 20:35:55 -0600 Subject: [PATCH 10/11] update NArray --- include/mxnet/narray.h | 66 +++++++++++++++++++++++++++++++++--------- python/mxnet/symbol.py | 10 +++---- src/c_api.cc | 3 +- src/narray/narray.cc | 18 ++++++++---- 4 files changed, 71 insertions(+), 26 deletions(-) diff --git a/include/mxnet/narray.h b/include/mxnet/narray.h index 92257b3f0269..cc5d2cf1b4a2 100644 --- a/include/mxnet/narray.h 
+++ b/include/mxnet/narray.h @@ -35,7 +35,7 @@ class NArray { */ NArray(const TShape &shape, Context ctx, bool delay_alloc = false) - : ptr_(new Chunk(shape, ctx, delay_alloc)) { + : ptr_(new Chunk(shape.Size(), ctx, delay_alloc)), shape_(shape), offset_(0) { } /*! * \brief constructing a static NArray that shares data with TBlob @@ -45,19 +45,20 @@ class NArray { * \param dev_id the device id this tensor sits at */ NArray(const TBlob &data, int dev_id) - : ptr_(new Chunk(data, dev_id)) { + : ptr_(new Chunk(data, dev_id)), shape_(data.shape_), offset_(0) { } /*! * \return the shape of current NArray */ inline const TShape &shape() const { - return ptr_->data.shape_; + return shape_; } /*! * \return the data TBlob */ - inline const TBlob &data() const { - return ptr_->data; + inline TBlob data() const { + return TBlob(static_cast(ptr_->shandle.dptr) + offset_, \ + shape_, ptr_->shandle.ctx.dev_mask); } /*! * \return the context of NArray, this function is only valid when the NArray is not empty @@ -123,6 +124,42 @@ class NArray { * \return the new copy */ NArray Copy(Context ctx) const; + /*! + * \brief Slice a NArray + * + * \param begin begin index in first dim + * \param end end index in first dim + * + * \return sliced NArray + */ + NArray Slice(index_t begin, index_t end) { + NArray ret = *this; + CHECK_GE(shape_.ndim(), 0) << "NArray not initialized"; + CHECK_GE(shape_[0], end) << "Chunk is smaller than required"; + size_t length = 1; + if (shape_.ndim() == 1) { + ret.offset_= begin; + } else { + for (index_t i = 1; i < shape_.ndim(); ++i) { + length *= shape_[i]; + } + ret.offset_ = begin * length; + } + return ret; + } + /*! + * \brief Reshape current NArray + * + * \param shape new shape + * \return NArray in new shape + */ + NArray Reshape(const TShape &shape) { + CHECK_GE(shape_.Size(), shape.Size()) \ + << "required shape is larger than chunk"; + NArray ret = *this; + ret.shape_ = shape; + return ret; + } private: /*! \brief the real data chunk that backs NArray */ @@ -131,8 +168,6 @@ class NArray { Storage::Handle shandle; /*! \brief variable from DAG engine */ DAGEngine::Variable var; - /*! \brief holds the data content */ - TBlob data; /*! * \brief if this is true, this means the data do not come * from Storage, and do not need to be freed @@ -146,25 +181,25 @@ class NArray { } /*! \brief construct from static data */ Chunk(const TBlob &data, int dev_id) - : data(data), - static_data(true), + : static_data(true), delay_alloc(false) { var = DAGEngine::Get()->NewVar(); shandle.ctx = Context(data.dev_mask_, dev_id); + shandle.dptr = data.dptr_; + shandle.size = data.shape_.Size() * sizeof(real_t); } /*! \brief construct a new chunk */ - Chunk(const TShape &shape, Context ctx, bool delay_alloc_) + Chunk(uint64_t size, Context ctx, bool delay_alloc_) : static_data(false), delay_alloc(true) { var = DAGEngine::Get()->NewVar(); - data.shape_ = shape; + shandle.size = size * sizeof(real_t); shandle.ctx = ctx; if (!delay_alloc_) this->CheckAndAlloc(); } /*! \brief check if delay alloc is on, do alloc if not yet done */ inline void CheckAndAlloc(void) { if (delay_alloc) { - shandle = Storage::Get()->Alloc(data.shape_.Size() * sizeof(real_t), shandle.ctx); - data = TBlob(static_cast(shandle.dptr), data.shape_, shandle.ctx.dev_mask); + shandle = Storage::Get()->Alloc(shandle.size, shandle.ctx); delay_alloc = false; } } @@ -183,6 +218,11 @@ class NArray { }; /*! \brief internal data of NArray */ std::shared_ptr ptr_; + /*! \brief shape of current NArray */ + TShape shape_; + /*! 
\brief offset in chunk */ + size_t offset_; + // add friend to helper functions friend void CopyFromTo(const NArray &from, NArray *to); template diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py index c491eacb1ac4..dbe60ddb7a78 100644 --- a/python/mxnet/symbol.py +++ b/python/mxnet/symbol.py @@ -253,7 +253,7 @@ def forward(self, inputs): if not isinstance(obj, NArray): raise TypeError("inputs must be NArray") narray = c_array([item.handle for item in inputs]) - check_call(_LIB.MXExecutorForward (self.hanlde, mx_uint(len(inputs), narray)) + check_call(_LIB.MXExecutorForward (self.hanlde, mx_uint(len(inputs), narray))) def backward(self, grads): """do backward on heads' grads @@ -269,7 +269,7 @@ def backward(self, grads): if not isinstance(obj, NArray): raise TypeError("inputs must be NArray") narray = c_array(NArrayHandle, [item.handle for item in grads]) - check_call(_LIB.MXExecutorForward (self.hanlde, mx_uint(len(grads), narray)) + check_call(_LIB.MXExecutorForward (self.hanlde, mx_uint(len(grads), narray))) def heads(self): """list all heads' output narray @@ -311,9 +311,9 @@ def Bind(sym, ctx, args, args_grad, reqs): raise TypeError("Context type error") args_handle = c_array(NArrayHandle, [item.handle for item in args]) args_grad_handle = c_array(NArrayHandle, [item.handle for item in args_grad]) - reqs_array = c_array(mx_uint, mx_uint(enum[item]) for item in req) + reqs_array = c_array(mx_uint, [mx_uint(enum[item]) for item in req]) handle = ExecutorHandle() check_call(_LIB.MXExecutorBind(handle, sym.handle, \ mx_uint(ctx.device_mask), mx_uint(ctx.device_id), \ - args_handle, args_grad_handle, reqs_array) - return Executor(handle); \ No newline at end of file + mx_uint(len(args), args_handle, args_grad_handle, reqs_array))) + return Executor(handle) diff --git a/src/c_api.cc b/src/c_api.cc index ab7899767555..d861ac00fc41 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -528,7 +528,6 @@ MXNET_DLL int MXExecutorBind(ExecutorHandle handle, NArrayHandle *arg_grad_store, mx_uint *grad_req_type) { API_BEGIN(); - Executor *exec = static_cast(handle); Symbol *symb = static_cast(symbol_handle); Context ctx = Context(dev_mask, dev_id); NArray **in_args_ptr = reinterpret_cast(in_args); @@ -541,7 +540,7 @@ MXNET_DLL int MXExecutorBind(ExecutorHandle handle, arg_grad_vec.push_back(*(arg_grad_ptr[i])); grad_req_vec.push_back(static_cast(grad_req_type[i])); } - handle = exec->Bind(*symb, ctx, in_args_vec, arg_grad_vec, grad_req_vec); + handle = Executor::Bind(*symb, ctx, in_args_vec, arg_grad_vec, grad_req_vec); API_END(); } diff --git a/src/narray/narray.cc b/src/narray/narray.cc index 831041bd1496..3618a38c9d59 100644 --- a/src/narray/narray.cc +++ b/src/narray/narray.cc @@ -37,14 +37,16 @@ inline void BinaryOp(const NArray &lhs, case cpu::kDevMask: DAGEngine::Get()->Push([lhs, rhs, ret](RunContext ctx) { ret.ptr_->CheckAndAlloc(); - narray::Eval(lhs.ptr_->data, rhs.ptr_->data, &ret.ptr_->data, ctx); + TBlob tmp = ret.data(); + narray::Eval(lhs.data(), rhs.data(), &tmp, ctx); }, lhs.ctx(), {lhs.ptr_->var, rhs.ptr_->var}, {ret.ptr_->var}); break; #if MXNET_USE_CUDA case gpu::kDevMask: DAGEngine::Get()->Push([lhs, rhs, ret](RunContext ctx) { ret.ptr_->CheckAndAlloc(); - narray::Eval(lhs.ptr_->data, rhs.ptr_->data, &ret.ptr_->data, ctx); + TBlob tmp = ret.data(); + narray::Eval(lhs.data(), rhs.data(), &tmp, ctx); }, lhs.ctx(), {lhs.ptr_->var, rhs.ptr_->var}, {ret.ptr_->var}); break; #endif @@ -64,14 +66,16 @@ void CopyFromTo(const NArray &from, NArray *to) { if (a == cpu::kDevMask && b == 
cpu::kDevMask) { DAGEngine::Get()->Push([from, ret](RunContext ctx) { ret.ptr_->CheckAndAlloc(); - narray::Copy(from.ptr_->data, &ret.ptr_->data, + TBlob tmp = ret.data(); + narray::Copy(from.data(), &tmp, from.ctx(), ret.ctx(), ctx); }, from.ctx(), {from.ptr_->var}, {ret.ptr_->var}); } else if (a == cpu::kDevMask && b == gpu::kDevMask) { #if MXNET_USE_CUDA DAGEngine::Get()->Push([from, ret](RunContext ctx) { ret.ptr_->CheckAndAlloc(); - narray::Copy(from.ptr_->data, &ret.ptr_->data, + TBlob tmp = ret.data(); + narray::Copy(from.data(), &tmp, from.ctx(), ret.ctx(), ctx); }, ret.ctx(), {from.ptr_->var}, {ret.ptr_->var}); #else @@ -81,7 +85,8 @@ void CopyFromTo(const NArray &from, NArray *to) { #if MXNET_USE_CUDA DAGEngine::Get()->Push([from, ret](RunContext ctx) { ret.ptr_->CheckAndAlloc(); - narray::Copy(from.ptr_->data, &ret.ptr_->data, + TBlob tmp = ret.data(); + narray::Copy(from.data(), &tmp, from.ctx(), ret.ctx(), ctx); }, from.ctx(), {from.ptr_->var}, {ret.ptr_->var}); #else @@ -91,7 +96,8 @@ void CopyFromTo(const NArray &from, NArray *to) { #if MXNET_USE_CUDA DAGEngine::Get()->Push([from, ret](RunContext ctx) { ret.ptr_->CheckAndAlloc(); - narray::Copy(from.ptr_->data, &ret.ptr_->data, + TBlob tmp = ret.data(); + narray::Copy(from.data(), &tmp, from.ctx(), ret.ctx(), ctx); }, from.ctx(), {from.ptr_->var}, {ret.ptr_->var}); #else From 910738d84b9906673711af35c522cbdf778f856f Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Thu, 20 Aug 2015 00:14:58 -0600 Subject: [PATCH 11/11] MNIST is OK --- include/mxnet/c_api.h | 151 ++++++++-------------------- include/mxnet/narray.h | 11 +- include/mxnet/operator.h | 2 +- python/mxnet/executor.py | 57 +++++++++++ python/mxnet/symbol.py | 125 ++++++----------------- python/test_mnist.py | 55 +++++----- src/c_api.cc | 55 +++++----- src/symbol/graph_executor.cc | 39 +++++-- src/symbol/graph_executor.h | 7 ++ src/symbol/graph_memory_allocator.h | 36 +++---- 10 files changed, 256 insertions(+), 282 deletions(-) create mode 100644 python/mxnet/executor.py diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index cd0b6b2206c1..38132cb169a5 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -34,12 +34,10 @@ typedef void *AtomicSymbolCreator; typedef void *SymbolHandle; /*! \brief handle to a AtomicSymbol */ typedef void *AtomicSymbolHandle; -/*! \brief handle to a NArrayOperator */ -typedef void *OperatorHandle; -/*! \brief handle to a DataIterator */ -typedef void *DataIterHandle; /*! \brief handle to an Executor */ typedef void *ExecutorHandle; +/*! \brief handle to a DataIterator */ +typedef void *DataIterHandle; /* * \brief return str message of the last error * all function in this file will return 0 when success @@ -353,63 +351,59 @@ MXNET_DLL int MXSymbolInferShape(SymbolHandle sym, const mx_uint ***out_shape_data, int *complete); //-------------------------------------------- -// Part 4: operator interface on NArray +// Part 4: Executor interface //-------------------------------------------- /*! - * \brief create operator from symbol - * \param sym the symbol to create operator from - * \param dev_mask device mask to indicate the device type - * \param dev_id the device id we want to bind the symbol to - * \param out the corresponding function handle - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXOpCreate(SymbolHandle sym, - int dev_mask, - int dev_id, - OperatorHandle *out); -/*! 
- * \brief free the operator handle - * \param op the handle to be freed + * \brief Executor forward method + * + * \param handle executor handle * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXOpFree(OperatorHandle op); +MXNET_DLL int MXExecutorForward(ExecutorHandle handle); /*! - * \brief return an array to describe the arguments - * of this operator - * \param out_size the size of output array - * \param out_array the array of parameter requirments + * \brief Excecutor run backward + * + * \param handle execute handle + * \param len lenth + * \param head_grads NArray handle for heads' gradient + * * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXOpDescribeArgs(mx_uint *out_size, - int **out_array); +MXNET_DLL int MXExecutorBackward(ExecutorHandle handle, + mx_uint len, + NArrayHandle *head_grads); + /*! - * \brief call forward on the operator - * \param op the operator handle - * \param in_data array of input narray to the operator - * \param out_data array of output NArray to hold the result + * \brief Get executor's head NArray + * + * \param handle executor handle + * \param out_size output narray vector size + * \param out out put narray handles * \return 0 when success, -1 when failure happens */ -MXNET_DLL int MXOpForward(OperatorHandle op, - NArrayHandle *in_data, - NArrayHandle *out_data); +MXNET_DLL int MXExecutorHeads(ExecutorHandle handle, + mx_uint *out_size, + NArrayHandle **out); + /*! - * \brief call backward on the operator - * \param op the operator handle - * \param grad_next array of output gradients - * \param in_data array of input narray to the operator - * \param out_data array of output narray to the operator - * \param out_grad array to holds the gradient on these input - * can be NULL if that position request is kNullOp - * \param reqs gradient request type + * \brief Generate Executor from symbol + * + * \param symbol_handle symbol handle + * \param len length + * \param in_args in args array + * \param arg_grad_store arg grads handle array + * \param grad_req_type grad req array + * \param out output executor handle * \return 0 when success, -1 when failure happens - * \sa mxnet::Operator::GradReqType */ -MXNET_DLL int MXOpBackward(OperatorHandle op, - NArrayHandle *grad_next, - NArrayHandle *in_data, - NArrayHandle *out_data, - NArrayHandle *out_grad, - mx_uint *reqs); +MXNET_DLL int MXExecutorBind(SymbolHandle symbol_handle, + int dev_mask, + int dev_id, + mx_uint len, + NArrayHandle *in_args, + NArrayHandle *arg_grad_store, + mx_uint *grad_req_type, + ExecutorHandle *out); //-------------------------------------------- // Part 5: IO Interface @@ -460,65 +454,4 @@ MXNET_DLL int MXIOGetData(DataIterHandle handle, MXNET_DLL int MXIOGetLabel(DataIterHandle handle, NArrayHandle *out); -//-------------------------------------------- -// Part 56: Executor -//-------------------------------------------- -/*! 
- * \brief Executor forward method - * - * \param handle executor handle - * \param len length of narray handles - * \param input input NArray handles - * - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXExecutorForward(ExecutorHandle handle, - mx_uint len, - NArrayHandle *input); - -/** - * \brief Excecutor run backward - * - * \param handle execute handle - * \param len lenth - * \param head_grads NArray handle for heads' gradient - * - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXExecutorBackward(ExecutorHandle handle, - mx_uint len, - NArrayHandle *head_grads); - -/** - * \brief Get executor's head NArray - * - * \param handle executor handle - * \param out_size output narray vector size - * \param out out put narray handles - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXExecutorHeads(ExecutorHandle handle, - mx_uint *out_size, - NArrayHandle **out); - -/** - * \brief Generate Executor from symbol - * - * \param handle executor hanlde (to be generated) - * \param symbol_handle symbol handle - * \param len length - * \param in_args in args array - * \param arg_grad_store arg grads handle array - * \param grad_req_type grad req array - * \return 0 when success, -1 when failure happens - */ -MXNET_DLL int MXExecutorBind(ExecutorHandle handle, - SymbolHandle symbol_handle, - int dev_mask, - int dev_id, - mx_uint len, - NArrayHandle *in_args, - NArrayHandle *arg_grad_store, - mx_uint *grad_req_type); - #endif // MXNET_C_API_H_ diff --git a/include/mxnet/narray.h b/include/mxnet/narray.h index cc5d2cf1b4a2..ed2b72bc4cc5 100644 --- a/include/mxnet/narray.h +++ b/include/mxnet/narray.h @@ -126,13 +126,13 @@ class NArray { NArray Copy(Context ctx) const; /*! * \brief Slice a NArray - * + * * \param begin begin index in first dim * \param end end index in first dim - * + * * \return sliced NArray */ - NArray Slice(index_t begin, index_t end) { + inline NArray Slice(index_t begin, index_t end) const { NArray ret = *this; CHECK_GE(shape_.ndim(), 0) << "NArray not initialized"; CHECK_GE(shape_[0], end) << "Chunk is smaller than required"; @@ -145,15 +145,16 @@ class NArray { } ret.offset_ = begin * length; } + ret.shape_[0] = end - begin; return ret; } /*! * \brief Reshape current NArray - * + * * \param shape new shape * \return NArray in new shape */ - NArray Reshape(const TShape &shape) { + inline NArray Reshape(const TShape &shape) const { CHECK_GE(shape_.Size(), shape.Size()) \ << "required shape is larger than chunk"; NArray ret = *this; diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index bc1d79b20b38..e60afe6948a7 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -330,7 +330,7 @@ class OperatorProperty { const std::vector &out_data) const { int counter = 0; std::vector out_grad_index(out_grad.size()); - std::vector in_data_index(out_data.size()); + std::vector in_data_index(in_data.size()); std::vector out_data_index(out_data.size()); for (size_t i = 0; i < out_grad_index.size(); ++i) { out_grad_index[i] = counter++; diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py new file mode 100644 index 000000000000..7352bfe2f289 --- /dev/null +++ b/python/mxnet/executor.py @@ -0,0 +1,57 @@ +# coding: utf-8 +""" code for executor. 
""" +from __future__ import absolute_import + +import ctypes +from .base import _LIB +from .base import c_array, c_str, mx_uint, NArrayHandle, ExecutorHandle +from .base import check_call +from .narray import NArray + +class Executor(object): + """ Executor is the actual executing object of MXNet.""" + def __init__(self, handle): + """Init an executor from handle + + Parameters + ---------- + handle: ExecutorHandle + ExecutorHandle generated by calling Bind + """ + if not isinstance(handle, ExecutorHandle): + raise TypeError("Handle type error") + self.handle = handle + + def forward(self): + """Do forward.""" + check_call(_LIB.MXExecutorForward(self.handle)) + + def backward(self, grads): + """Do backward on heads' gradient. + + Parameters + ---------- + grads: Array of NArray + heads' gradient + """ + for obj in grads: + if not isinstance(obj, NArray): + raise TypeError("inputs must be NArray") + narray = c_array(NArrayHandle, [item.handle for item in grads]) + check_call(_LIB.MXExecutorBackward(self.handle, len(grads), narray)) + + def heads(self): + """list all heads' output narray + + Returns + ------- + A list of narray binded to the heads of executor. + """ + # TODO: think of access, make heads read only. + # (consider support read only NArray(NArrayView)) + # Otherwise some of the internal might depends on out_data + # if user set the content of the head, the backward behavior can be incorrect. + out_size = mx_uint() + handles = ctypes.POINTER(NArrayHandle)() + check_call(_LIB.MXExecutorHeads(self.handle, ctypes.byref(out_size), ctypes.byref(handles))) + return [NArray(NArrayHandle(handles[i])) for i in range(out_size.value)] diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py index dbe60ddb7a78..6c72442cb3f9 100644 --- a/python/mxnet/symbol.py +++ b/python/mxnet/symbol.py @@ -5,13 +5,14 @@ import ctypes from .base import _LIB -from .base import c_array, c_str, mx_uint, NArrayHandle, ExecutorHandle -from .base import SymbolHandle +from .base import c_array, c_str, mx_uint, NArrayHandle, ExecutorHandle, SymbolHandle from .base import check_call from .narray import NArray +from .context import Context +from .executor import Executor class Symbol(object): - """SymbolCreator is a function that takes Param and return symbol""" + """Symbol is symbolic graph of the mxnet.""" _registry = None @staticmethod @@ -224,96 +225,36 @@ def debug_str(self): self.handle, ctypes.byref(debug_str))) return debug_str.value -class Executor(object): - """handle of executor""" - handle = None - def __init__(self, handle): - """Init an executor from handle + def bind(self, ctx, args, args_grad, reqs): + """bind current symbol to get an executor. 
Parameters ---------- - handle: ExecutorHandle - ExecutorHandle generated by calling Bind - """ - if not isinstance(ExecutorHandle): - raise TypeError("Handle type error") - self.handle = handle - - def forward(self, inputs): - """do forward on inputs data - - Parameters - ---------- - inputs: Array of NArray - inputs narray to executor - """ - if self.handle == None: - raise Exception("Bind symbol before use executor") - for obj in inputs: - if not isinstance(obj, NArray): - raise TypeError("inputs must be NArray") - narray = c_array([item.handle for item in inputs]) - check_call(_LIB.MXExecutorForward (self.hanlde, mx_uint(len(inputs), narray))) - - def backward(self, grads): - """do backward on heads' grads - - Parameters - ---------- - grads: Array of NArray - heads' gradient - """ - if self.handle == None: - raise Exception("Bind symbol before use executor") - for obj in grads: - if not isinstance(obj, NArray): - raise TypeError("inputs must be NArray") - narray = c_array(NArrayHandle, [item.handle for item in grads]) - check_call(_LIB.MXExecutorForward (self.hanlde, mx_uint(len(grads), narray))) - - def heads(self): - """list all heads' output narray - - Returns - ------- - a list of narray binded to the heads of executor + ctx: Context + context executor to run on + args: Array of NArray + input args to the symbol + args_grad: Array of NArray + input args' gradient + reqs: Array of enum + graident requirements """ - if self.handle == None: - raise Exception("Bind symbol before use executor") - out_size = mx_uint() - handles = ctypes.POINTER(ctypes.POINTER(NArrayHandle))() - check_call(_LIB.MXExecutorHeads(self.handle, ctypes.byref(out_szie), narrays)) - return [NArray(handle[i]) for i in xrange(out_size)] - - -def Bind(sym, ctx, args, args_grad, reqs): - """Bind a symbol to get an executor - - Parameters - ---------- - sym: Symbol - symbol to be binded - ctx: Context - context executor to run on - args: Array of NArray - input args to the symbol - args_grad: Array of NArray - input args' gradient - reqs: Array of enum - graident requirements - """ - """gradient requirements enum""" - enum = {"null" : 0, "write_to" : 1, "in_place":2, "add_to" : 3} - - if not isinstance(sym, Symbol): - raise TypeError("Symbol type error") - if not isinstance(ctx, Context): - raise TypeError("Context type error") - args_handle = c_array(NArrayHandle, [item.handle for item in args]) - args_grad_handle = c_array(NArrayHandle, [item.handle for item in args_grad]) - reqs_array = c_array(mx_uint, [mx_uint(enum[item]) for item in req]) - handle = ExecutorHandle() - check_call(_LIB.MXExecutorBind(handle, sym.handle, \ - mx_uint(ctx.device_mask), mx_uint(ctx.device_id), \ - mx_uint(len(args), args_handle, args_grad_handle, reqs_array))) - return Executor(handle) + # TODO(bing): consider a more friendly interface + # For example, pass in args_grad by dict + + enum = {"null" : 0, "write_to" : 1, "in_place":2, "add_to" : 3} + if not isinstance(ctx, Context): + raise TypeError("Context type error") + args_handle = c_array(NArrayHandle, [item.handle for item in args]) + args_grad_handle = c_array(NArrayHandle, [item.handle for item in args_grad]) + reqs_array = c_array(mx_uint, [mx_uint(enum[item]) for item in reqs]) + handle = ExecutorHandle() + check_call(_LIB.MXExecutorBind(self.handle, + mx_uint(ctx.device_mask), + mx_uint(ctx.device_id), + len(args), + args_handle, + args_grad_handle, + reqs_array, + ctypes.byref(handle))) + return Executor(handle) diff --git a/python/test_mnist.py b/python/test_mnist.py index 
71d79dd607e6..f9f37d2e82e3 100644 --- a/python/test_mnist.py +++ b/python/test_mnist.py @@ -4,10 +4,13 @@ import os, cPickle, gzip def Softmax(x): + batch, nidden = x.shape maxes = np.max(x, axis=1) - x -= maxes.reshape(maxes.shape[0], 1) - e = np.exp(x) - return e / np.sum(e, axis=1) + x -= maxes.reshape(batch, 1) + x = np.exp(x) + norm = np.sum(x, axis=1) + prob = x / norm.reshape((batch, 1)) + return prob def CalAcc(out, label): pred = np.argmax(out, axis=1) @@ -63,45 +66,47 @@ def Get(self): data = mx.sym.Variable('data') fc1 = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=160) act1 = mx.sym.Activation(data = fc1, name='relu1', type="relu") -fc2 = mx.sym.FullyConnected(data=act1, name='fc2', num_hidden=10) +fc2 = mx.sym.FullyConnected(data = act1, name='fc2', num_hidden=10) args_list = fc2.list_arguments() - # infer shape data_shape = (batch_size, 784) arg_shapes, out_shapes = fc2.infer_shape(data=data_shape) arg_narrays = [mx.narray.create(shape) for shape in arg_shapes] grad_narrays = [mx.narray.create(shape) for shape in arg_shapes] mom_narrays = [mx.narray.create(shape) for shape in arg_shapes] -out_narray = mx.narray.create(out_shapes[0]) inputs = dict(zip(args_list, arg_narrays)) +np.random.seed(0) # set random weight for name, narray in inputs.items(): if "weight" in name: - narray.numpy[:, :] = np.random.uniform(-0.01, 0.01, narray.numpy.shape) - + narray.numpy[:, :] = np.random.uniform(-0.001, 0.001, narray.numpy.shape) + if "bias" in name: + narray.numpy[:] = 0.0 +req = ['write_to' for i in range(len(arg_narrays))] # bind executer -# exec = bind(fc2, args_narray, grad_narray, req) +# TODO(bing): think of a better bind interface +executor = fc2.bind(mx.Context('cpu'), arg_narrays, grad_narrays, req) # update +out_narray = executor.heads()[0] +grad_narray = mx.narray.create(out_narray.shape) + epoch = 10 momentum = 0.9 -lr = 0.01 +lr = 0.001 wd = 0.0004 def Update(mom, grad, weight): - if len(mom.numpy.shape) == 1: - mom.numpy[:] = mom.numpy * momentum - lr * (grad.numpy + wd * weight.numpy) - else: - mom.numpy[:, :] = mom.numpy * momentum - lr * (grad.numpy + wd * weight.numpy) - weight += mom + weight.numpy[:] -= lr * grad.numpy[:] block = zip(mom_narrays, grad_narrays, arg_narrays) -train = MNISTIter("train") -valid = MNISTIter("valid") +train = MNISTIter("train", batch_size) +valid = MNISTIter("valid", batch_size) + for i in xrange(epoch): # train print "Epoch %d" % i @@ -109,18 +114,22 @@ def Update(mom, grad, weight): val_acc = 0.0 while train.Next(): data, label = train.Get() - inputs["data"].numpy[:,:] = data - # exec.Forward(args_narray) + inputs["data"].numpy[:] = data + executor.forward() + out_narray.numpy[:] = Softmax(out_narray.numpy) train_acc += CalAcc(out_narray.numpy, label) - SetGradient(out_narray.numpy, label) - # exec.Backward(out_narray) + grad_narray.numpy[:] = out_narray.numpy + SetGradient(grad_narray.numpy, label) + executor.backward([grad_narray]) + for mom, grad, weight in block: Update(mom, grad, weight) + # evaluate while valid.Next(): data, label = valid.Get() - inputs["data"].numpy[:,:] = data - # exec.Forward([ inputs["data"] ]) + inputs["data"].numpy[:] = data + executor.forward() val_acc += CalAcc(out_narray.numpy, label) print "Train Acc: ", train_acc / train.nbatch print "Valid Acc: ", val_acc / valid.nbatch diff --git a/src/c_api.cc b/src/c_api.cc index d861ac00fc41..3d5e03cc0748 100644 --- a/src/c_api.cc +++ b/src/c_api.cc @@ -40,6 +40,8 @@ struct MXAPIThreadLocalEntry { std::vector ret_vec_str; /*! 
\brief result holder for returning string pointers */ std::vector ret_vec_charp; + /*! \brief result holder for returning handles */ + std::vector ret_handles; /*! \brief result holder for returning shapes */ std::vector arg_shapes, out_shapes; /*! \brief result holder for returning shape dimensions */ @@ -481,52 +483,53 @@ int MXSymbolInferShape(SymbolHandle sym, API_END(); } -MXNET_DLL int MXExecutorForward(ExecutorHandle handle, - mx_uint len, - NArrayHandle *args) { +int MXExecutorForward(ExecutorHandle handle) { API_BEGIN(); Executor *exec = static_cast(handle); - CHECK_EQ(len, 0) - << "forward do not take narray for now"; - // TODO(bing): remove args for now exec->Forward(); API_END(); } - -MXNET_DLL int MXExecutorBackward(ExecutorHandle handle, - mx_uint len, - NArrayHandle *head_grads) { +int MXExecutorBackward(ExecutorHandle handle, + mx_uint len, + NArrayHandle *head_grads) { API_BEGIN(); Executor *exec = static_cast(handle); std::vector narrays; NArray **args_ptr = reinterpret_cast(head_grads); for (mx_uint i = 0; i < len; ++i) { - narrays.push_back(*(args_ptr[i])); + narrays.push_back(*args_ptr[i]); } exec->Backward(narrays); API_END(); } - -MXNET_DLL int MXExecutorHeads(ExecutorHandle handle, - mx_uint *out_size, - NArrayHandle **out) { +int MXExecutorHeads(ExecutorHandle handle, + mx_uint *out_size, + NArrayHandle **out) { + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); Executor *exec = static_cast(handle); - std::vector ret = exec->heads(); - + std::vector heads = exec->heads(); + ret->ret_handles.resize(heads.size()); + for (size_t i = 0; i < heads.size(); ++i) { + NArray *ptr = new NArray(); + *ptr = heads[i]; + ret->ret_handles[i] = ptr; + } + *out_size = heads.size(); + *out = dmlc::BeginPtr(ret->ret_handles); API_END(); } -MXNET_DLL int MXExecutorBind(ExecutorHandle handle, - SymbolHandle symbol_handle, - int dev_mask, - int dev_id, - mx_uint len, - NArrayHandle *in_args, - NArrayHandle *arg_grad_store, - mx_uint *grad_req_type) { +int MXExecutorBind(SymbolHandle symbol_handle, + int dev_mask, + int dev_id, + mx_uint len, + NArrayHandle *in_args, + NArrayHandle *arg_grad_store, + mx_uint *grad_req_type, + ExecutorHandle *out) { API_BEGIN(); Symbol *symb = static_cast(symbol_handle); Context ctx = Context(dev_mask, dev_id); @@ -540,7 +543,7 @@ MXNET_DLL int MXExecutorBind(ExecutorHandle handle, arg_grad_vec.push_back(*(arg_grad_ptr[i])); grad_req_vec.push_back(static_cast(grad_req_type[i])); } - handle = Executor::Bind(*symb, ctx, in_args_vec, arg_grad_vec, grad_req_vec); + *out = Executor::Bind(*symb, ctx, in_args_vec, arg_grad_vec, grad_req_vec); API_END(); } diff --git a/src/symbol/graph_executor.cc b/src/symbol/graph_executor.cc index 8cf50541959e..a434f22a2fc6 100644 --- a/src/symbol/graph_executor.cc +++ b/src/symbol/graph_executor.cc @@ -23,9 +23,9 @@ class GraphExecutor::BackwardOpWrapper : public Operator { explicit BackwardOpWrapper(const OperatorProperty *prop, std::shared_ptr forward_op) : op_(forward_op) { - out_grad_.resize(prop->NumReturns()); + out_grad_.resize(prop->NumVisibleReturns()); in_data_.resize(prop->ListArguments().size()); - out_data_.resize(prop->NumVisibleReturns()); + out_data_.resize(prop->NumReturns()); std::vector out_grad_ptr(out_grad_.size()); for (size_t i = 0; i < out_grad_.size(); ++i) { @@ -40,7 +40,7 @@ class GraphExecutor::BackwardOpWrapper : public Operator { out_data_ptr[i] = &out_data_[i]; } arg_data_ptr_ = prop->BackwardInputs( - out_grad_ptr, out_data_ptr, in_data_ptr); + out_grad_ptr, in_data_ptr, 
out_data_ptr); } // implement forward virtual void Forward(const OpContext &ctx, @@ -127,9 +127,9 @@ inline std::vector > GraphExecutor::GetInplaceOption( // forward property const OperatorProperty *fwd = graph_.nodes[node.backward_source_id].op.get(); - std::vector out_grad_index(fwd->NumReturns()); - std::vector out_data_index(fwd->NumVisibleReturns()); + std::vector out_grad_index(fwd->NumVisibleReturns()); std::vector in_data_index(fwd->ListArguments().size()); + std::vector out_data_index(fwd->NumReturns()); CHECK_EQ(in_data_index.size(), out_data.size()); int counter = 0; for (size_t i = 0; i < out_grad_index.size(); ++i) { @@ -306,7 +306,6 @@ void GraphExecutor::InitDataEntryInfo(const std::vector &in_args, void GraphExecutor::InitDataEntryMemory() { // use allocator to allocate memory. GraphStorageAllocator allocator(&graph_); - for (size_t i = 0; i < topo_order_.size(); ++i) { uint32_t nid = topo_order_[i]; if (!op_nodes_[nid].activated) continue; @@ -328,6 +327,7 @@ void GraphExecutor::InitDataEntryMemory() { CHECK_NE(out_data[i]->type, kInternalAllocated); } auto inplace = GetInplaceOption(nid, in_data, out_data); + for (std::pair kv : inplace) { DataEntryInfo* in = kv.first; DataEntryInfo* out = kv.second; @@ -363,13 +363,14 @@ void GraphExecutor::InitDataEntryMemory() { continue; } // if we decrease it to zero, means we are ready to relase - if (--in->ref_count == 0) { + --in->ref_count; + if (in->ref_count == 0 && in->type == kInternalAllocated) { allocator.Release(in->storage_id, nid); } } // check out again, if there is ref_count == 0, release it for (DataEntryInfo *out : out_data) { - if (out->ref_count == 0) { + if (out->ref_count == 0 && out->type == kInternalAllocated) { allocator.Release(out->storage_id, nid); } } @@ -445,6 +446,28 @@ void GraphExecutor::RunOps(size_t topo_start, size_t topo_end) { } } +std::string GraphExecutor::DebugStr() const { + std::ostringstream os; + os << "num_forward_nodes=" << num_forward_nodes_ << '\n'; + for (size_t i = 0; i < topo_order_.size(); ++i) { + uint32_t nid = topo_order_[i]; + if (!op_nodes_[nid].activated) continue; + os << "Op " << i << ":" << graph_.nodes[nid].name << '\n'; + for (size_t j = 0; j < op_nodes_[nid].outputs.size(); ++j) { + const DataEntryInfo &info = op_nodes_[nid].outputs[j]; + os << "\toutput[" << j << "]: shape=" << info.shape; + if (info.storage_id != GraphStorageAllocator::kBadStorageID) { + os << ", storage_id=" << info.storage_id; + } + if (info.inplace_op_id != -1) { + os << ", inplace_consumer=" << graph_.nodes[info.inplace_op_id].name; + } + os << '\n'; + } + } + return os.str(); +} + void GraphExecutor::Forward() { RunOps(0, num_forward_nodes_); } diff --git a/src/symbol/graph_executor.h b/src/symbol/graph_executor.h index ccc4e64a904f..a072eee69b68 100644 --- a/src/symbol/graph_executor.h +++ b/src/symbol/graph_executor.h @@ -39,6 +39,10 @@ class GraphExecutor : public Executor { this->InitDataEntryInfo(in_args, arg_grad_store, grad_req_type); this->InitDataEntryMemory(); this->InitOpNodes(); + // TODO(bing): remove me when things are OK + LOG(INFO) << "-----Execution memory plan-----\n" + << DebugStr() << '\n' + << "------------------------------\n"; } protected: @@ -79,6 +83,7 @@ class GraphExecutor : public Executor { : op_req(kNullOp), inplace_op_id(-1), type(kNotInitialized), + storage_id(GraphStorageAllocator::kBadStorageID), ref_count(0) {} }; // all the information needed to push the op to engine @@ -159,6 +164,8 @@ class GraphExecutor : public Executor { void InitOpNodes(); // run ops from 
topo order start to end void RunOps(size_t topo_start, size_t topo_end); + // get debug string + std::string DebugStr() const; // internal computational graph StaticGraph graph_; // topological order of nodes in computation graph diff --git a/src/symbol/graph_memory_allocator.h b/src/symbol/graph_memory_allocator.h index 4c047040a041..b7bd2db2081e 100644 --- a/src/symbol/graph_memory_allocator.h +++ b/src/symbol/graph_memory_allocator.h @@ -29,7 +29,9 @@ namespace mxnet { class GraphStorageAllocator { public: /*! \brief resource index */ - typedef uint64_t StorageID; + typedef int64_t StorageID; + /*! \brief bad storage id */ + static const StorageID kBadStorageID = -1; /*! \brief constructor to the graph memory allocator */ explicit GraphStorageAllocator(StaticGraph *graph); /*! @@ -103,26 +105,25 @@ GraphStorageAllocator::Alloc(Context ctx, size_t size) { GraphStorageAllocator::StorageID GraphStorageAllocator::Request(Context ctx, TShape shape, uint32_t node_id) { size_t size = shape.Size(); - if (free_.count(size) != 0) { - auto begin = free_.lower_bound(size); - auto end = free_.upper_bound(size); - // vector of possible candidates - for (auto it = begin; it != end; ++it) { - StorageEntry *e = it->second; - if (e->ctx != ctx) continue; - // Use exect matching strategy - // TODO(bing): think of other strategies, for example, rough match. - if (e->max_size != size) continue; - // find a exact match, erase from map and return - free_.erase(it); - return e->id; - } + auto begin = free_.lower_bound(size); + auto end = free_.upper_bound(size); + // vector of possible candidates + for (auto it = begin; it != end; ++it) { + StorageEntry *e = it->second; + if (e->ctx != ctx) continue; + // Use exect matching strategy + // TODO(bing): think of other strategies, for example, rough match. + if (e->max_size != size) continue; + // find a exact match, erase from map and return + free_.erase(it); + return e->id; } // cannot find anything return a new one. return this->Alloc(ctx, size); } void GraphStorageAllocator::Release(StorageID id, uint32_t node_id) { + CHECK_NE(id, kBadStorageID); StorageEntry *e = data_[id].get(); free_.insert({e->max_size, e}); } @@ -136,10 +137,9 @@ void GraphStorageAllocator::InitStorages() { } NArray GraphStorageAllocator::Get(StorageID id, TShape shape) { + CHECK_NE(id, kBadStorageID); StorageEntry *e = data_[id].get(); - // TODO(bing): change to return e->data.Slice(0, shape.Size()).Reshape(shape); - // once we are able to get NArray that shares memory from a big chunk. - return NArray(shape, e->ctx); + return e->data.Slice(0, shape.Size()).Reshape(shape); } } // namespace mxnet #endif // MXNET_SYMBOL_GRAPH_MEMORY_ALLOCATOR_H_
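
The planning phase of GraphStorageAllocator above (Request, Release, and the exact-size free list) boils down to a few lines of bookkeeping. The following is a simplified, purely illustrative Python model of that strategy, keyed by (context, size) instead of a size-keyed multimap; the class and variable names are invented for the sketch and are not part of the patch.

from collections import defaultdict

class PlanOnlyAllocator(object):
    """Toy model of the planning phase: reuse a released block only on an
    exact size match in the same context, otherwise hand out a new id."""
    def __init__(self):
        self.sizes = []                   # storage id -> size in elements
        self.free = defaultdict(list)     # (ctx, size) -> free storage ids

    def request(self, ctx, size):
        ids = self.free[(ctx, size)]
        if ids:
            return ids.pop()              # exact match found, reuse it
        self.sizes.append(size)           # otherwise allocate a fresh id
        return len(self.sizes) - 1

    def release(self, ctx, storage_id):
        self.free[(ctx, self.sizes[storage_id])].append(storage_id)

alloc = PlanOnlyAllocator()
a = alloc.request('cpu', 100 * 160)       # e.g. the fc1 output in the MNIST test
alloc.release('cpu', a)                   # its ref count dropped to zero
b = alloc.request('cpu', 100 * 160)       # same size, same context -> same block
assert a == b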
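
GraphStorageAllocator::Get now hands out e->data.Slice(0, shape.Size()).Reshape(shape), so the NArray Slice/Reshape pair introduced earlier in the series is just offset-and-shape arithmetic over a flat chunk. A plain-Python model of that arithmetic (illustrative only, not MXNet code):

from functools import reduce
from operator import mul

def slice_view(shape, begin, end):
    """Model of NArray::Slice on a base chunk: a view [begin, end) along dim 0."""
    assert len(shape) >= 1 and shape[0] >= end
    row = reduce(mul, shape[1:], 1)            # elements per index of the first dim
    return (end - begin,) + tuple(shape[1:]), begin * row

def reshape_view(shape, offset, new_shape):
    """Model of NArray::Reshape: same data and offset, a new (not larger) shape."""
    assert reduce(mul, new_shape, 1) <= reduce(mul, shape, 1)
    return tuple(new_shape), offset

# a (3, 4) request served from a 128-element storage entry:
shape, offset = slice_view((128,), 0, 12)      # -> ((12,), 0)
shape, offset = reshape_view(shape, offset, (3, 4))
print(shape, offset)                           # (3, 4) 0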
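
CreateSumNode in static_graph.cc exists because a data entry that feeds several consumers receives one gradient per consumer, and MakeBackwardPass has to add them up before the producer's backward node runs. A two-consumer toy example in plain NumPy (not MXNet code):

import numpy as np

# gradients flowing back from two consumers of the same output entry
g_from_consumer_a = np.array([1.0, 2.0, 3.0])
g_from_consumer_b = np.array([0.5, -1.0, 2.0])

# what the inserted ElementWiseSum node computes: [1.5, 1.0, 5.0]
aggregated = g_from_consumer_a + g_from_consumer_b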
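
Taken together, the executor pieces in this series (python/mxnet/executor.py, Symbol.bind in python/mxnet/symbol.py, and the MXExecutor* C API) are exercised end to end by python/test_mnist.py. The sketch below condenses that flow into a minimal snippet; it assumes the package built from these patches imports as mxnet, and the net, batch size, and variable names are copied from the test or invented for illustration only.

import numpy as np
import mxnet as mx

# a small symbolic net, as in test_mnist.py
data = mx.sym.Variable('data')
fc1  = mx.sym.FullyConnected(data=data, name='fc1', num_hidden=160)
act1 = mx.sym.Activation(data=fc1, name='relu1', type="relu")
net  = mx.sym.FullyConnected(data=act1, name='fc2', num_hidden=10)

# infer shapes from the data shape, then allocate argument/gradient NArrays
batch_size = 100
arg_shapes, out_shapes = net.infer_shape(data=(batch_size, 784))
arg_narrays  = [mx.narray.create(shape) for shape in arg_shapes]
grad_narrays = [mx.narray.create(shape) for shape in arg_shapes]
reqs = ['write_to' for _ in arg_narrays]

# bind returns an Executor; heads() exposes the graph's output NArrays
executor = net.bind(mx.Context('cpu'), arg_narrays, grad_narrays, reqs)
out = executor.heads()[0]

inputs = dict(zip(net.list_arguments(), arg_narrays))
inputs['data'].numpy[:] = np.random.uniform(0, 1, (batch_size, 784))

executor.forward()                        # writes the network output into out
head_grad = mx.narray.create(out.shape)   # gradient w.r.t. the head output
head_grad.numpy[:] = out.numpy            # e.g. softmax-minus-label would go here
executor.backward([head_grad])            # gradients land in grad_narrays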