[MXNET-753] Fallback when using non-MKLDNN supported operators (apache#12019)

* add fallback test

* wait to read throws error

* add TIsMKLDNN attr

* invalidate inputs if fcomputeex unsupported

* keep ptr to newly created default arrays

* add flag to all mkldnn operators

* update method name to CreateDefaultInputs

* remove dup attrs

* create new instance var to store copy

* only reorder if mkldnn

* add mkldnn flag to batch norm

* do not check input / output ptr for mkldnn as a copy is made

* fix lint

* update macro

* update custom update name

* add todo for fallback

* fix lint

* rename opexecutor name

* add fallback to opexecutor class

* fix lint

* add todos

* create fallback arrays in place

* revert in place diff

* create copy of arrays for fallback

* empty array
azai91 authored and anirudh2290 committed Sep 19, 2018
1 parent 796bd0e commit 016c8af
Showing 16 changed files with 96 additions and 0 deletions.
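
In short: the executors in attach_op_execs_pass.cc now consult a new per-operator boolean attribute, `TIsMKLDNN`, before dispatching `FComputeEx` (the stateful executor falls back unconditionally). When the attribute is absent — as for a `CustomOp` — the new `CreateDefaultInputs` helper in mkldnn_base-inl.h reorders any MKLDNN-formatted inputs back to default layout, and the fallback copies are held in a new `in_array_fallback` member on `OpExecutor`. Each MKLDNN-capable operator file below then simply registers the attribute. The following standalone sketch models that dispatch logic with simplified stand-in types — `NDArray`, the attribute table, and `Run` here are illustrative toys, not MXNet's actual classes:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Toy stand-in for mxnet::NDArray, tracking only the layout flag.
struct NDArray {
  bool mkldnn_layout = false;        // models IsMKLDNNData()
  NDArray Reorder2Default() const {  // models the real reorder: returns a copy
    NDArray copy = *this;
    copy.mkldnn_layout = false;
    return copy;
  }
};

// Stand-in for Op::GetAttr<bool>("TIsMKLDNN"): set only by operators that
// registered an MKLDNN-aware FComputeEx (Convolution here, but not Custom).
const std::unordered_map<std::string, bool> kTIsMKLDNN = {{"Convolution", true}};

// Mirrors the new CreateDefaultInputs(): copy inputs, reordering MKLDNN ones.
void CreateDefaultInputs(const std::vector<NDArray>& in,
                         std::vector<NDArray>* out) {
  out->clear();
  for (const NDArray& a : in)
    out->push_back(a.mkldnn_layout ? a.Reorder2Default() : a);
}

// Mirrors FComputeExExecutor::Run(): consult the flag, fall back if unset.
void Run(const std::string& op, const std::vector<NDArray>& in_array) {
  auto it = kTIsMKLDNN.find(op);
  const bool is_mkldnn = (it != kTIsMKLDNN.end()) && it->second;
  if (!is_mkldnn) {
    std::vector<NDArray> in_array_fallback;
    CreateDefaultInputs(in_array, &in_array_fallback);
    std::cout << op << ": fallback, inputs reordered to default layout\n";
    return;  // the real code calls fcompute_ on in_array_fallback here
  }
  std::cout << op << ": TIsMKLDNN set, inputs passed through unchanged\n";
}

int main() {
  std::vector<NDArray> inputs(1);
  inputs[0].mkldnn_layout = true;  // e.g. produced by an MKLDNN convolution
  Run("Convolution", inputs);      // flag registered: no fallback
  Run("Custom", inputs);           // unregistered (e.g. CustomOp): fallback
}
```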
10 changes: 10 additions & 0 deletions src/executor/attach_op_execs_pass.cc
@@ -159,6 +159,9 @@ class StatefulComputeExExecutor : public OpExecutor {
op_ctx.run_ctx = rctx;
#if MXNET_USE_MKLDNN == 1
InvalidateOutputs(out_array, req);
CreateDefaultInputs(in_array, &in_array_fallback);
fcompute_(state_, op_ctx, in_array_fallback, req, out_array);
return;
#endif
fcompute_(state_, op_ctx, in_array, req, out_array);
}
@@ -226,6 +229,13 @@ class FComputeExExecutor : public OpExecutor {
op_ctx.run_ctx = rctx;
#if MXNET_USE_MKLDNN == 1
InvalidateOutputs(out_array, req);
// TODO(alex): (MXNET-847) Remove this fallback feature after subgraph implemented
const auto is_mkldnn = Op::GetAttr<bool>("TIsMKLDNN");
if (!is_mkldnn.get(attrs_.op, false)) {
CreateDefaultInputs(in_array, &in_array_fallback);
fcompute_(attrs_, op_ctx, in_array_fallback, req, out_array);
return;
}
#endif
fcompute_(attrs_, op_ctx, in_array, req, out_array);
}
4 changes: 4 additions & 0 deletions src/executor/exec_pass.h
@@ -86,6 +86,10 @@ class OpExecutor {
virtual OpStatePtr state() const {
return OpStatePtr();
}

// TODO(alexzai): (MXNET-856) Remove instance member after subgraph feature added
protected:
std::vector<NDArray> in_array_fallback;
};

/*!
2 changes: 2 additions & 0 deletions src/operator/nn/activation.cc
@@ -155,6 +155,7 @@ The following activation functions are supported:
})
.set_attr<FCompute>("FCompute<cpu>", ActivationCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ActivationComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient", ActivationGrad{"_backward_Activation"})
@@ -184,6 +185,7 @@ NNVM_REGISTER_OP(_backward_Activation)
#endif
.set_attr_parser(ParamParser<ActivationParam>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ActivationGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", ActivationGradCompute<cpu>);
2 changes: 2 additions & 0 deletions src/operator/nn/batch_norm.cc
@@ -601,6 +601,7 @@ the sparse tensors will fallback.
#endif
.set_attr<nnvm::FGradient>("FGradient", BatchNormGrad)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
@@ -633,6 +634,7 @@ NNVM_REGISTER_OP(_backward_BatchNorm)
#endif
.set_attr_parser(ParamParser<BatchNormParam>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", BatchNormGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", BatchNormGradCompute<cpu>);
2 changes: 2 additions & 0 deletions src/operator/nn/concat.cc
@@ -367,6 +367,7 @@ Example::
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<bool>("TIsMKLDNN", true)
#endif
CONCAT_FORWARD_ATTRS
.set_attr<nnvm::FInferShape>("FInferShape", ConcatShape)
@@ -387,6 +388,7 @@ NNVM_REGISTER_OP(_backward_Concat)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FInferStorageType>("FInferStorageType", BackwardConcatStorageType)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ConcatGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", ConcatGradCompute<cpu>);
2 changes: 2 additions & 0 deletions src/operator/nn/convolution.cc
@@ -484,6 +484,7 @@ There are other options to tune the performance.
#endif
.set_attr<FCompute>("FCompute<cpu>", ConvolutionCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient", ConvolutionGrad{"_backward_Convolution"})
@@ -509,6 +510,7 @@ NNVM_REGISTER_OP(_backward_Convolution)
})
.set_attr_parser(ConvolutionParamParser)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", ConvolutionGradCompute<cpu>);
2 changes: 2 additions & 0 deletions src/operator/nn/deconvolution.cc
@@ -413,6 +413,7 @@ NNVM_REGISTER_OP(Deconvolution)
})
.set_attr<FCompute>("FCompute<cpu>", DeconvolutionCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", DeconvolutionComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient", DeconvolutionGrad{"_backward_Deconvolution"})
@@ -436,6 +437,7 @@ NNVM_REGISTER_OP(_backward_Deconvolution)
})
.set_attr_parser(DeconvolutionParamParser)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", DeconvolutionGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", DeconvolutionGradCompute<cpu>);
2 changes: 2 additions & 0 deletions src/operator/nn/fully_connected.cc
@@ -290,6 +290,7 @@ If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
return std::vector<std::string>{"output"};
})
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
@@ -322,6 +323,7 @@ NNVM_REGISTER_OP(_backward_FullyConnected)
.set_attr<FInferStorageType>("FInferStorageType", BackwardFCStorageType)
.set_attr_parser(ParamParser<FullyConnectedParam>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", FullyConnectedGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", FullyConnectedGradCompute<cpu>);
2 changes: 2 additions & 0 deletions src/operator/nn/lrn.cc
@@ -180,6 +180,7 @@ number of kernels in the layer.
})
.set_attr<FCompute>("FCompute<cpu>", LRNCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", LRNComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient", LRNGrad{"_backward_LRN"})
@@ -194,6 +195,7 @@ NNVM_REGISTER_OP(_backward_LRN)
#endif
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", LRNGradComputeExCPU)
// Native compute requires norm while MKLDNN does not so cannot be compared in debug mode
.set_attr<bool>("TExcludeMKLDNNDebug", true)
12 changes: 12 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -356,6 +356,18 @@ static inline void InvalidateOutputs(const std::vector<NDArray> &arrs,
}
}

// TODO(alexzai): (MXNET-856) Remove helper function after subgraph feature added
static inline void CreateDefaultInputs(const std::vector<NDArray> &arrs,
std::vector<NDArray> *out_arrs) {
out_arrs->clear();
for (size_t i = 0; i < arrs.size(); ++i) {
if (arrs[i].IsMKLDNNData())
out_arrs->push_back(arrs[i].Reorder2Default());
else
out_arrs->push_back(arrs[i]);
}
}

const mkldnn::memory *GetWeights(const NDArray &arr,
const mkldnn::memory::primitive_desc &target_pd,
int num_groups);
2 changes: 2 additions & 0 deletions src/operator/nn/pooling.cc
@@ -395,6 +395,7 @@ For each window ``X``, the mathematical expression for Lp pooling is:
.set_attr<nnvm::FInferShape>("FInferShape", PoolingShape)
.set_attr<FCompute>("FCompute<cpu>", PoolingCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", PoolingComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient",
@@ -424,6 +425,7 @@ NNVM_REGISTER_OP(_backward_Pooling)
#endif
.set_attr_parser(PoolingParamParser)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", PoolingGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", PoolingGradCompute<cpu>);
1 change: 1 addition & 0 deletions src/operator/nn/softmax.cc
@@ -98,6 +98,7 @@ Example::
})
.set_attr<FCompute>("FCompute<cpu>", SoftmaxCompute<cpu, mxnet_op::softmax_fwd>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", SoftmaxComputeExCPU)
.set_attr<FInferStorageType>("FInferStorageType", SoftmaxStorageType)
#endif
3 changes: 3 additions & 0 deletions src/operator/tensor/elemwise_sum.cc
@@ -179,6 +179,9 @@ The storage type of ``add_n`` output depends on storage types of inputs
[](const NodeAttrs& attrs) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
#endif
.set_attr<nnvm::FInferShape>("FInferShape", ElementWiseSumShape)
.set_attr<nnvm::FInferType>("FInferType", ElementWiseSumType)
.set_attr<FInferStorageType>("FInferStorageType", ElementWiseSumForwardInferStorageType)
4 changes: 4 additions & 0 deletions src/operator/tensor/elemwise_unary_op.h
@@ -299,7 +299,11 @@ class UnaryOp : public OpBase {
}
break;
case kWriteInplace:
// cannot check if ptrs are the same for MKLDNN because we may have
// created copies of input when reordering. WriteInPlace will still write to original array
#if MXNET_USE_MKLDNN == 0
CHECK_EQ(inputs[0].dptr_, outputs[0].dptr_);
#endif
break;
case kNullOp:
break;
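
The comment added in this hunk captures the subtlety: once an MKLDNN input has been reordered to default layout, the kernel operates on a copy, so a data-pointer equality check between input and output can no longer confirm a legitimate in-place write. A small standalone illustration — `NDArray` and `Reorder2Default` below are simplified stand-ins, not the real types:

```cpp
#include <iostream>
#include <memory>
#include <vector>

// Toy stand-in for illustration only.
struct NDArray {
  std::shared_ptr<std::vector<float>> data;
  const float* dptr() const { return data->data(); }
};

// Models Reorder2Default(): always materializes a fresh buffer (a copy).
NDArray Reorder2Default(const NDArray& a) {
  return NDArray{std::make_shared<std::vector<float>>(*a.data)};
}

int main() {
  NDArray input{std::make_shared<std::vector<float>>(3, 1.0f)};
  NDArray output = input;  // kWriteInplace: output shares input's buffer

  // Without MKLDNN, the executor's sanity check holds:
  std::cout << (input.dptr() == output.dptr()) << "\n";  // prints 1

  // With the MKLDNN fallback, the kernel sees a reordered copy, so the
  // same check would fail even though the in-place request is valid:
  NDArray fallback = Reorder2Default(input);
  std::cout << (fallback.dptr() == output.dptr()) << "\n";  // prints 0
}
```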
2 changes: 2 additions & 0 deletions src/operator/tensor/elemwise_unary_op_basic.cc
@@ -206,6 +206,7 @@ MXNET_OPERATOR_REGISTER_UNARY(_copy)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<bool>("TIsMKLDNN", true)
#endif
.set_attr<nnvm::FInplaceIdentity>("FInplaceIdentity",
[](const NodeAttrs& attrs){
@@ -225,6 +226,7 @@ NNVM_REGISTER_OP(_backward_copy)
.set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
.set_attr<FComputeEx>("FComputeEx<cpu>", CopyEx)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
44 changes: 44 additions & 0 deletions tests/python/mkl/test_mkldnn.py
@@ -381,6 +381,50 @@ def check_fullyconnected_training(stype):
for stype in stypes:
check_fullyconnected_training(stype)

@with_seed()
def test_non_mkldnn_fcomputeex():
    # Test the special case where an MKLDNN-formatted NDArray feeds into a
    # non-MKLDNN FComputeEx operator: conv creates an MKLDNN NDArray from
    # regular NDArrays, and CustomOp is an example of a non-MKLDNN
    # FComputeEx operator.

    @mx.operator.register("custom")
    class CustomProp(mx.operator.CustomOpProp):
        def __init__(self):
            super(CustomProp, self).__init__(need_top_grad=False)

        def list_arguments(self):
            return ['data']

        def list_outputs(self):
            return ['output']

        def infer_shape(self, in_shape):
            data_shape = in_shape[0]
            output_shape = in_shape[0]
            return [data_shape], [output_shape], []

        def infer_type(self, in_type):
            dtype = in_type[0]
            return [dtype], [dtype], []

        def create_operator(self, ctx, shapes, dtypes):
            return Custom()

    class Custom(mx.operator.CustomOp):
        def forward(self, is_train, req, in_data, out_data, aux):
            print(in_data[0])
            self.assign(out_data[0], req[0], in_data[0])

        def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
            self.assign(in_grad[0], req[0], out_grad[0])

    data = mx.symbol.Variable('data')
    conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=(1, 1), stride=(1, 1),
                              num_filter=8, name="conv", no_bias=True)
    custom = mx.symbol.Custom(name='custom', data=conv, op_type='custom')
    exec1 = custom.bind(mx.cpu(), args={'data': mx.nd.ones([10, 3, 96, 96]),
                                        'conv_weight': mx.nd.ones([8, 3, 5, 5])})
    exec1.forward()[0].wait_to_read()


if __name__ == '__main__':
    install.test_mkldnn_install()
