Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-753] Fallback when using non-MKLDNN supported operators #12019

Merged
merged 25 commits into from
Aug 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/executor/attach_op_execs_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ class StatefulComputeExExecutor : public OpExecutor {
op_ctx.run_ctx = rctx;
#if MXNET_USE_MKLDNN == 1
InvalidateOutputs(out_array, req);
CreateDefaultInputs(in_array, &in_array_fallback);
fcompute_(state_, op_ctx, in_array_fallback, req, out_array);
return;
#endif
fcompute_(state_, op_ctx, in_array, req, out_array);
}
Expand Down Expand Up @@ -226,6 +229,13 @@ class FComputeExExecutor : public OpExecutor {
op_ctx.run_ctx = rctx;
#if MXNET_USE_MKLDNN == 1
InvalidateOutputs(out_array, req);
// TODO(alex): (MXNET-847) Remove this fallback feature after subgraph implemented
const auto is_mkldnn = Op::GetAttr<bool>("TIsMKLDNN");
if (!is_mkldnn.get(attrs_.op, false)) {
CreateDefaultInputs(in_array, &in_array_fallback);
fcompute_(attrs_, op_ctx, in_array_fallback, req, out_array);
return;
}
#endif
fcompute_(attrs_, op_ctx, in_array, req, out_array);
}
Expand Down
4 changes: 4 additions & 0 deletions src/executor/exec_pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ class OpExecutor {
virtual OpStatePtr state() const {
return OpStatePtr();
}

// TODO(alexzai): (MXNET-856) Remove instance member after subgraph feature added
protected:
std::vector<NDArray> in_array_fallback;
};

/*!
Expand Down
2 changes: 2 additions & 0 deletions src/operator/nn/activation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ The following activation functions are supported:
})
.set_attr<FCompute>("FCompute<cpu>", ActivationCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a more generic way to add this instead of doing it for every operator? What if we add new operators later — should we document this somewhere?

Copy link
Contributor Author

@azai91 azai91 Aug 20, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It only needs to be added to MKLDNN operators. This fix was a temporary solution while we get the subgraph feature implemented. We weighed the pros/cons of releasing a stable MKLDNN build with this hack versus waiting another month for subgraph to be introduced (possibly with its own bugs), and decided we would use this short-term solution.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please mark this TODO and create a JIRA ticket to remove this later after MKLDNN support is released.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added TODO to opexecuter

.set_attr<FComputeEx>("FComputeEx<cpu>", ActivationComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient", ActivationGrad{"_backward_Activation"})
Expand Down Expand Up @@ -184,6 +185,7 @@ NNVM_REGISTER_OP(_backward_Activation)
#endif
.set_attr_parser(ParamParser<ActivationParam>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ActivationGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", ActivationGradCompute<cpu>);
Expand Down
2 changes: 2 additions & 0 deletions src/operator/nn/batch_norm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ the sparse tensors will fallback.
#endif
.set_attr<nnvm::FGradient>("FGradient", BatchNormGrad)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
Expand Down Expand Up @@ -633,6 +634,7 @@ NNVM_REGISTER_OP(_backward_BatchNorm)
#endif
.set_attr_parser(ParamParser<BatchNormParam>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", BatchNormGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", BatchNormGradCompute<cpu>);
Expand Down
2 changes: 2 additions & 0 deletions src/operator/nn/concat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ Example::
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<bool>("TIsMKLDNN", true)
#endif
CONCAT_FORWARD_ATTRS
.set_attr<nnvm::FInferShape>("FInferShape", ConcatShape)
Expand All @@ -387,6 +388,7 @@ NNVM_REGISTER_OP(_backward_Concat)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FInferStorageType>("FInferStorageType", BackwardConcatStorageType)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ConcatGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", ConcatGradCompute<cpu>);
Expand Down
2 changes: 2 additions & 0 deletions src/operator/nn/convolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ There are other options to tune the performance.
#endif
.set_attr<FCompute>("FCompute<cpu>", ConvolutionCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient", ConvolutionGrad{"_backward_Convolution"})
Expand All @@ -509,6 +510,7 @@ NNVM_REGISTER_OP(_backward_Convolution)
})
.set_attr_parser(ConvolutionParamParser)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", ConvolutionGradCompute<cpu>);
Expand Down
2 changes: 2 additions & 0 deletions src/operator/nn/deconvolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ NNVM_REGISTER_OP(Deconvolution)
})
.set_attr<FCompute>("FCompute<cpu>", DeconvolutionCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", DeconvolutionComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient", DeconvolutionGrad{"_backward_Deconvolution"})
Expand All @@ -436,6 +437,7 @@ NNVM_REGISTER_OP(_backward_Deconvolution)
})
.set_attr_parser(DeconvolutionParamParser)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", DeconvolutionGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", DeconvolutionGradCompute<cpu>);
Expand Down
2 changes: 2 additions & 0 deletions src/operator/nn/fully_connected.cc
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
return std::vector<std::string>{"output"};
})
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
Expand Down Expand Up @@ -322,6 +323,7 @@ NNVM_REGISTER_OP(_backward_FullyConnected)
.set_attr<FInferStorageType>("FInferStorageType", BackwardFCStorageType)
.set_attr_parser(ParamParser<FullyConnectedParam>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", FullyConnectedGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", FullyConnectedGradCompute<cpu>);
Expand Down
2 changes: 2 additions & 0 deletions src/operator/nn/lrn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ number of kernels in the layer.
})
.set_attr<FCompute>("FCompute<cpu>", LRNCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", LRNComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient", LRNGrad{"_backward_LRN"})
Expand All @@ -194,6 +195,7 @@ NNVM_REGISTER_OP(_backward_LRN)
#endif
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", LRNGradComputeExCPU)
// Native compute requires norm while MKLDNN does not so cannot be compared in debug mode
.set_attr<bool>("TExcludeMKLDNNDebug", true)
Expand Down
12 changes: 12 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_base-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,18 @@ static inline void InvalidateOutputs(const std::vector<NDArray> &arrs,
}
}

// TODO(alexzai): (MXNET-856) Remove helper function after subgraph feature added
static inline void CreateDefaultInputs(const std::vector<NDArray> &arrs,
std::vector<NDArray> *out_arrs) {
out_arrs->clear();
for (size_t i = 0; i < arrs.size(); ++i) {
if (arrs[i].IsMKLDNNData())
out_arrs->push_back(arrs[i].Reorder2Default());
else
out_arrs->push_back(arrs[i]);
}
}

const mkldnn::memory *GetWeights(const NDArray &arr,
const mkldnn::memory::primitive_desc &target_pd,
int num_groups);
Expand Down
2 changes: 2 additions & 0 deletions src/operator/nn/pooling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ For each window ``X``, the mathematical expression for Lp pooling is:
.set_attr<nnvm::FInferShape>("FInferShape", PoolingShape)
.set_attr<FCompute>("FCompute<cpu>", PoolingCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", PoolingComputeExCPU)
#endif
.set_attr<nnvm::FGradient>("FGradient",
Expand Down Expand Up @@ -424,6 +425,7 @@ NNVM_REGISTER_OP(_backward_Pooling)
#endif
.set_attr_parser(PoolingParamParser)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", PoolingGradComputeExCPU)
#endif
.set_attr<FCompute>("FCompute<cpu>", PoolingGradCompute<cpu>);
Expand Down
1 change: 1 addition & 0 deletions src/operator/nn/softmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ Example::
})
.set_attr<FCompute>("FCompute<cpu>", SoftmaxCompute<cpu, mxnet_op::softmax_fwd>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", SoftmaxComputeExCPU)
.set_attr<FInferStorageType>("FInferStorageType", SoftmaxStorageType)
#endif
Expand Down
3 changes: 3 additions & 0 deletions src/operator/tensor/elemwise_sum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ The storage type of ``add_n`` output depends on storage types of inputs
[](const NodeAttrs& attrs) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
#endif
.set_attr<nnvm::FInferShape>("FInferShape", ElementWiseSumShape)
.set_attr<nnvm::FInferType>("FInferType", ElementWiseSumType)
.set_attr<FInferStorageType>("FInferStorageType", ElementWiseSumForwardInferStorageType)
Expand Down
4 changes: 4 additions & 0 deletions src/operator/tensor/elemwise_unary_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,11 @@ class UnaryOp : public OpBase {
}
break;
case kWriteInplace:
// cannot check if ptrs are the same for MKLDNN because we may have
// created copies of input when reordering. WriteInPlace will still write to original array
#if MXNET_USE_MKLDNN == 0
CHECK_EQ(inputs[0].dptr_, outputs[0].dptr_);
#endif
break;
case kNullOp:
break;
Expand Down
2 changes: 2 additions & 0 deletions src/operator/tensor/elemwise_unary_op_basic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ MXNET_OPERATOR_REGISTER_UNARY(_copy)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<bool>("TIsMKLDNN", true)
#endif
.set_attr<nnvm::FInplaceIdentity>("FInplaceIdentity",
[](const NodeAttrs& attrs){
Expand All @@ -225,6 +226,7 @@ NNVM_REGISTER_OP(_backward_copy)
.set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
.set_attr<FComputeEx>("FComputeEx<cpu>", CopyEx)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
Expand Down
44 changes: 44 additions & 0 deletions tests/python/mkl/test_mkldnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,50 @@ def check_fullyconnected_training(stype):
for stype in stypes:
check_fullyconnected_training(stype)

@with_seed()
def test_non_mkldnn_fcomputeex():
    """Feed an MKLDNN-formatted NDArray into a non-MKLDNN FComputeEx operator.

    Convolution is an example op that produces an MKLDNN NDArray from regular
    NDArrays; CustomOp is an example of a non-MKLDNN FComputeEx operator, so
    the executor must fall back and reorder the input to the default layout.
    """

    @mx.operator.register("custom")
    class CustomProp(mx.operator.CustomOpProp):
        def __init__(self):
            # BUG FIX: was misspelled `__int__`, so this constructor never ran
            # and `need_top_grad=False` was silently ignored.
            super(CustomProp, self).__init__(need_top_grad=False)

        def list_arguments(self):
            return ['data']

        def list_outputs(self):
            return ['output']

        def infer_shape(self, in_shape):
            # Identity op: output shape equals the single input's shape.
            data_shape = in_shape[0]
            output_shape = in_shape[0]
            return [data_shape], [output_shape], []

        def infer_type(self, in_type):
            # Identity op: output dtype equals the input dtype.
            dtype = in_type[0]
            return [dtype], [dtype], []

        def create_operator(self, ctx, shapes, dtypes):
            return Custom()

    class Custom(mx.operator.CustomOp):
        def forward(self, is_train, req, in_data, out_data, aux):
            # Reading the input would fail if it were still in an
            # MKLDNN-specific layout, which is what this test exercises.
            print(in_data[0])
            self.assign(out_data[0], req[0], in_data[0])

        def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
            # BUG FIX: `out_grad` is a list of NDArrays; assign the first
            # gradient, not the list itself.
            self.assign(in_grad[0], req[0], out_grad[0])

    data = mx.symbol.Variable('data')
    conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=(1, 1),
                              stride=(1, 1), num_filter=8, name="conv",
                              no_bias=True)
    custom = mx.symbol.Custom(name='custom', data=conv, op_type='custom')
    exec1 = custom.bind(mx.cpu(),
                        args={'data': mx.nd.ones([10, 3, 96, 96]),
                              'conv_weight': mx.nd.ones([8, 3, 5, 5])})
    exec1.forward()[0].wait_to_read()


# When run as a script, verify the MKLDNN installation itself; the individual
# test functions above are picked up by the test runner (e.g. nose/pytest).
if __name__ == '__main__':
    install.test_mkldnn_install()