From a4e8df4b0449f60e8343c610525cd176860ca1b4 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Thu, 2 Aug 2018 14:27:44 -0700 Subject: [PATCH 01/25] add fallback test --- tests/python/mkl/test_mkldnn.py | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py index ba4cf3f0116a..fa7df5e663ba 100644 --- a/tests/python/mkl/test_mkldnn.py +++ b/tests/python/mkl/test_mkldnn.py @@ -381,6 +381,50 @@ def check_fullyconnected_training(stype): for stype in stypes: check_fullyconnected_training(stype) +@with_seed() +def test_non_mkldnn_fcomputeex(): + # test special case where MKLDNN formatted NDArray feeds into non-mkldnn fcomputeex operator + # conv is example where MKLDNN NDArray is created from regular NDArrays + # CustomOps is example of non-mkldnn fcomputeex operator + + @mx.operator.register("custom") + class CustomProp(mx.operator.CustomOpProp): + def __int__(self): + super(CustomProp, self).__init__(need_top_grad=False) + + def list_arguments(self): + return ['data'] + + def list_outputs(self): + return ['output'] + + def infer_shape(self, in_shape): + data_shape = in_shape[0] + output_shape = in_shape[0] + return [data_shape], [output_shape], [] + + def infer_type(self, in_type): + dtype = in_type[0] + return [dtype], [dtype], [] + + def create_operator(self, ctx, shapes, dtypes): + return Custom() + + + class Custom(mx.operator.CustomOp): + def forward(self, is_train, req, in_data, out_data, aux): + print(in_data[0]) + self.assign(out_data[0], req[0], in_data[0]) + + def backward(self, req, out_grad, in_data, out_data, in_grad, aux): + self.assign(in_grad[0], req[0], out_grad) + + data = mx.symbol.Variable('data') + conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=(1, 1), stride=(1,1), num_filter=8, name="conv", no_bias=True) + mlp = mx.symbol.Custom(name='custom', data=conv, op_type='custom') + exec1 = mlp.bind(mx.cpu(), args={'data': mx.nd.ones([10,3,96,96]), 
'conv_weight': mx.nd.ones([8,3,5,5])}) + exec1.forward() + if __name__ == '__main__': install.test_mkldnn_install() From ffec56373e4648f053136e449894b1d70b0f99a9 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Thu, 2 Aug 2018 15:17:01 -0700 Subject: [PATCH 02/25] wait to read throws error --- tests/python/mkl/test_mkldnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py index fa7df5e663ba..ab2d6fd9b6b2 100644 --- a/tests/python/mkl/test_mkldnn.py +++ b/tests/python/mkl/test_mkldnn.py @@ -423,7 +423,7 @@ def backward(self, req, out_grad, in_data, out_data, in_grad, aux): conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=(1, 1), stride=(1,1), num_filter=8, name="conv", no_bias=True) mlp = mx.symbol.Custom(name='custom', data=conv, op_type='custom') exec1 = mlp.bind(mx.cpu(), args={'data': mx.nd.ones([10,3,96,96]), 'conv_weight': mx.nd.ones([8,3,5,5])}) - exec1.forward() + exec1.forward()[0].wait_to_read() if __name__ == '__main__': From e5b0d4e53dc4f839ad665562ab6321778288492b Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Thu, 2 Aug 2018 15:45:37 -0700 Subject: [PATCH 03/25] add TIsMKLDNN attr --- src/operator/nn/convolution.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc index 8f25cf0dcbb1..d5abe629123b 100644 --- a/src/operator/nn/convolution.cc +++ b/src/operator/nn/convolution.cc @@ -484,6 +484,7 @@ There are other options to tune the performance. 
#endif .set_attr("FCompute", ConvolutionCompute) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", ConvolutionComputeExCPU) #endif .set_attr("FGradient", ConvolutionGrad{"_backward_Convolution"}) @@ -509,6 +510,7 @@ NNVM_REGISTER_OP(_backward_Convolution) }) .set_attr_parser(ConvolutionParamParser) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", ConvolutionGradComputeExCPU) #endif .set_attr("FCompute", ConvolutionGradCompute); From 57cbf0acbfaf767ae5b8896c27d703e65fba7ef5 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Thu, 2 Aug 2018 18:02:55 -0700 Subject: [PATCH 04/25] invalidate inputs if fcomputeex unsupported --- src/executor/attach_op_execs_pass.cc | 7 +++++++ src/operator/nn/mkldnn/mkldnn_base-inl.h | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index c011c1d9ce03..726d4a78571f 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -159,6 +159,8 @@ class StatefulComputeExExecutor : public OpExecutor { op_ctx.run_ctx = rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); + fcompute_(state_, op_ctx, InvalidateInputs(in_array), req, out_array); + return; #endif fcompute_(state_, op_ctx, in_array, req, out_array); } @@ -226,6 +228,11 @@ class FComputeExExecutor : public OpExecutor { op_ctx.run_ctx = rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); + const auto is_mkldnn = Op::GetAttr("TIsMKLDNN"); + if (!is_mkldnn.get(attrs_.op, false)) { + fcompute_(attrs_, op_ctx, InvalidateInputs(in_array), req, out_array); + return; + } #endif fcompute_(attrs_, op_ctx, in_array, req, out_array); } diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index 273afcd32dc7..bd204402a6db 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -356,6 +356,13 @@ static 
inline void InvalidateOutputs(const std::vector &arrs, } } +static inline std::vector InvalidateInputs(const std::vector &arrs) { + std::vector buffer(arrs.size()); + for (size_t i = 0; i < arrs.size(); ++i) + buffer[i] = arrs[i].Reorder2Default(); + return buffer; +} + const mkldnn::memory *GetWeights(const NDArray &arr, const mkldnn::memory::primitive_desc &target_pd, int num_groups); From 6b6b1c5fe64ebf36288bb035b55f217e030bc7bd Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 6 Aug 2018 16:32:43 -0700 Subject: [PATCH 05/25] keep ptr to newly created default arrays --- src/executor/attach_op_execs_pass.cc | 6 ++++-- src/operator/nn/mkldnn/mkldnn_base-inl.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index 726d4a78571f..655af91bc604 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -159,7 +159,8 @@ class StatefulComputeExExecutor : public OpExecutor { op_ctx.run_ctx = rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); - fcompute_(state_, op_ctx, InvalidateInputs(in_array), req, out_array); + in_array = CreateInputsInputs(in_array); + fcompute_(state_, op_ctx, in_array, req, out_array); return; #endif fcompute_(state_, op_ctx, in_array, req, out_array); @@ -230,7 +231,8 @@ class FComputeExExecutor : public OpExecutor { InvalidateOutputs(out_array, req); const auto is_mkldnn = Op::GetAttr("TIsMKLDNN"); if (!is_mkldnn.get(attrs_.op, false)) { - fcompute_(attrs_, op_ctx, InvalidateInputs(in_array), req, out_array); + in_array = CreateInputsInputs(in_array); + fcompute_(attrs_, op_ctx, in_array, req, out_array); return; } #endif diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index bd204402a6db..f790ad8abe03 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -356,7 +356,7 @@ static inline void 
InvalidateOutputs(const std::vector &arrs, } } -static inline std::vector InvalidateInputs(const std::vector &arrs) { +static inline std::vector CreateInputsInputs(const std::vector &arrs) { std::vector buffer(arrs.size()); for (size_t i = 0; i < arrs.size(); ++i) buffer[i] = arrs[i].Reorder2Default(); From e5480929d0b0c78575c6a376eca3b4495f739920 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 6 Aug 2018 16:43:33 -0700 Subject: [PATCH 06/25] add flag to all mkldnn operators --- src/operator/nn/activation.cc | 2 ++ src/operator/nn/concat.cc | 2 ++ src/operator/nn/deconvolution.cc | 2 ++ src/operator/nn/fully_connected.cc | 2 ++ src/operator/nn/lrn.cc | 2 ++ src/operator/nn/pooling.cc | 2 ++ src/operator/nn/softmax.cc | 1 + src/operator/tensor/elemwise_sum.cc | 3 +++ src/operator/tensor/elemwise_unary_op_basic.cc | 1 + 9 files changed, 17 insertions(+) diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc index b8c2045fba12..ba44ebd4ed4d 100644 --- a/src/operator/nn/activation.cc +++ b/src/operator/nn/activation.cc @@ -155,6 +155,7 @@ The following activation functions are supported: }) .set_attr("FCompute", ActivationCompute) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", ActivationComputeExCPU) #endif .set_attr("FGradient", ActivationGrad{"_backward_Activation"}) @@ -184,6 +185,7 @@ NNVM_REGISTER_OP(_backward_Activation) #endif .set_attr_parser(ParamParser) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", ActivationGradComputeExCPU) #endif .set_attr("FCompute", ActivationGradCompute); diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 9df459e9224d..ac8a814ce70f 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -367,6 +367,7 @@ Example:: .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) +.set_attr("TIsMKLDNN", true) #endif CONCAT_FORWARD_ATTRS .set_attr("FInferShape", 
ConcatShape) @@ -387,6 +388,7 @@ NNVM_REGISTER_OP(_backward_Concat) .set_attr("TIsBackward", true) .set_attr("FInferStorageType", BackwardConcatStorageType) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", ConcatGradComputeExCPU) #endif .set_attr("FCompute", ConcatGradCompute); diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index a4be1a0c56a0..1ab391d92b04 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -413,6 +413,7 @@ NNVM_REGISTER_OP(Deconvolution) }) .set_attr("FCompute", DeconvolutionCompute) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", DeconvolutionComputeExCPU) #endif .set_attr("FGradient", DeconvolutionGrad{"_backward_Deconvolution"}) @@ -436,6 +437,7 @@ NNVM_REGISTER_OP(_backward_Deconvolution) }) .set_attr_parser(DeconvolutionParamParser) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", DeconvolutionGradComputeExCPU) #endif .set_attr("FCompute", DeconvolutionGradCompute); diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index eb881d29abd1..d8a32f0ae963 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -290,6 +290,7 @@ If ``no_bias`` is set to be true, then the ``bias`` term is ignored. 
return std::vector{"output"}; }) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) @@ -322,6 +323,7 @@ NNVM_REGISTER_OP(_backward_FullyConnected) .set_attr("FInferStorageType", BackwardFCStorageType) .set_attr_parser(ParamParser) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", FullyConnectedGradComputeExCPU) #endif .set_attr("FCompute", FullyConnectedGradCompute); diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc index 587cf930920e..a428eb1e4faf 100644 --- a/src/operator/nn/lrn.cc +++ b/src/operator/nn/lrn.cc @@ -180,6 +180,7 @@ number of kernels in the layer. }) .set_attr("FCompute", LRNCompute) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", LRNComputeExCPU) #endif .set_attr("FGradient", LRNGrad{"_backward_LRN"}) @@ -194,6 +195,7 @@ NNVM_REGISTER_OP(_backward_LRN) #endif .set_attr("TIsBackward", true) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", LRNGradComputeExCPU) // Native compute requires norm while MKLDNN does not so cannot be compared in debug mode .set_attr("TExcludeMKLDNNDebug", true) diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index 2d118142bc79..e4cb2324e451 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -395,6 +395,7 @@ For each window ``X``, the mathematical expression for Lp pooling is: .set_attr("FInferShape", PoolingShape) .set_attr("FCompute", PoolingCompute) #if MXNET_USE_MKLDNN == 1 + .set_attr("TIsMKLDNN", true).set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", PoolingComputeExCPU) #endif .set_attr("FGradient", @@ -424,6 +425,7 @@ NNVM_REGISTER_OP(_backward_Pooling) #endif .set_attr_parser(PoolingParamParser) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", PoolingGradComputeExCPU) #endif .set_attr("FCompute", PoolingGradCompute); diff 
--git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc index 88b7b5fc473e..81e775cac526 100644 --- a/src/operator/nn/softmax.cc +++ b/src/operator/nn/softmax.cc @@ -98,6 +98,7 @@ Example:: }) .set_attr("FCompute", SoftmaxCompute) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", SoftmaxComputeExCPU) .set_attr("FInferStorageType", SoftmaxStorageType) #endif diff --git a/src/operator/tensor/elemwise_sum.cc b/src/operator/tensor/elemwise_sum.cc index 9630988165ce..1666537e2860 100644 --- a/src/operator/tensor/elemwise_sum.cc +++ b/src/operator/tensor/elemwise_sum.cc @@ -179,6 +179,9 @@ The storage type of ``add_n`` output depends on storage types of inputs [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) +#if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) +#endif .set_attr("FInferShape", ElementWiseSumShape) .set_attr("FInferType", ElementWiseSumType) .set_attr("FInferStorageType", ElementWiseSumForwardInferStorageType) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index f7f21f9076a6..66fa0a130129 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -225,6 +225,7 @@ NNVM_REGISTER_OP(_backward_copy) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FComputeEx", CopyEx) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) From 54e08d0e92224d1c34e30a5d969913975f135286 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 6 Aug 2018 16:50:36 -0700 Subject: [PATCH 07/25] update method name to CreateDefaultInputs --- src/executor/attach_op_execs_pass.cc | 4 ++-- src/operator/nn/mkldnn/mkldnn_base-inl.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index 
655af91bc604..38d75ac53b4b 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -159,7 +159,7 @@ class StatefulComputeExExecutor : public OpExecutor { op_ctx.run_ctx = rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); - in_array = CreateInputsInputs(in_array); + in_array = CreateDefaultInputs(in_array); fcompute_(state_, op_ctx, in_array, req, out_array); return; #endif @@ -231,7 +231,7 @@ class FComputeExExecutor : public OpExecutor { InvalidateOutputs(out_array, req); const auto is_mkldnn = Op::GetAttr("TIsMKLDNN"); if (!is_mkldnn.get(attrs_.op, false)) { - in_array = CreateInputsInputs(in_array); + in_array = CreateDefaultInputs(in_array); fcompute_(attrs_, op_ctx, in_array, req, out_array); return; } diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index f790ad8abe03..4b2eb6ff865b 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -356,7 +356,7 @@ static inline void InvalidateOutputs(const std::vector &arrs, } } -static inline std::vector CreateInputsInputs(const std::vector &arrs) { +static inline std::vector CreateDefaultInputs(const std::vector &arrs) { std::vector buffer(arrs.size()); for (size_t i = 0; i < arrs.size(); ++i) buffer[i] = arrs[i].Reorder2Default(); From cf28508ff114b7e54a4561d6f0b06cae2c30e296 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 6 Aug 2018 16:51:22 -0700 Subject: [PATCH 08/25] remove dup attrs --- src/operator/nn/pooling.cc | 2 +- src/operator/tensor/elemwise_unary_op_basic.cc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index e4cb2324e451..c133b63623af 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -395,7 +395,7 @@ For each window ``X``, the mathematical expression for Lp pooling is: .set_attr("FInferShape", PoolingShape) .set_attr("FCompute", PoolingCompute) 
#if MXNET_USE_MKLDNN == 1 - .set_attr("TIsMKLDNN", true).set_attr("TIsMKLDNN", true) +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", PoolingComputeExCPU) #endif .set_attr("FGradient", diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 66fa0a130129..2efbf1813e51 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -206,6 +206,7 @@ MXNET_OPERATOR_REGISTER_UNARY(_copy) .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) + .set_attr("TIsMKLDNN", true).set_attr("TIsMKLDNN", true) #endif .set_attr("FInplaceIdentity", [](const NodeAttrs& attrs){ From beff2f199f7b9f4f4e85be247534ddb56cb66750 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 6 Aug 2018 18:39:31 -0700 Subject: [PATCH 09/25] create new instance var to store copy --- src/executor/attach_op_execs_pass.cc | 17 +++++++++++------ src/operator/tensor/elemwise_unary_op_basic.cc | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index 38d75ac53b4b..02ab62ed11dc 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -40,6 +40,11 @@ const OperatorProperty* OpPropGetOpProperty(const NodeAttrs& attrs); namespace exec { +class MKLDNNOpExecutor : public OpExecutor { + protected: + std::vector in_array_fallback; +}; + // abstract OpExecutor which provides storage fallback procedure on // non-default inputs and outputs // FComputeExecutor and FStatefulComputeExecutor inherit from this class @@ -153,14 +158,14 @@ class StatefulComputeExecutor : public StorageFallbackOpExecutor { // stateful compute_ex executor -class StatefulComputeExExecutor : public OpExecutor { +class StatefulComputeExExecutor : public MKLDNNOpExecutor { public: void Run(RunContext rctx, bool is_gpu) override { op_ctx.run_ctx = 
rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); - in_array = CreateDefaultInputs(in_array); - fcompute_(state_, op_ctx, in_array, req, out_array); + in_array_fallback = CreateDefaultInputs(in_array); + fcompute_(state_, op_ctx, in_array_fallback, req, out_array); return; #endif fcompute_(state_, op_ctx, in_array, req, out_array); @@ -223,7 +228,7 @@ class FComputeExecutor : public StorageFallbackOpExecutor { }; // fcompute_ex executor -class FComputeExExecutor : public OpExecutor { +class FComputeExExecutor : public MKLDNNOpExecutor { public: void Run(RunContext rctx, bool is_gpu) override { op_ctx.run_ctx = rctx; @@ -231,8 +236,8 @@ class FComputeExExecutor : public OpExecutor { InvalidateOutputs(out_array, req); const auto is_mkldnn = Op::GetAttr("TIsMKLDNN"); if (!is_mkldnn.get(attrs_.op, false)) { - in_array = CreateDefaultInputs(in_array); - fcompute_(attrs_, op_ctx, in_array, req, out_array); + in_array_fallback = CreateDefaultInputs(in_array); + fcompute_(attrs_, op_ctx, in_array_fallback, req, out_array); return; } #endif diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 2efbf1813e51..c3e9c2dc91d0 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -206,7 +206,7 @@ MXNET_OPERATOR_REGISTER_UNARY(_copy) .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) - .set_attr("TIsMKLDNN", true).set_attr("TIsMKLDNN", true) +.set_attr("TIsMKLDNN", true) #endif .set_attr("FInplaceIdentity", [](const NodeAttrs& attrs){ From 403f601d0ff062f2a41bd78a18631bcdb8db28d6 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 6 Aug 2018 20:10:44 -0700 Subject: [PATCH 10/25] only reorder if mkldnn --- src/operator/nn/mkldnn/mkldnn_base-inl.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h 
b/src/operator/nn/mkldnn/mkldnn_base-inl.h index 4b2eb6ff865b..ee69688714af 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -358,8 +358,12 @@ static inline void InvalidateOutputs(const std::vector &arrs, static inline std::vector CreateDefaultInputs(const std::vector &arrs) { std::vector buffer(arrs.size()); - for (size_t i = 0; i < arrs.size(); ++i) - buffer[i] = arrs[i].Reorder2Default(); + for (size_t i = 0; i < arrs.size(); ++i) { + if (arrs[i].IsMKLDNNData()) + buffer[i] = arrs[i].Reorder2Default(); + else + buffer[i] = arrs[i]; + } return buffer; } From 449339aaf874dd2301afcc4e77053224b3309b03 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Thu, 9 Aug 2018 12:33:29 -0700 Subject: [PATCH 11/25] add mkldnn flag to batch norm --- src/operator/nn/batch_norm.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc index b15f84e107e0..4ea494d64e47 100644 --- a/src/operator/nn/batch_norm.cc +++ b/src/operator/nn/batch_norm.cc @@ -601,6 +601,7 @@ the sparse tensors will fallback. 
#endif .set_attr("FGradient", BatchNormGrad) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) @@ -633,6 +634,7 @@ NNVM_REGISTER_OP(_backward_BatchNorm) #endif .set_attr_parser(ParamParser) #if MXNET_USE_MKLDNN == 1 +.set_attr("TIsMKLDNN", true) .set_attr("FComputeEx", BatchNormGradComputeExCPU) #endif .set_attr("FCompute", BatchNormGradCompute); From 79e9a751dadaaabe703380d94dd2e74f8db68151 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Tue, 14 Aug 2018 21:32:50 -0700 Subject: [PATCH 12/25] do not check input / output ptr for mkldnn as copied is made --- src/operator/tensor/elemwise_unary_op.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index e09a6cccddbf..2ef8d25d1ae9 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -299,7 +299,11 @@ class UnaryOp : public OpBase { } break; case kWriteInplace: +// cannot check if ptrs are the same for MKLDNN because we may have created copies of input when reordering. 
+// WriteInPlace will still write to original array +#if MXNET_USE_MKLDNN != 1 CHECK_EQ(inputs[0].dptr_, outputs[0].dptr_); +#endif break; case kNullOp: break; From 291bfb7cbf7e74fda1817584def7dc7b716adc1c Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Wed, 15 Aug 2018 11:57:26 -0700 Subject: [PATCH 13/25] fix lint --- src/operator/tensor/elemwise_unary_op.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 2ef8d25d1ae9..721cf16e1dd3 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -299,8 +299,8 @@ class UnaryOp : public OpBase { } break; case kWriteInplace: -// cannot check if ptrs are the same for MKLDNN because we may have created copies of input when reordering. -// WriteInPlace will still write to original array +// cannot check if ptrs are the same for MKLDNN because we may have +// created copies of input when reordering. WriteInPlace will still write to original array #if MXNET_USE_MKLDNN != 1 CHECK_EQ(inputs[0].dptr_, outputs[0].dptr_); #endif From e4d673ba03523ade5f98003ac3d515c2e14ca219 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 20 Aug 2018 14:51:03 -0700 Subject: [PATCH 14/25] update macro --- src/operator/tensor/elemwise_unary_op.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 721cf16e1dd3..eb070a411279 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -301,7 +301,7 @@ class UnaryOp : public OpBase { case kWriteInplace: // cannot check if ptrs are the same for MKLDNN because we may have // created copies of input when reordering. 
WriteInPlace will still write to original array -#if MXNET_USE_MKLDNN != 1 +#if MXNET_USE_MKLDNN == 0 CHECK_EQ(inputs[0].dptr_, outputs[0].dptr_); #endif break; From c548d6c197d41cd4a2ee4670c6f6faa97be6c815 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 20 Aug 2018 14:51:31 -0700 Subject: [PATCH 15/25] update custom update name --- tests/python/mkl/test_mkldnn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py index ab2d6fd9b6b2..e597d0f5fc58 100644 --- a/tests/python/mkl/test_mkldnn.py +++ b/tests/python/mkl/test_mkldnn.py @@ -421,8 +421,8 @@ def backward(self, req, out_grad, in_data, out_data, in_grad, aux): data = mx.symbol.Variable('data') conv = mx.sym.Convolution(data=data, kernel=(5, 5), pad=(1, 1), stride=(1,1), num_filter=8, name="conv", no_bias=True) - mlp = mx.symbol.Custom(name='custom', data=conv, op_type='custom') - exec1 = mlp.bind(mx.cpu(), args={'data': mx.nd.ones([10,3,96,96]), 'conv_weight': mx.nd.ones([8,3,5,5])}) + custom = mx.symbol.Custom(name='custom', data=conv, op_type='custom') + exec1 = custom.bind(mx.cpu(), args={'data': mx.nd.ones([10,3,96,96]), 'conv_weight': mx.nd.ones([8,3,5,5])}) exec1.forward()[0].wait_to_read() From 92942e72b60386ec12c80b3d01b1dfceaebe36d6 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Mon, 20 Aug 2018 14:55:52 -0700 Subject: [PATCH 16/25] add todo for fallback --- src/executor/attach_op_execs_pass.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index 02ab62ed11dc..9523c9530f2d 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -234,6 +234,7 @@ class FComputeExExecutor : public MKLDNNOpExecutor { op_ctx.run_ctx = rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); + // TODO (alex): (MXNET-847) Remove this fallback feature after subgraph implemented const auto is_mkldnn = 
Op::GetAttr("TIsMKLDNN"); if (!is_mkldnn.get(attrs_.op, false)) { in_array_fallback = CreateDefaultInputs(in_array); From b64ea5b509f8d06bc67d6b26558a4daf0079d964 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Tue, 21 Aug 2018 14:52:27 -0700 Subject: [PATCH 17/25] fix lint --- src/executor/attach_op_execs_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index 9523c9530f2d..c67b81544014 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -234,7 +234,7 @@ class FComputeExExecutor : public MKLDNNOpExecutor { op_ctx.run_ctx = rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); - // TODO (alex): (MXNET-847) Remove this fallback feature after subgraph implemented + // TODO(alex): (MXNET-847) Remove this fallback feature after subgraph implemented const auto is_mkldnn = Op::GetAttr("TIsMKLDNN"); if (!is_mkldnn.get(attrs_.op, false)) { in_array_fallback = CreateDefaultInputs(in_array); From 8668f01106ae51ad9bdd164d8378070e84f75900 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Wed, 22 Aug 2018 16:10:57 -0700 Subject: [PATCH 18/25] rename opexecutor name --- src/executor/attach_op_execs_pass.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index c67b81544014..c6d1d49998db 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -40,7 +40,7 @@ const OperatorProperty* OpPropGetOpProperty(const NodeAttrs& attrs); namespace exec { -class MKLDNNOpExecutor : public OpExecutor { +class FallbackOpExecutor : public OpExecutor { protected: std::vector in_array_fallback; }; @@ -158,7 +158,7 @@ class StatefulComputeExecutor : public StorageFallbackOpExecutor { // stateful compute_ex executor -class StatefulComputeExExecutor : public MKLDNNOpExecutor { +class 
StatefulComputeExExecutor : public FallbackOpExecutor { public: void Run(RunContext rctx, bool is_gpu) override { op_ctx.run_ctx = rctx; @@ -228,7 +228,7 @@ class FComputeExecutor : public StorageFallbackOpExecutor { }; // fcompute_ex executor -class FComputeExExecutor : public MKLDNNOpExecutor { +class FComputeExExecutor : public FallbackOpExecutor { public: void Run(RunContext rctx, bool is_gpu) override { op_ctx.run_ctx = rctx; From c169b9b8408d403e2fbeb4aeaacf41c777c01be8 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Wed, 22 Aug 2018 16:12:25 -0700 Subject: [PATCH 19/25] add fallback to opexecutor class --- src/executor/attach_op_execs_pass.cc | 9 ++------- src/executor/exec_pass.h | 2 ++ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index c6d1d49998db..1beeab9f0070 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -40,11 +40,6 @@ const OperatorProperty* OpPropGetOpProperty(const NodeAttrs& attrs); namespace exec { -class FallbackOpExecutor : public OpExecutor { - protected: - std::vector in_array_fallback; -}; - // abstract OpExecutor which provides storage fallback procedure on // non-default inputs and outputs // FComputeExecutor and FStatefulComputeExecutor inherit from this class @@ -158,7 +153,7 @@ class StatefulComputeExecutor : public StorageFallbackOpExecutor { // stateful compute_ex executor -class StatefulComputeExExecutor : public FallbackOpExecutor { +class StatefulComputeExExecutor : public OpExecutor { public: void Run(RunContext rctx, bool is_gpu) override { op_ctx.run_ctx = rctx; @@ -228,7 +223,7 @@ class FComputeExecutor : public StorageFallbackOpExecutor { }; // fcompute_ex executor -class FComputeExExecutor : public FallbackOpExecutor { +class FComputeExExecutor : public OpExecutor { public: void Run(RunContext rctx, bool is_gpu) override { op_ctx.run_ctx = rctx; diff --git a/src/executor/exec_pass.h 
b/src/executor/exec_pass.h index cd1db0ac1944..2ec87057aeb0 100644 --- a/src/executor/exec_pass.h +++ b/src/executor/exec_pass.h @@ -86,6 +86,8 @@ class OpExecutor { virtual OpStatePtr state() const { return OpStatePtr(); } + protected: + std::vector in_array_fallback; }; /*! From f201737c6e136596b5d034142269bbdb9303d2fb Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Wed, 22 Aug 2018 16:28:50 -0700 Subject: [PATCH 20/25] fix lint --- src/executor/exec_pass.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/executor/exec_pass.h b/src/executor/exec_pass.h index 2ec87057aeb0..b50ce59772c0 100644 --- a/src/executor/exec_pass.h +++ b/src/executor/exec_pass.h @@ -86,6 +86,7 @@ class OpExecutor { virtual OpStatePtr state() const { return OpStatePtr(); } + protected: std::vector in_array_fallback; }; From 997faa0674509680eb60158588029c09b1ce68e2 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Thu, 23 Aug 2018 10:52:30 -0700 Subject: [PATCH 21/25] add todos --- src/executor/exec_pass.h | 1 + src/operator/nn/mkldnn/mkldnn_base-inl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/executor/exec_pass.h b/src/executor/exec_pass.h index b50ce59772c0..52f7c790c77e 100644 --- a/src/executor/exec_pass.h +++ b/src/executor/exec_pass.h @@ -87,6 +87,7 @@ class OpExecutor { return OpStatePtr(); } + // TODO(alexzai): (MXNET-856) Remove instance member after subgraph feature added protected: std::vector in_array_fallback; }; diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index ee69688714af..7a8e09b6b1c8 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -356,6 +356,7 @@ static inline void InvalidateOutputs(const std::vector &arrs, } } +// TODO(alexzai): (MXNET-856) Remove helper function after subgraph feature added static inline std::vector CreateDefaultInputs(const std::vector &arrs) { std::vector buffer(arrs.size()); for (size_t i = 0; i < arrs.size(); ++i) { From 
a26e739adf059e53ebfe6a748562eb67c112ede6 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Fri, 24 Aug 2018 11:24:00 -0700 Subject: [PATCH 22/25] create fallback arrays in place --- src/executor/attach_op_execs_pass.cc | 4 ++-- src/operator/nn/mkldnn/mkldnn_base-inl.h | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index 1beeab9f0070..c619961a2f24 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -159,7 +159,7 @@ class StatefulComputeExExecutor : public OpExecutor { op_ctx.run_ctx = rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); - in_array_fallback = CreateDefaultInputs(in_array); + CreateDefaultInputs(in_array, in_array_fallback); fcompute_(state_, op_ctx, in_array_fallback, req, out_array); return; #endif @@ -232,7 +232,7 @@ class FComputeExExecutor : public OpExecutor { // TODO(alex): (MXNET-847) Remove this fallback feature after subgraph implemented const auto is_mkldnn = Op::GetAttr<bool>("TIsMKLDNN"); if (!is_mkldnn.get(attrs_.op, false)) { - in_array_fallback = CreateDefaultInputs(in_array); + CreateDefaultInputs(in_array, in_array_fallback); fcompute_(attrs_, op_ctx, in_array_fallback, req, out_array); return; } diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index 7a8e09b6b1c8..64a67237f6f9 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -357,15 +357,14 @@ static inline void InvalidateOutputs(const std::vector<NDArray> &arrs, } // TODO(alexzai): (MXNET-856) Remove helper function after subgraph feature added -static inline std::vector<NDArray> CreateDefaultInputs(const std::vector<NDArray> &arrs) { - std::vector<NDArray> buffer(arrs.size()); +static inline void CreateDefaultInputs(const std::vector<NDArray> &arrs, + const std::vector<NDArray> &out_arrs) { for (size_t i = 0; i < arrs.size(); ++i) { if (arrs[i].IsMKLDNNData()) - buffer[i] = 
arrs[i].Reorder2Default(); + out_arrs[i] = arrs[i].Reorder2Default(); else - buffer[i] = arrs[i]; + out_arrs[i] = arrs[i]; } - return buffer; } const mkldnn::memory *GetWeights(const NDArray &arr, From de9cffad5527e93c0efa02d7fd26fcba9792fa73 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Tue, 28 Aug 2018 08:15:31 -0700 Subject: [PATCH 23/25] revert in place diff --- src/executor/attach_op_execs_pass.cc | 4 ++-- src/operator/nn/mkldnn/mkldnn_base-inl.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index c619961a2f24..0e415ef5112a 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -159,7 +159,7 @@ class StatefulComputeExExecutor : public OpExecutor { op_ctx.run_ctx = rctx; #if MXNET_USE_MKLDNN == 1 InvalidateOutputs(out_array, req); - CreateDefaultInputs(in_array, in_array_fallback); + CreateDefaultInputs(in_array, &in_array_fallback); fcompute_(state_, op_ctx, in_array_fallback, req, out_array); return; #endif @@ -232,7 +232,7 @@ class FComputeExExecutor : public OpExecutor { // TODO(alex): (MXNET-847) Remove this fallback feature after subgraph implemented const auto is_mkldnn = Op::GetAttr<bool>("TIsMKLDNN"); if (!is_mkldnn.get(attrs_.op, false)) { - CreateDefaultInputs(in_array, in_array_fallback); + CreateDefaultInputs(in_array, &in_array_fallback); fcompute_(attrs_, op_ctx, in_array_fallback, req, out_array); return; } diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index 64a67237f6f9..edddd2b699ee 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -358,12 +358,12 @@ static inline void InvalidateOutputs(const std::vector<NDArray> &arrs, // TODO(alexzai): (MXNET-856) Remove helper function after subgraph feature added static inline void CreateDefaultInputs(const std::vector<NDArray> &arrs, - const std::vector<NDArray> &out_arrs) { + std::vector<NDArray> 
*out_arrs) { for (size_t i = 0; i < arrs.size(); ++i) { if (arrs[i].IsMKLDNNData()) - out_arrs[i] = arrs[i].Reorder2Default(); + out_arrs->emplace_back(arrs[i].Reorder2Default()); else - out_arrs[i] = arrs[i]; + out_arrs->emplace_back(arrs[i]); } } From f3e55e965a9165227021df307465a30a54ea5b92 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Thu, 30 Aug 2018 08:13:59 -0700 Subject: [PATCH 24/25] create copy of arrays for fallback --- src/operator/nn/mkldnn/mkldnn_base-inl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index edddd2b699ee..35a2335c7292 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -361,9 +361,9 @@ static inline void CreateDefaultInputs(const std::vector<NDArray> &arrs, std::vector<NDArray> *out_arrs) { for (size_t i = 0; i < arrs.size(); ++i) { if (arrs[i].IsMKLDNNData()) - out_arrs->emplace_back(arrs[i].Reorder2Default()); + out_arrs->push_back(arrs[i].Reorder2Default()); else - out_arrs->emplace_back(arrs[i]); + out_arrs->push_back(arrs[i]); } } From dcaba17c219027614d8d219239e02a59ed813f14 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Thu, 30 Aug 2018 10:14:36 -0700 Subject: [PATCH 25/25] empty array --- src/operator/nn/mkldnn/mkldnn_base-inl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index 35a2335c7292..6eb90f845d37 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -359,6 +359,7 @@ static inline void InvalidateOutputs(const std::vector<NDArray> &arrs, // TODO(alexzai): (MXNET-856) Remove helper function after subgraph feature added static inline void CreateDefaultInputs(const std::vector<NDArray> &arrs, std::vector<NDArray> *out_arrs) { + out_arrs->clear(); for (size_t i = 0; i < arrs.size(); ++i) { if (arrs[i].IsMKLDNNData()) out_arrs->push_back(arrs[i].Reorder2Default());