From 04bbb8ff448f33520da27c429b9fb1f425f7e4a6 Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Thu, 30 Aug 2018 09:24:08 -0700
Subject: [PATCH 01/15] fallback if not training in batch norm

---
 src/operator/nn/batch_norm.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index be542ba5b6be..963030abd4b6 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -421,7 +421,8 @@ void BatchNormGradComputeExCPU(const nnvm::NodeAttrs &attrs,
   TShape shape = inputs[0].shape();
   // MKLDNN batchnorm only works well on the special MKLDNN layout.
   if (SupportMKLDNNBN(inputs[0], param)
-      && (inputs[3].IsMKLDNNData() || inputs[0].IsMKLDNNData())) {
+      && (inputs[3].IsMKLDNNData() || inputs[0].IsMKLDNNData())
+      && ctx.is_train) {
     std::vector<NDArray> out_grad(1);
     std::vector<NDArray> out_data(3);
     std::vector<NDArray> in_data(3);

From 783e6263e29197d80dcd4468b3e8f0e917ce576a Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Thu, 30 Aug 2018 09:42:47 -0700
Subject: [PATCH 02/15] add convolution fallback

---
 src/operator/nn/convolution.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 53b0c1380ed3..9ff94374eec1 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -73,7 +73,7 @@ static void ConvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                         const std::vector<OpReqType>& req,
                                         const std::vector<NDArray>& outputs) {
   const ConvolutionParam& params = nnvm::get<ConvolutionParam>(attrs.parsed);
-  if (SupportMKLDNNConv(params, inputs[0])) {
+  if (SupportMKLDNNConv(params, inputs[0]) && ctx.is_train) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNConvolutionBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(ConvolutionGradCompute, attrs, ctx, inputs, req, outputs);

From d0eeee7106a1afb0bb144140403b7774bfa6fadc Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Thu, 30 Aug 2018 09:52:28 -0700
Subject: [PATCH 03/15] fallback for remaining mkldnn operators backwards

---
 src/operator/nn/activation.cc | 2 +-
 src/operator/nn/deconvolution.cc | 2 +-
 src/operator/nn/fully_connected.cc | 2 +-
 src/operator/nn/lrn.cc | 2 +-
 src/operator/nn/pooling.cc | 3 ++-
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index ba44ebd4ed4d..31eaffa112fb 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -91,7 +91,7 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& attrs,
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
   bool relu = param.act_type == activation::kReLU;
   CHECK_EQ(inputs.size(), relu ? 2U : 3U);
-  if (SupportMKLDNN(inputs[0])) {
+  if (SupportMKLDNN(inputs[0]) && ctx.is_train) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     // XXX: for y = relu(x), y is passed as "in_data" to Backward()
     MKLDNNActivationBackward(attrs, ctx, inputs[0], relu ? inputs[1] : inputs[2], req[0],
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index 039c732c831d..bc6fa02f6390 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -312,7 +312,7 @@ static void DeconvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                           const std::vector<OpReqType>& req,
                                           const std::vector<NDArray>& outputs) {
   const DeconvolutionParam& param = nnvm::get<DeconvolutionParam>(attrs.parsed);
-  if (SupportMKLDNNDeconv(param, inputs[0])) {
+  if (SupportMKLDNNDeconv(param, inputs[0]) && ctx.is_train) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNDeconvolutionBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(DeconvolutionGradCompute, attrs, ctx, inputs, req,
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index a178b2759bf9..64b7010658c5 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -141,7 +141,7 @@ void FullyConnectedGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                     const std::vector<NDArray> &inputs,
                                     const std::vector<OpReqType> &req,
                                     const std::vector<NDArray> &outputs) {
-  if (SupportMKLDNN(inputs[0])) {
+  if (SupportMKLDNN(inputs[0]) && ctx.is_train) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNFCBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(FullyConnectedGradCompute, attrs, ctx, inputs, req,
diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 020cb479acc6..5871810db742 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -133,7 +133,7 @@ void LRNGradComputeExCPU(const nnvm::NodeAttrs &attrs,
   const NDArray &in_data = inputs[1];
   const NDArray &in_grad = outputs[0];
-  if (SupportMKLDNN(inputs[0])) {
+  if (SupportMKLDNN(inputs[0]) && ctx.is_train) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNLRNBackward(ctx, param, out_grad, in_data, req[0], in_grad);
     MKLDNN_OPCHECK_RUN(LRNGradCompute, attrs, ctx, inputs, req, outputs);
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 611568807a9a..3fe3745907d6 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -270,7 +270,8 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   if (SupportMKLDNN(inputs[0])
-      && SupportMKLDNNPooling(param, inputs[0].shape())) {
+      && SupportMKLDNNPooling(param, inputs[0].shape())
+      && ctx.is_train) {
     const NDArray &out_grad = inputs[0];
     const NDArray *workspace = nullptr;
     const NDArray *in_data = nullptr;

From 7bcc9622359c9df1570ee5f2402b8fc19fb95c95 Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Thu, 30 Aug 2018 10:01:26 -0700
Subject: [PATCH 04/15] fall back if need_grad is false

---
 src/operator/nn/activation.cc | 2 +-
 src/operator/nn/batch_norm.cc | 2 +-
 src/operator/nn/convolution.cc | 2 +-
 src/operator/nn/deconvolution.cc | 2 +-
 src/operator/nn/fully_connected.cc | 2 +-
 src/operator/nn/lrn.cc | 2 +-
 src/operator/nn/pooling.cc | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index 31eaffa112fb..d646ec4d6f28 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -91,7 +91,7 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& attrs,
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
   bool relu = param.act_type == activation::kReLU;
   CHECK_EQ(inputs.size(), relu ? 2U : 3U);
-  if (SupportMKLDNN(inputs[0]) && ctx.is_train) {
+  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     // XXX: for y = relu(x), y is passed as "in_data" to Backward()
     MKLDNNActivationBackward(attrs, ctx, inputs[0], relu ? inputs[1] : inputs[2], req[0],
diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 963030abd4b6..97454678a7f9 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -422,7 +422,7 @@ void BatchNormGradComputeExCPU(const nnvm::NodeAttrs &attrs,
   // MKLDNN batchnorm only works well on the special MKLDNN layout.
   if (SupportMKLDNNBN(inputs[0], param)
       && (inputs[3].IsMKLDNNData() || inputs[0].IsMKLDNNData())
-      && ctx.is_train) {
+      && ctx.need_grad) {
     std::vector<NDArray> out_grad(1);
     std::vector<NDArray> out_data(3);
     std::vector<NDArray> in_data(3);
diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 9ff94374eec1..6c0681be9676 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -73,7 +73,7 @@ static void ConvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                         const std::vector<OpReqType>& req,
                                         const std::vector<NDArray>& outputs) {
   const ConvolutionParam& params = nnvm::get<ConvolutionParam>(attrs.parsed);
-  if (SupportMKLDNNConv(params, inputs[0]) && ctx.is_train) {
+  if (SupportMKLDNNConv(params, inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNConvolutionBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(ConvolutionGradCompute, attrs, ctx, inputs, req, outputs);
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index bc6fa02f6390..c86c31ddf2c8 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -312,7 +312,7 @@ static void DeconvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                           const std::vector<OpReqType>& req,
                                           const std::vector<NDArray>& outputs) {
   const DeconvolutionParam& param = nnvm::get<DeconvolutionParam>(attrs.parsed);
-  if (SupportMKLDNNDeconv(param, inputs[0]) && ctx.is_train) {
+  if (SupportMKLDNNDeconv(param, inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNDeconvolutionBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(DeconvolutionGradCompute, attrs, ctx, inputs, req,
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index 64b7010658c5..9d68dc2f818f 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -141,7 +141,7 @@ void FullyConnectedGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                     const std::vector<NDArray> &inputs,
                                     const std::vector<OpReqType> &req,
                                     const std::vector<NDArray> &outputs) {
-  if (SupportMKLDNN(inputs[0]) && ctx.is_train) {
+  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNFCBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(FullyConnectedGradCompute, attrs, ctx, inputs, req,
diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 5871810db742..49eff2ad6c71 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -133,7 +133,7 @@ void LRNGradComputeExCPU(const nnvm::NodeAttrs &attrs,
   const NDArray &in_data = inputs[1];
   const NDArray &in_grad = outputs[0];
-  if (SupportMKLDNN(inputs[0]) && ctx.is_train) {
+  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNLRNBackward(ctx, param, out_grad, in_data, req[0], in_grad);
     MKLDNN_OPCHECK_RUN(LRNGradCompute, attrs, ctx, inputs, req, outputs);
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 3fe3745907d6..d94684fb377e 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -271,7 +271,7 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   if (SupportMKLDNN(inputs[0])
       && SupportMKLDNNPooling(param, inputs[0].shape())
-      && ctx.is_train) {
+      && ctx.need_grad) {
     const NDArray &out_grad = inputs[0];
     const NDArray *workspace = nullptr;
     const NDArray *in_data = nullptr;

From 64fe9c40f94511ab350ae2d7eafdcf7b20a51c58 Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Thu, 30 Aug 2018 10:08:21 -0700
Subject: [PATCH 05/15] add unit test for record false

---
 tests/python/unittest/test_gluon.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 02dc6cee4a66..4941354141f0 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -1220,8 +1220,8 @@ def check_hybrid_static_memory(**kwargs):
     net1(x)
     net2(x)
 
-    def test(net, x):
-        with mx.autograd.record():
+    def test(net, x, record=True):
+        with mx.autograd.record(record):
             y = net(x) + net(x)
             y.backward()
@@ -1229,12 +1229,13 @@ def test(net, x):
 
         return y, grads
 
-    y1, grads1 = test(net1, x)
-    y2, grads2 = test(net2, x)
+    for record in (True, False):
+        y1, grads1 = test(net1, x, record)
+        y2, grads2 = test(net2, x, record)
 
-    assert_almost_equal(y1.asnumpy(), y2.asnumpy(), rtol=1e-3, atol=1e-5)
-    for key in grads1:
-        assert_almost_equal(grads1[key].asnumpy(), grads2[key].asnumpy(), rtol=1e-3, atol=1e-5)
+        assert_almost_equal(y1.asnumpy(), y2.asnumpy(), rtol=1e-3, atol=1e-5)
+        for key in grads1:
+            assert_almost_equal(grads1[key].asnumpy(), grads2[key].asnumpy(), rtol=1e-3, atol=1e-5)
 
 @with_seed()
 def test_hybrid_static_memory():

From 30e0636058a972a06a5d3329c7daf76bac09503a Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 13:20:34 -0800
Subject: [PATCH 06/15] change to need_grad

---
 src/operator/nn/convolution.cc | 2 +-
 src/operator/nn/mkldnn/mkldnn_convolution.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 6c0681be9676..53b0c1380ed3 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -73,7 +73,7 @@ static void ConvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                         const std::vector<OpReqType>& req,
                                         const std::vector<NDArray>& outputs) {
   const ConvolutionParam& params = nnvm::get<ConvolutionParam>(attrs.parsed);
-  if (SupportMKLDNNConv(params, inputs[0]) && ctx.need_grad) {
+  if (SupportMKLDNNConv(params, inputs[0])) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNConvolutionBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(ConvolutionGradCompute, attrs, ctx, inputs, req, outputs);
diff --git a/src/operator/nn/mkldnn/mkldnn_convolution.cc b/src/operator/nn/mkldnn/mkldnn_convolution.cc
index dd1f3ec07d70..e80c6842043c 100644
--- a/src/operator/nn/mkldnn/mkldnn_convolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_convolution.cc
@@ -597,7 +597,7 @@ void MKLDNNConvolutionBackward(const nnvm::NodeAttrs& attrs, const OpContext &ct
     out_grad = out_grad.Reorder2Default();
   mkldnn::convolution_forward::primitive_desc fwd_pd = GetConvFwdImpl(
-      full_param, ctx.is_train, data, weight, bias, out_grad);
+      full_param, ctx.need_grad, data, weight, bias, out_grad);
   const ConvolutionParam &param = full_param.conv_param;
   CHECK_NE(req[conv::kWeight], kWriteInplace) << "cannot write weight inplace";

From 6bda387c320a5b316d8940566631579cfc5dc01b Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 14:25:08 -0800
Subject: [PATCH 07/15] remove individual checks

---
 src/operator/nn/activation.cc | 2 +-
 src/operator/nn/batch_norm.cc | 3 +--
 src/operator/nn/deconvolution.cc | 2 +-
 src/operator/nn/fully_connected.cc | 2 +-
 src/operator/nn/lrn.cc | 2 +-
 src/operator/nn/pooling.cc | 3 +--
 6 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index d646ec4d6f28..ba44ebd4ed4d 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -91,7 +91,7 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& attrs,
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
   bool relu = param.act_type == activation::kReLU;
   CHECK_EQ(inputs.size(), relu ? 2U : 3U);
-  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
+  if (SupportMKLDNN(inputs[0])) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     // XXX: for y = relu(x), y is passed as "in_data" to Backward()
     MKLDNNActivationBackward(attrs, ctx, inputs[0], relu ? inputs[1] : inputs[2], req[0],
diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index ea6d1b3470d7..6254a1e18662 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -421,8 +421,7 @@ void BatchNormGradComputeExCPU(const nnvm::NodeAttrs &attrs,
   TShape shape = inputs[0].shape();
   // MKLDNN batchnorm only works well on the special MKLDNN layout.
   if (SupportMKLDNNBN(inputs[0], param)
-      && (inputs[3].IsMKLDNNData() || inputs[0].IsMKLDNNData())
-      && ctx.need_grad) {
+      && (inputs[3].IsMKLDNNData() || inputs[0].IsMKLDNNData())) {
     std::vector<NDArray> out_grad(1);
     std::vector<NDArray> out_data(3);
     std::vector<NDArray> in_data(3);
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index c86c31ddf2c8..039c732c831d 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -312,7 +312,7 @@ static void DeconvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                           const std::vector<OpReqType>& req,
                                           const std::vector<NDArray>& outputs) {
   const DeconvolutionParam& param = nnvm::get<DeconvolutionParam>(attrs.parsed);
-  if (SupportMKLDNNDeconv(param, inputs[0]) && ctx.need_grad) {
+  if (SupportMKLDNNDeconv(param, inputs[0])) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNDeconvolutionBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(DeconvolutionGradCompute, attrs, ctx, inputs, req,
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index 9d68dc2f818f..a178b2759bf9 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -141,7 +141,7 @@ void FullyConnectedGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                     const std::vector<NDArray> &inputs,
                                     const std::vector<OpReqType> &req,
                                     const std::vector<NDArray> &outputs) {
-  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
+  if (SupportMKLDNN(inputs[0])) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNFCBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(FullyConnectedGradCompute, attrs, ctx, inputs, req,
diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 49eff2ad6c71..020cb479acc6 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -133,7 +133,7 @@ void LRNGradComputeExCPU(const nnvm::NodeAttrs &attrs,
   const NDArray &in_data = inputs[1];
   const NDArray &in_grad = outputs[0];
-  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
+  if (SupportMKLDNN(inputs[0])) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNLRNBackward(ctx, param, out_grad, in_data, req[0], in_grad);
     MKLDNN_OPCHECK_RUN(LRNGradCompute, attrs, ctx, inputs, req, outputs);
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index d94684fb377e..611568807a9a 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -270,8 +270,7 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   if (SupportMKLDNN(inputs[0])
-      && SupportMKLDNNPooling(param, inputs[0].shape())
-      && ctx.need_grad) {
+      && SupportMKLDNNPooling(param, inputs[0].shape())) {
     const NDArray &out_grad = inputs[0];
     const NDArray *workspace = nullptr;
     const NDArray *in_data = nullptr;

From 7a7ca2fd3f73eac6532afe9338b9f6c52d9abbd6 Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 14:31:05 -0800
Subject: [PATCH 08/15] skip testing static_shape record false

---
 tests/python/unittest/test_gluon.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index d76fed292bba..ce2d49bc73be 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -1209,7 +1209,7 @@ def test_zero_grad():
     grad = net.collect_params()['test_zero_grad_weight'].grad()
     assert_almost_equal(grad.asnumpy(), grad.asnumpy() * 0)
 
-def check_hybrid_static_memory(**kwargs):
+def check_hybrid_static_memory(train_modes, **kwargs):
     x = mx.nd.random.uniform(shape=(2, 3, 32, 32))
     x.attach_grad()
@@ -1230,7 +1230,7 @@ def test(net, x, record=True):
 
         return y, grads
 
-    for record in (True, False):
+    for record in train_modes:
         y1, grads1 = test(net1, x, record)
         y2, grads2 = test(net2, x, record)
@@ -1240,9 +1240,10 @@ def test(net, x, record=True):
 
 @with_seed()
 def test_hybrid_static_memory():
-    check_hybrid_static_memory()
-    check_hybrid_static_memory(static_alloc=True)
-    check_hybrid_static_memory(static_alloc=True, static_shape=True)
+    check_hybrid_static_memory(train_mode=[True, False])
+    check_hybrid_static_memory(train_mode=[True, False], static_alloc=True)
+    # TODO: MKLDNN does not work with static_shape backwards
+    check_hybrid_static_memory(train_mode=[True], static_alloc=True, static_shape=True)
 
 def check_hybrid_static_memory_switching(**kwargs):
     net = gluon.model_zoo.vision.get_resnet(

From 62dd2d0bbe84561459221425669e63d981840d5b Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 14:50:28 -0800
Subject: [PATCH 09/15] add todo issue number

---
 tests/python/unittest/test_gluon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index ce2d49bc73be..438ac34e2145 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -1242,7 +1242,7 @@ def test(net, x, record=True):
 def test_hybrid_static_memory():
     check_hybrid_static_memory(train_mode=[True, False])
     check_hybrid_static_memory(train_mode=[True, False], static_alloc=True)
-    # TODO: MKLDNN does not work with static_shape backwards
+    # TODO: MKLDNN (issue #13445) does not work with static_shape backwards
     check_hybrid_static_memory(train_mode=[True], static_alloc=True, static_shape=True)
 
 def check_hybrid_static_memory_switching(**kwargs):

From e5c5976cf72c2475bcbea925aad53ef9deb972de Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 16:28:09 -0800
Subject: [PATCH 10/15] fix typo

---
 tests/python/unittest/test_gluon.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 438ac34e2145..5291d50ab946 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -1221,18 +1221,18 @@ def check_hybrid_static_memory(train_modes, **kwargs):
     net1(x)
     net2(x)
 
-    def test(net, x, record=True):
-        with mx.autograd.record(record):
+    def test(net, x, train_mode=True):
+        with mx.autograd.record(train_mode=train_mode):
             y = net(x) + net(x)
-            y.backward()
+            y.backward(train_mode=train_mode)
 
         grads = {k: v.grad() for k, v in net.collect_params().items() if v.grad_req != 'null'}
 
         return y, grads
 
-    for record in train_modes:
-        y1, grads1 = test(net1, x, record)
-        y2, grads2 = test(net2, x, record)
+    for train_mode in train_modes:
+        y1, grads1 = test(net1, x, train_mode)
+        y2, grads2 = test(net2, x, train_mode)
 
         assert_almost_equal(y1.asnumpy(), y2.asnumpy(), rtol=1e-3, atol=1e-5)
         for key in grads1:
@@ -1240,10 +1240,10 @@ def test(net, x, record=True):
 
 @with_seed()
 def test_hybrid_static_memory():
-    check_hybrid_static_memory(train_mode=[True, False])
-    check_hybrid_static_memory(train_mode=[True, False], static_alloc=True)
+    check_hybrid_static_memory(train_modes=[True, False])
+    check_hybrid_static_memory(train_modes=[True, False], static_alloc=True)
     # TODO: MKLDNN (issue #13445) does not work with static_shape backwards
-    check_hybrid_static_memory(train_mode=[True], static_alloc=True, static_shape=True)
+    check_hybrid_static_memory(train_modes=[True], static_alloc=True, static_shape=True)
 
 def check_hybrid_static_memory_switching(**kwargs):
     net = gluon.model_zoo.vision.get_resnet(

From 895fd9cf147c8440e5b6678c394d0a1355da9118 Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 16:40:24 -0800
Subject: [PATCH 11/15] set to need_grad other places

---
 src/operator/nn/batch_norm.cc | 4 ++--
 src/operator/nn/mkldnn/mkldnn_deconvolution.cc | 2 +-
 src/operator/nn/mkldnn/mkldnn_lrn-inl.h | 2 +-
 src/operator/nn/mkldnn/mkldnn_pooling.cc | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 6254a1e18662..50a331912697 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -112,7 +112,7 @@ void BatchNormForwardImpl(mshadow::Stream<cpu> *,
   AccReal *mean = meanVector.dptr<AccReal>();
   AccReal *var = varianceVector.dptr<AccReal>();
-  const bool is_train_and_not_global_stats = ctx.is_train && !param_.use_global_stats;
+  const bool is_train_and_not_global_stats = ctx.need_grad && !param_.use_global_stats;
   const size_t channelCount = inputData.ChannelCount();
   const size_t itemCountPerChannel = inputData.Size() / channelCount;
@@ -226,7 +226,7 @@ void BatchNormBackwardImpl(mshadow::Stream<cpu> *,
   AccReal *gradWeightData = gradWeight.dptr<AccReal>();
   AccReal *gradBiasData = gradBias.dptr<AccReal>();
-  const bool is_train_and_not_global_stats = ctx.is_train && !param_.use_global_stats;
+  const bool is_train_and_not_global_stats = ctx.need_grad && !param_.use_global_stats;
   #pragma omp parallel for
   for (int channel = 0; channel < static_cast<int>(channelCount); ++channel) {
diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
index a6d6b24235c8..67316fb1efb3 100644
--- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -251,7 +251,7 @@ void MKLDNNDeconvForward::SetDataHandle(const DeconvolutionParam& param,
   auto data_mem = in_data.GetMKLDNNDataReorder(
       fwd_pd.diff_dst_primitive_desc());
   const mkldnn::memory *weight_mem;
-  if (ctx.is_train) {
+  if (ctx.need_grad) {
     // TODO(zhengda) kvstore doesn't handle MKLDNN correctly. Let's reorder it
     // to the default format for now.
     if (weight.IsMKLDNNData())
diff --git a/src/operator/nn/mkldnn/mkldnn_lrn-inl.h b/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
index 31b293a14c2c..325d0f6f3355 100644
--- a/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
@@ -180,7 +180,7 @@ static MKLDNNLRNFwd &GetLRNFwd(const LRNParam& param,
                                          OpHash> lrn_fwds;
 #endif
   auto kind_ =
-      ctx.is_train ? prop_kind::forward_training : prop_kind::forward_scoring;
+      ctx.need_grad ? prop_kind::forward_training : prop_kind::forward_scoring;
   MKLDNNLRNSignature key(param);
   key.AddSign(kind_);
diff --git a/src/operator/nn/mkldnn/mkldnn_pooling.cc b/src/operator/nn/mkldnn/mkldnn_pooling.cc
index f4d681ded78d..994fc2b1310c 100644
--- a/src/operator/nn/mkldnn/mkldnn_pooling.cc
+++ b/src/operator/nn/mkldnn/mkldnn_pooling.cc
@@ -269,7 +269,7 @@ MKLDNNPoolingFwd &GetPoolingFwd(const PoolingParam &param,
 void MKLDNNPoolingCompute(const OpContext &ctx, const PoolingParam &param,
                           const NDArray &in_data, const OpReqType req,
                           const NDArray &out_data, const NDArray *workspace) {
-  auto &fwd = GetPoolingFwd(param, ctx.is_train, in_data, out_data);
+  auto &fwd = GetPoolingFwd(param, ctx.need_grad, in_data, out_data);
   fwd.SetNewMem(in_data, out_data, req, workspace);
   fwd.Execute(out_data);
 }

From e9f3def4116ba068b7eef3acefeb830529c8f82d Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 17:44:41 -0800
Subject: [PATCH 12/15] retrigger

From e774956da25e6f8525eff3936111d068bf2740bc Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 19:17:02 -0800
Subject: [PATCH 13/15] Revert "set to need_grad other places"

This reverts commit 895fd9cf147c8440e5b6678c394d0a1355da9118.
---
 src/operator/nn/batch_norm.cc | 4 ++--
 src/operator/nn/mkldnn/mkldnn_deconvolution.cc | 2 +-
 src/operator/nn/mkldnn/mkldnn_lrn-inl.h | 2 +-
 src/operator/nn/mkldnn/mkldnn_pooling.cc | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 50a331912697..6254a1e18662 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -112,7 +112,7 @@ void BatchNormForwardImpl(mshadow::Stream<cpu> *,
   AccReal *mean = meanVector.dptr<AccReal>();
   AccReal *var = varianceVector.dptr<AccReal>();
-  const bool is_train_and_not_global_stats = ctx.need_grad && !param_.use_global_stats;
+  const bool is_train_and_not_global_stats = ctx.is_train && !param_.use_global_stats;
   const size_t channelCount = inputData.ChannelCount();
   const size_t itemCountPerChannel = inputData.Size() / channelCount;
@@ -226,7 +226,7 @@ void BatchNormBackwardImpl(mshadow::Stream<cpu> *,
   AccReal *gradWeightData = gradWeight.dptr<AccReal>();
   AccReal *gradBiasData = gradBias.dptr<AccReal>();
-  const bool is_train_and_not_global_stats = ctx.need_grad && !param_.use_global_stats;
+  const bool is_train_and_not_global_stats = ctx.is_train && !param_.use_global_stats;
   #pragma omp parallel for
   for (int channel = 0; channel < static_cast<int>(channelCount); ++channel) {
diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
index 67316fb1efb3..a6d6b24235c8 100644
--- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -251,7 +251,7 @@ void MKLDNNDeconvForward::SetDataHandle(const DeconvolutionParam& param,
   auto data_mem = in_data.GetMKLDNNDataReorder(
       fwd_pd.diff_dst_primitive_desc());
   const mkldnn::memory *weight_mem;
-  if (ctx.need_grad) {
+  if (ctx.is_train) {
     // TODO(zhengda) kvstore doesn't handle MKLDNN correctly. Let's reorder it
     // to the default format for now.
     if (weight.IsMKLDNNData())
diff --git a/src/operator/nn/mkldnn/mkldnn_lrn-inl.h b/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
index 325d0f6f3355..31b293a14c2c 100644
--- a/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
@@ -180,7 +180,7 @@ static MKLDNNLRNFwd &GetLRNFwd(const LRNParam& param,
                                          OpHash> lrn_fwds;
 #endif
   auto kind_ =
-      ctx.need_grad ? prop_kind::forward_training : prop_kind::forward_scoring;
+      ctx.is_train ? prop_kind::forward_training : prop_kind::forward_scoring;
   MKLDNNLRNSignature key(param);
   key.AddSign(kind_);
diff --git a/src/operator/nn/mkldnn/mkldnn_pooling.cc b/src/operator/nn/mkldnn/mkldnn_pooling.cc
index 994fc2b1310c..f4d681ded78d 100644
--- a/src/operator/nn/mkldnn/mkldnn_pooling.cc
+++ b/src/operator/nn/mkldnn/mkldnn_pooling.cc
@@ -269,7 +269,7 @@ MKLDNNPoolingFwd &GetPoolingFwd(const PoolingParam &param,
 void MKLDNNPoolingCompute(const OpContext &ctx, const PoolingParam &param,
                           const NDArray &in_data, const OpReqType req,
                           const NDArray &out_data, const NDArray *workspace) {
-  auto &fwd = GetPoolingFwd(param, ctx.need_grad, in_data, out_data);
+  auto &fwd = GetPoolingFwd(param, ctx.is_train, in_data, out_data);
   fwd.SetNewMem(in_data, out_data, req, workspace);
   fwd.Execute(out_data);
 }

From c346942604243d6d7e183c28aad3541b0c803da2 Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 19:17:18 -0800
Subject: [PATCH 14/15] Revert "remove individual checks"

This reverts commit 6bda387c320a5b316d8940566631579cfc5dc01b.
---
 src/operator/nn/activation.cc | 2 +-
 src/operator/nn/batch_norm.cc | 3 ++-
 src/operator/nn/deconvolution.cc | 2 +-
 src/operator/nn/fully_connected.cc | 2 +-
 src/operator/nn/lrn.cc | 2 +-
 src/operator/nn/pooling.cc | 3 ++-
 6 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index ba44ebd4ed4d..d646ec4d6f28 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -91,7 +91,7 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& attrs,
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
   bool relu = param.act_type == activation::kReLU;
   CHECK_EQ(inputs.size(), relu ? 2U : 3U);
-  if (SupportMKLDNN(inputs[0])) {
+  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     // XXX: for y = relu(x), y is passed as "in_data" to Backward()
     MKLDNNActivationBackward(attrs, ctx, inputs[0], relu ? inputs[1] : inputs[2], req[0],
diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 6254a1e18662..ea6d1b3470d7 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -421,7 +421,8 @@ void BatchNormGradComputeExCPU(const nnvm::NodeAttrs &attrs,
   TShape shape = inputs[0].shape();
   // MKLDNN batchnorm only works well on the special MKLDNN layout.
   if (SupportMKLDNNBN(inputs[0], param)
-      && (inputs[3].IsMKLDNNData() || inputs[0].IsMKLDNNData())) {
+      && (inputs[3].IsMKLDNNData() || inputs[0].IsMKLDNNData())
+      && ctx.need_grad) {
     std::vector<NDArray> out_grad(1);
     std::vector<NDArray> out_data(3);
     std::vector<NDArray> in_data(3);
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index 039c732c831d..c86c31ddf2c8 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -312,7 +312,7 @@ static void DeconvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                           const std::vector<OpReqType>& req,
                                           const std::vector<NDArray>& outputs) {
   const DeconvolutionParam& param = nnvm::get<DeconvolutionParam>(attrs.parsed);
-  if (SupportMKLDNNDeconv(param, inputs[0])) {
+  if (SupportMKLDNNDeconv(param, inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNDeconvolutionBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(DeconvolutionGradCompute, attrs, ctx, inputs, req,
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index a178b2759bf9..9d68dc2f818f 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -141,7 +141,7 @@ void FullyConnectedGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                     const std::vector<NDArray> &inputs,
                                     const std::vector<OpReqType> &req,
                                     const std::vector<NDArray> &outputs) {
-  if (SupportMKLDNN(inputs[0])) {
+  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNFCBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(FullyConnectedGradCompute, attrs, ctx, inputs, req,
diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 020cb479acc6..49eff2ad6c71 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -133,7 +133,7 @@ void LRNGradComputeExCPU(const nnvm::NodeAttrs &attrs,
   const NDArray &in_data = inputs[1];
   const NDArray &in_grad = outputs[0];
-  if (SupportMKLDNN(inputs[0])) {
+  if (SupportMKLDNN(inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNLRNBackward(ctx, param, out_grad, in_data, req[0], in_grad);
     MKLDNN_OPCHECK_RUN(LRNGradCompute, attrs, ctx, inputs, req, outputs);
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 611568807a9a..d94684fb377e 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -270,7 +270,8 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   if (SupportMKLDNN(inputs[0])
-      && SupportMKLDNNPooling(param, inputs[0].shape())) {
+      && SupportMKLDNNPooling(param, inputs[0].shape())
+      && ctx.need_grad) {
     const NDArray &out_grad = inputs[0];
     const NDArray *workspace = nullptr;
     const NDArray *in_data = nullptr;

From 44344a0e77979743a925c85c51338c2da5877de0 Mon Sep 17 00:00:00 2001
From: Alexander Zai
Date: Wed, 28 Nov 2018 19:17:25 -0800
Subject: [PATCH 15/15] Revert "change to need_grad"

This reverts commit 30e0636058a972a06a5d3329c7daf76bac09503a.
---
 src/operator/nn/convolution.cc | 2 +-
 src/operator/nn/mkldnn/mkldnn_convolution.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 53b0c1380ed3..6c0681be9676 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -73,7 +73,7 @@ static void ConvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                         const std::vector<OpReqType>& req,
                                         const std::vector<NDArray>& outputs) {
   const ConvolutionParam& params = nnvm::get<ConvolutionParam>(attrs.parsed);
-  if (SupportMKLDNNConv(params, inputs[0])) {
+  if (SupportMKLDNNConv(params, inputs[0]) && ctx.need_grad) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     MKLDNNConvolutionBackward(attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(ConvolutionGradCompute, attrs, ctx, inputs, req, outputs);
diff --git a/src/operator/nn/mkldnn/mkldnn_convolution.cc b/src/operator/nn/mkldnn/mkldnn_convolution.cc
index e80c6842043c..dd1f3ec07d70 100644
--- a/src/operator/nn/mkldnn/mkldnn_convolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_convolution.cc
@@ -597,7 +597,7 @@ void MKLDNNConvolutionBackward(const nnvm::NodeAttrs& attrs, const OpContext &ct
     out_grad = out_grad.Reorder2Default();
   mkldnn::convolution_forward::primitive_desc fwd_pd = GetConvFwdImpl(
-      full_param, ctx.need_grad, data, weight, bias, out_grad);
+      full_param, ctx.is_train, data, weight, bias, out_grad);
   const ConvolutionParam &param = full_param.conv_param;
   CHECK_NE(req[conv::kWeight], kWriteInplace) << "cannot write weight inplace";