From 02f7ba38c93b22cec8da8840878793be41d897df Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Fri, 19 Nov 2021 10:53:59 +0100 Subject: [PATCH 1/5] Keep full path to a file in directory --- src/operator/nn/dnnl/dnnl_act-inl.h | 4 +- src/operator/nn/dnnl/dnnl_act.cc | 4 +- src/operator/nn/dnnl/dnnl_base.cc | 2 +- src/operator/nn/dnnl/dnnl_batch_dot-inl.h | 2 +- src/operator/nn/dnnl/dnnl_batch_dot.cc | 2 +- src/operator/nn/dnnl/dnnl_batch_norm-inl.h | 2 +- src/operator/nn/dnnl/dnnl_concat-inl.h | 2 +- src/operator/nn/dnnl/dnnl_concat.cc | 2 +- src/operator/nn/dnnl/dnnl_convolution-inl.h | 2 +- src/operator/nn/dnnl/dnnl_convolution.cc | 2 +- src/operator/nn/dnnl/dnnl_deconvolution-inl.h | 2 +- src/operator/nn/dnnl/dnnl_deconvolution.cc | 2 +- .../nn/dnnl/dnnl_fully_connected-inl.h | 2 +- src/operator/nn/dnnl/dnnl_fully_connected.cc | 2 +- src/operator/nn/dnnl/dnnl_layer_norm-inl.h | 2 +- src/operator/nn/dnnl/dnnl_log_softmax.cc | 2 +- src/operator/nn/dnnl/dnnl_lrn-inl.h | 2 +- src/operator/nn/dnnl/dnnl_pooling-inl.h | 2 +- src/operator/nn/dnnl/dnnl_reshape-inl.h | 2 +- src/operator/nn/dnnl/dnnl_reshape.cc | 2 +- src/operator/nn/dnnl/dnnl_rnn-inl.h | 2 +- src/operator/nn/dnnl/dnnl_slice-inl.h | 4 +- src/operator/nn/dnnl/dnnl_softmax-inl.h | 2 +- src/operator/nn/dnnl/dnnl_softmax_output.cc | 4 +- src/operator/nn/dnnl/dnnl_sum.cc | 2 +- src/operator/nn/dnnl/dnnl_transpose-inl.h | 2 +- src/operator/nn/dnnl/dnnl_transpose.cc | 3 +- .../quantization/dnnl/dnnl_dequantize-inl.h | 2 +- .../quantization/dnnl/dnnl_quantize-inl.h | 4 +- .../quantization/dnnl/dnnl_quantize_v2-inl.h | 4 +- .../quantization/dnnl/dnnl_quantized_act.cc | 4 +- .../dnnl/dnnl_quantized_batch_norm.cc | 4 +- .../dnnl/dnnl_quantized_concat.cc | 4 +- .../quantization/dnnl/dnnl_quantized_conv.cc | 12 +- .../dnnl/dnnl_quantized_elemwise_add.cc | 8 +- .../dnnl/dnnl_quantized_flatten.cc | 4 +- .../dnnl/dnnl_quantized_fully_connected.cc | 4 +- .../dnnl/dnnl_quantized_pooling.cc | 2 +- 
.../quantization/dnnl/dnnl_requantize-inl.h | 4 +- src/operator/subgraph/dnnl/dnnl_batch_dot.cc | 12 +- .../subgraph/dnnl/dnnl_batch_dot_property.h | 8 +- .../subgraph/dnnl/dnnl_bn_relu_property.h | 10 +- src/operator/subgraph/dnnl/dnnl_common.h | 4 +- src/operator/subgraph/dnnl/dnnl_conv-inl.h | 9 +- src/operator/subgraph/dnnl/dnnl_conv.cc | 16 +- .../subgraph/dnnl/dnnl_conv_property.h | 15 +- .../dnnl_elemwisemul_post_quantize_property.h | 232 ++++++++++++++++++ src/operator/subgraph/dnnl/dnnl_fc-inl.h | 5 +- src/operator/subgraph/dnnl/dnnl_fc.cc | 18 +- .../dnnl/dnnl_fc_post_quantize_property.h | 231 +++++++++++++++++ src/operator/subgraph/dnnl/dnnl_fc_property.h | 9 +- .../dnnl/dnnl_matmul_post_quantize_property.h | 203 +++++++++++++++ .../dnnl_post_quantize_align_scale_property.h | 5 +- .../dnnl/dnnl_post_quantize_property.h | 1 + .../subgraph/dnnl/dnnl_subgraph_base-inl.h | 3 +- .../subgraph/dnnl/dnnl_transformer-inl.h | 4 +- .../subgraph/dnnl/dnnl_transformer.cc | 8 +- .../dnnl/dnnl_transformer_qk_property.h | 15 +- .../dnnl/dnnl_transformer_valatt_property.h | 17 +- 59 files changed, 812 insertions(+), 132 deletions(-) create mode 100644 src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h create mode 100644 src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h create mode 100644 src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h diff --git a/src/operator/nn/dnnl/dnnl_act-inl.h b/src/operator/nn/dnnl/dnnl_act-inl.h index 3c8c16b1558b..66f229962fe7 100644 --- a/src/operator/nn/dnnl/dnnl_act-inl.h +++ b/src/operator/nn/dnnl/dnnl_act-inl.h @@ -30,8 +30,8 @@ #include #include -#include "../../leaky_relu-inl.h" -#include "../activation-inl.h" +#include "operator/leaky_relu-inl.h" +#include "operator/nn/activation-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_act.cc b/src/operator/nn/dnnl/dnnl_act.cc index 90a8fd0787f8..a89ed26f13a9 100644 --- a/src/operator/nn/dnnl/dnnl_act.cc +++ 
b/src/operator/nn/dnnl/dnnl_act.cc @@ -35,9 +35,9 @@ #include #include -#include "../../operator_common.h" +#include "operator/operator_common.h" #include "./dnnl_base-inl.h" -#include "dnnl_act-inl.h" +#include "./dnnl_act-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_base.cc b/src/operator/nn/dnnl/dnnl_base.cc index 73e9225aa823..1e39d6642585 100644 --- a/src/operator/nn/dnnl/dnnl_base.cc +++ b/src/operator/nn/dnnl/dnnl_base.cc @@ -22,7 +22,7 @@ #include #include "../../../common/exec_utils.h" -#include "../../operator_common.h" +#include "operator/operator_common.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_batch_dot-inl.h b/src/operator/nn/dnnl/dnnl_batch_dot-inl.h index ee828b3a9037..7da7395b6aa1 100644 --- a/src/operator/nn/dnnl/dnnl_batch_dot-inl.h +++ b/src/operator/nn/dnnl/dnnl_batch_dot-inl.h @@ -31,7 +31,7 @@ #include #include -#include "../../tensor/dot-inl.h" +#include "operator/tensor/dot-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_batch_dot.cc b/src/operator/nn/dnnl/dnnl_batch_dot.cc index 71d08fa6b2dd..ff45f19646b8 100644 --- a/src/operator/nn/dnnl/dnnl_batch_dot.cc +++ b/src/operator/nn/dnnl/dnnl_batch_dot.cc @@ -25,7 +25,7 @@ #if MXNET_USE_ONEDNN == 1 #include "./dnnl_batch_dot-inl.h" -#include "../../quantization/quantization_utils.h" +#include "operator/quantization/quantization_utils.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_batch_norm-inl.h b/src/operator/nn/dnnl/dnnl_batch_norm-inl.h index 3902b2eef8ce..f74329a935d8 100644 --- a/src/operator/nn/dnnl/dnnl_batch_norm-inl.h +++ b/src/operator/nn/dnnl/dnnl_batch_norm-inl.h @@ -31,7 +31,7 @@ #include #include -#include "../batch_norm-inl.h" +#include "operator/nn/batch_norm-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_concat-inl.h 
b/src/operator/nn/dnnl/dnnl_concat-inl.h index 294582ab47ea..cfb54ee88683 100644 --- a/src/operator/nn/dnnl/dnnl_concat-inl.h +++ b/src/operator/nn/dnnl/dnnl_concat-inl.h @@ -29,7 +29,7 @@ #include #include -#include "../concat-inl.h" +#include "operator/nn/concat-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_concat.cc b/src/operator/nn/dnnl/dnnl_concat.cc index 83ba9df24543..f8052fdc3bfe 100644 --- a/src/operator/nn/dnnl/dnnl_concat.cc +++ b/src/operator/nn/dnnl/dnnl_concat.cc @@ -24,7 +24,7 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "dnnl_concat-inl.h" +#include "./dnnl_concat-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_convolution-inl.h b/src/operator/nn/dnnl/dnnl_convolution-inl.h index 529b6c3caa50..5af8d0db1f68 100644 --- a/src/operator/nn/dnnl/dnnl_convolution-inl.h +++ b/src/operator/nn/dnnl/dnnl_convolution-inl.h @@ -30,7 +30,7 @@ #include #include -#include "../convolution-inl.h" +#include "operator/nn/convolution-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_convolution.cc b/src/operator/nn/dnnl/dnnl_convolution.cc index 314bc62175e3..905fa64a658f 100644 --- a/src/operator/nn/dnnl/dnnl_convolution.cc +++ b/src/operator/nn/dnnl/dnnl_convolution.cc @@ -25,7 +25,7 @@ #if MXNET_USE_ONEDNN == 1 -#include "../convolution-inl.h" +#include "operator/nn/convolution-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_convolution-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_deconvolution-inl.h b/src/operator/nn/dnnl/dnnl_deconvolution-inl.h index 92c1d6bed1f2..a15a301865f7 100644 --- a/src/operator/nn/dnnl/dnnl_deconvolution-inl.h +++ b/src/operator/nn/dnnl/dnnl_deconvolution-inl.h @@ -41,7 +41,7 @@ #include #include -#include "../deconvolution-inl.h" +#include "operator/nn/deconvolution-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git 
a/src/operator/nn/dnnl/dnnl_deconvolution.cc b/src/operator/nn/dnnl/dnnl_deconvolution.cc index b853d1a1e52e..b15947e3f81f 100644 --- a/src/operator/nn/dnnl/dnnl_deconvolution.cc +++ b/src/operator/nn/dnnl/dnnl_deconvolution.cc @@ -23,7 +23,7 @@ #if MXNET_USE_ONEDNN == 1 -#include "../deconvolution-inl.h" +#include "operator/nn/deconvolution-inl.h" #include "./dnnl_deconvolution-inl.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_fully_connected-inl.h b/src/operator/nn/dnnl/dnnl_fully_connected-inl.h index 980b931851f3..a71770f3e199 100644 --- a/src/operator/nn/dnnl/dnnl_fully_connected-inl.h +++ b/src/operator/nn/dnnl/dnnl_fully_connected-inl.h @@ -31,7 +31,7 @@ #include #include -#include "../fully_connected-inl.h" +#include "operator/nn/fully_connected-inl.h" #include "./dnnl_base-inl.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_fully_connected.cc b/src/operator/nn/dnnl/dnnl_fully_connected.cc index eca90b7cf4c6..e2d65cb1b245 100644 --- a/src/operator/nn/dnnl/dnnl_fully_connected.cc +++ b/src/operator/nn/dnnl/dnnl_fully_connected.cc @@ -24,7 +24,7 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "dnnl_fully_connected-inl.h" +#include "./dnnl_fully_connected-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_layer_norm-inl.h b/src/operator/nn/dnnl/dnnl_layer_norm-inl.h index ccd3e9cb00ea..d03bf798acac 100644 --- a/src/operator/nn/dnnl/dnnl_layer_norm-inl.h +++ b/src/operator/nn/dnnl/dnnl_layer_norm-inl.h @@ -29,7 +29,7 @@ #include #include -#include "../layer_norm-inl.h" +#include "operator/nn/layer_norm-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_log_softmax.cc b/src/operator/nn/dnnl/dnnl_log_softmax.cc index 9408e6019610..615c539de7aa 100644 --- a/src/operator/nn/dnnl/dnnl_log_softmax.cc +++ b/src/operator/nn/dnnl/dnnl_log_softmax.cc @@ -22,7 +22,7 @@ * \brief Implementation of log_softmax function with DNNL support */ -#include "../softmax-inl.h" 
+#include "operator/nn/softmax-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_lrn-inl.h b/src/operator/nn/dnnl/dnnl_lrn-inl.h index 842705b254ee..90d43fc41bd6 100644 --- a/src/operator/nn/dnnl/dnnl_lrn-inl.h +++ b/src/operator/nn/dnnl/dnnl_lrn-inl.h @@ -30,7 +30,7 @@ #include #include -#include "../lrn-inl.h" +#include "operator/nn/lrn-inl.h" #include "./dnnl_base-inl.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_pooling-inl.h b/src/operator/nn/dnnl/dnnl_pooling-inl.h index 15a544e38fd9..fa65ba51919f 100644 --- a/src/operator/nn/dnnl/dnnl_pooling-inl.h +++ b/src/operator/nn/dnnl/dnnl_pooling-inl.h @@ -29,7 +29,7 @@ #include #include -#include "../pooling-inl.h" +#include "operator/nn/pooling-inl.h" #include "./dnnl_base-inl.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_reshape-inl.h b/src/operator/nn/dnnl/dnnl_reshape-inl.h index a814c1d958d0..04e1fecb0122 100644 --- a/src/operator/nn/dnnl/dnnl_reshape-inl.h +++ b/src/operator/nn/dnnl/dnnl_reshape-inl.h @@ -28,7 +28,7 @@ #if MXNET_USE_ONEDNN == 1 #include -#include "../../tensor/matrix_op-inl.h" +#include "operator/tensor/matrix_op-inl.h" #include "dnnl_base-inl.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_reshape.cc b/src/operator/nn/dnnl/dnnl_reshape.cc index 5d2591916271..5602f78ef8e2 100644 --- a/src/operator/nn/dnnl/dnnl_reshape.cc +++ b/src/operator/nn/dnnl/dnnl_reshape.cc @@ -24,7 +24,7 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "../../tensor/elemwise_unary_op.h" +#include "operator/tensor/elemwise_unary_op.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" #include "./dnnl_reshape-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_rnn-inl.h b/src/operator/nn/dnnl/dnnl_rnn-inl.h index bd2a63f7a908..3fc779446889 100644 --- a/src/operator/nn/dnnl/dnnl_rnn-inl.h +++ b/src/operator/nn/dnnl/dnnl_rnn-inl.h @@ -30,7 +30,7 @@ #include -#include "../../rnn-inl.h" +#include "operator/rnn-inl.h" #include 
"./dnnl_base-inl.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_slice-inl.h b/src/operator/nn/dnnl/dnnl_slice-inl.h index 64ef19a5ea38..a145c6f82e09 100644 --- a/src/operator/nn/dnnl/dnnl_slice-inl.h +++ b/src/operator/nn/dnnl/dnnl_slice-inl.h @@ -34,8 +34,8 @@ #include -#include "../../operator_common.h" -#include "../../tensor/slice-inl.h" +#include "operator/operator_common.h" +#include "operator/tensor/slice-inl.h" #include "./dnnl_base-inl.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_softmax-inl.h b/src/operator/nn/dnnl/dnnl_softmax-inl.h index 0978ab0cdfe1..17ae88c54263 100644 --- a/src/operator/nn/dnnl/dnnl_softmax-inl.h +++ b/src/operator/nn/dnnl/dnnl_softmax-inl.h @@ -39,7 +39,7 @@ #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" -#include "../softmax-inl.h" +#include "operator/nn/softmax-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_softmax_output.cc b/src/operator/nn/dnnl/dnnl_softmax_output.cc index 7a7d3991dc6c..168c4152d90e 100644 --- a/src/operator/nn/dnnl/dnnl_softmax_output.cc +++ b/src/operator/nn/dnnl/dnnl_softmax_output.cc @@ -24,9 +24,11 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "../../softmax_output-inl.h" + +#include "operator/softmax_output-inl.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" + namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_sum.cc b/src/operator/nn/dnnl/dnnl_sum.cc index 14b7deafd66e..35ded9854083 100644 --- a/src/operator/nn/dnnl/dnnl_sum.cc +++ b/src/operator/nn/dnnl/dnnl_sum.cc @@ -24,7 +24,7 @@ */ #include -#include "../../operator_common.h" +#include "operator/operator_common.h" #include "./dnnl_base-inl.h" #include "./dnnl_ops-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_transpose-inl.h b/src/operator/nn/dnnl/dnnl_transpose-inl.h index 65be51c1e3de..1e1cef116a54 100644 --- a/src/operator/nn/dnnl/dnnl_transpose-inl.h +++ b/src/operator/nn/dnnl/dnnl_transpose-inl.h @@ -29,7 +29,7 @@ #include "./dnnl_base-inl.h" 
#include "./dnnl_ops-inl.h" -#include "../../numpy/np_matrix_op-inl.h" +#include "operator/numpy/np_matrix_op-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_transpose.cc b/src/operator/nn/dnnl/dnnl_transpose.cc index 40cba4109725..29628adf4059 100644 --- a/src/operator/nn/dnnl/dnnl_transpose.cc +++ b/src/operator/nn/dnnl/dnnl_transpose.cc @@ -25,8 +25,7 @@ #if MXNET_USE_ONEDNN == 1 -#include "../../tensor/matrix_op-inl.h" - +#include "operator/tensor/matrix_op-inl.h" #include "./dnnl_transpose-inl.h" namespace mxnet { diff --git a/src/operator/quantization/dnnl/dnnl_dequantize-inl.h b/src/operator/quantization/dnnl/dnnl_dequantize-inl.h index 0c4e417f4c1c..cfbdd12dbf88 100644 --- a/src/operator/quantization/dnnl/dnnl_dequantize-inl.h +++ b/src/operator/quantization/dnnl/dnnl_dequantize-inl.h @@ -30,7 +30,7 @@ #include #include -#include "../../nn/dnnl/dnnl_base-inl.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantize-inl.h b/src/operator/quantization/dnnl/dnnl_quantize-inl.h index 13f2e1e4acdc..56fa3152d130 100644 --- a/src/operator/quantization/dnnl/dnnl_quantize-inl.h +++ b/src/operator/quantization/dnnl/dnnl_quantize-inl.h @@ -30,8 +30,8 @@ #include #include -#include "../../nn/dnnl/dnnl_base-inl.h" -#include "../quantize-inl.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" +#include "operator/quantization/quantize-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantize_v2-inl.h b/src/operator/quantization/dnnl/dnnl_quantize_v2-inl.h index 61811329e58d..4effb39680ee 100644 --- a/src/operator/quantization/dnnl/dnnl_quantize_v2-inl.h +++ b/src/operator/quantization/dnnl/dnnl_quantize_v2-inl.h @@ -29,8 +29,8 @@ #include #include -#include "../../nn/dnnl/dnnl_base-inl.h" -#include "../quantize_v2-inl.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" +#include "operator/quantization/quantize_v2-inl.h" namespace 
mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_act.cc b/src/operator/quantization/dnnl/dnnl_quantized_act.cc index 0aea4b471ccc..6cf31a0ee20d 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_act.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_act.cc @@ -23,8 +23,8 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "../../nn/dnnl/dnnl_ops-inl.h" -#include "../quantization_utils.h" +#include "operator/nn/dnnl/dnnl_ops-inl.h" +#include "operator/quantization/quantization_utils.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_batch_norm.cc b/src/operator/quantization/dnnl/dnnl_quantized_batch_norm.cc index f8c79482cc9c..3f13775717d7 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_batch_norm.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_batch_norm.cc @@ -24,8 +24,8 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "../../nn/dnnl/dnnl_batch_norm-inl.h" -#include "../quantization_utils.h" +#include "operator/nn/dnnl/dnnl_batch_norm-inl.h" +#include "operator/quantization/quantization_utils.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_concat.cc b/src/operator/quantization/dnnl/dnnl_quantized_concat.cc index 3409ec2713f0..a6f9e85427c8 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_concat.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_concat.cc @@ -23,8 +23,8 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "../../nn/dnnl/dnnl_concat-inl.h" -#include "../quantization_utils.h" +#include "operator/nn/dnnl/dnnl_concat-inl.h" +#include "operator/quantization/quantization_utils.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_conv.cc b/src/operator/quantization/dnnl/dnnl_quantized_conv.cc index 934a24ccb6ab..6f796ad799b8 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_conv.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_conv.cc @@ -24,12 +24,12 @@ */ #if 
MXNET_USE_ONEDNN == 1 -#include "../../elemwise_op_common.h" -#include "../../nn/convolution-inl.h" -#include "../../nn/dnnl/dnnl_base-inl.h" -#include "../../nn/dnnl/dnnl_convolution-inl.h" -#include "../../tensor/matrix_op-inl.h" -#include "../quantization_utils.h" +#include "operator/elemwise_op_common.h" +#include "operator/nn/convolution-inl.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" +#include "operator/nn/dnnl/dnnl_convolution-inl.h" +#include "operator/tensor/matrix_op-inl.h" +#include "operator/quantization/quantization_utils.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc b/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc index 4dbe64836899..c9d1157eb65f 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc @@ -23,10 +23,10 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "../../nn/dnnl/dnnl_base-inl.h" -#include "../../nn/dnnl/dnnl_ops-inl.h" -#include "../quantization_utils.h" -#include "../quantized_elemwise_add-inl.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" +#include "operator/nn/dnnl/dnnl_ops-inl.h" +#include "operator/quantization/quantization_utils.h" +#include "operator/quantization/quantized_elemwise_add-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_flatten.cc b/src/operator/quantization/dnnl/dnnl_quantized_flatten.cc index 12eb01f39183..a605a16813b5 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_flatten.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_flatten.cc @@ -23,8 +23,8 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "../../nn/dnnl/dnnl_ops-inl.h" -#include "../quantization_utils.h" +#include "operator/nn/dnnl/dnnl_ops-inl.h" +#include "operator/quantization/quantization_utils.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_fully_connected.cc 
b/src/operator/quantization/dnnl/dnnl_quantized_fully_connected.cc index 5e70c4ea457b..774612910aff 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_fully_connected.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_fully_connected.cc @@ -24,8 +24,8 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "../../nn/dnnl/dnnl_fully_connected-inl.h" -#include "../quantization_utils.h" +#include "operator/nn/dnnl/dnnl_fully_connected-inl.h" +#include "operator/quantization/quantization_utils.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_pooling.cc b/src/operator/quantization/dnnl/dnnl_quantized_pooling.cc index a6f89ee6b875..d0a2154068bd 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_pooling.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_pooling.cc @@ -25,7 +25,7 @@ #if MXNET_USE_ONEDNN == 1 -#include "../../nn/dnnl/dnnl_pooling-inl.h" +#include "operator/nn/dnnl/dnnl_pooling-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_requantize-inl.h b/src/operator/quantization/dnnl/dnnl_requantize-inl.h index 2dc61d6d3430..3095b87bd4ad 100644 --- a/src/operator/quantization/dnnl/dnnl_requantize-inl.h +++ b/src/operator/quantization/dnnl/dnnl_requantize-inl.h @@ -29,8 +29,8 @@ #include #include -#include "../../nn/dnnl/dnnl_base-inl.h" -#include "../requantize-inl.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" +#include "operator/quantization/requantize-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_batch_dot.cc b/src/operator/subgraph/dnnl/dnnl_batch_dot.cc index 612629da6ccf..c9beffc90c6a 100644 --- a/src/operator/subgraph/dnnl/dnnl_batch_dot.cc +++ b/src/operator/subgraph/dnnl/dnnl_batch_dot.cc @@ -28,12 +28,12 @@ #include #include -#include "../../nn/dnnl/dnnl_base-inl.h" -#include "../../nn/dnnl/dnnl_batch_dot-inl.h" -#include "../../nn/dnnl/dnnl_ops-inl.h" -#include "../../quantization/quantization_utils.h" -#include 
"../../tensor/matrix_op-inl.h" -#include "../common.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" +#include "operator/nn/dnnl/dnnl_batch_dot-inl.h" +#include "operator/nn/dnnl/dnnl_ops-inl.h" +#include "operator/quantization/quantization_utils.h" +#include "operator/tensor/matrix_op-inl.h" +#include "operator/subgraph/common.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_batch_dot_property.h b/src/operator/subgraph/dnnl/dnnl_batch_dot_property.h index c4dee3ef4c4c..238c6efd9691 100644 --- a/src/operator/subgraph/dnnl/dnnl_batch_dot_property.h +++ b/src/operator/subgraph/dnnl/dnnl_batch_dot_property.h @@ -19,15 +19,15 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_BATCH_DOT_PROPERTY_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_BATCH_DOT_PROPERTY_H_ + #if MXNET_USE_ONEDNN == 1 #include #include -#include "../../tensor/dot-inl.h" -#include "../common.h" - -#include "dnnl_subgraph_base-inl.h" +#include "operator/tensor/dot-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_bn_relu_property.h b/src/operator/subgraph/dnnl/dnnl_bn_relu_property.h index 4d39bbeb6869..8603a6dbaa5c 100644 --- a/src/operator/subgraph/dnnl/dnnl_bn_relu_property.h +++ b/src/operator/subgraph/dnnl/dnnl_bn_relu_property.h @@ -19,15 +19,17 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_BN_RELU_PROPERTY_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_BN_RELU_PROPERTY_H_ + #if MXNET_USE_ONEDNN == 1 #include #include -#include "../../nn/dnnl/dnnl_act-inl.h" -#include "../../nn/dnnl/dnnl_batch_norm-inl.h" -#include "../common.h" -#include "dnnl_subgraph_base-inl.h" +#include "operator/nn/dnnl/dnnl_act-inl.h" +#include "operator/nn/dnnl/dnnl_batch_norm-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_subgraph_base-inl.h" + namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_common.h 
b/src/operator/subgraph/dnnl/dnnl_common.h index 7fdc7ec52811..68f10c0b6cc1 100644 --- a/src/operator/subgraph/dnnl/dnnl_common.h +++ b/src/operator/subgraph/dnnl/dnnl_common.h @@ -25,10 +25,12 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_COMMON_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_COMMON_H_ + #if MXNET_USE_ONEDNN == 1 + #include -#include "../../numpy/np_matrix_op-inl.h" +#include "operator/numpy/np_matrix_op-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_conv-inl.h b/src/operator/subgraph/dnnl/dnnl_conv-inl.h index 81b7e550ecb0..ef6967048d80 100644 --- a/src/operator/subgraph/dnnl/dnnl_conv-inl.h +++ b/src/operator/subgraph/dnnl/dnnl_conv-inl.h @@ -19,16 +19,17 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_CONV_INL_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_CONV_INL_H_ + #if MXNET_USE_ONEDNN == 1 #include #include #include -#include "../../nn/activation-inl.h" -#include "../../nn/batch_norm-inl.h" -#include "../../nn/convolution-inl.h" -#include "../../nn/dnnl/dnnl_convolution-inl.h" +#include "operator/nn/activation-inl.h" +#include "operator/nn/batch_norm-inl.h" +#include "operator/nn/convolution-inl.h" +#include "operator/nn/dnnl/dnnl_convolution-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_conv.cc b/src/operator/subgraph/dnnl/dnnl_conv.cc index 7bc1b249a44d..2ce32dbfe134 100644 --- a/src/operator/subgraph/dnnl/dnnl_conv.cc +++ b/src/operator/subgraph/dnnl/dnnl_conv.cc @@ -23,14 +23,14 @@ #include #include -#include "../../nn/dnnl/dnnl_act-inl.h" -#include "../../nn/dnnl/dnnl_base-inl.h" -#include "../../nn/dnnl/dnnl_ops-inl.h" -#include "../../quantization/quantization_utils.h" -#include "../../tensor/matrix_op-inl.h" -#include "../common.h" -#include "dnnl_common.h" -#include "dnnl_conv-inl.h" +#include "operator/nn/dnnl/dnnl_act-inl.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" +#include "operator/nn/dnnl/dnnl_ops-inl.h" +#include 
"operator/quantization/quantization_utils.h" +#include "operator/tensor/matrix_op-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_common.h" +#include "./dnnl_conv-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_conv_property.h b/src/operator/subgraph/dnnl/dnnl_conv_property.h index 3bb08a5eb373..479cd21ecaa4 100644 --- a/src/operator/subgraph/dnnl/dnnl_conv_property.h +++ b/src/operator/subgraph/dnnl/dnnl_conv_property.h @@ -19,18 +19,19 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_CONV_PROPERTY_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_CONV_PROPERTY_H_ + #if MXNET_USE_ONEDNN == 1 #include #include -#include "../../leaky_relu-inl.h" -#include "../../nn/activation-inl.h" -#include "../../nn/convolution-inl.h" -#include "../../nn/dnnl/dnnl_ops-inl.h" -#include "../../tensor/matrix_op-inl.h" -#include "../common.h" -#include "dnnl_subgraph_base-inl.h" +#include "operator/leaky_relu-inl.h" +#include "operator/nn/activation-inl.h" +#include "operator/nn/convolution-inl.h" +#include "operator/nn/dnnl/dnnl_ops-inl.h" +#include "operator/tensor/matrix_op-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h new file mode 100644 index 000000000000..4abc4e051986 --- /dev/null +++ b/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file dnnl_elemwisemul_post_quantize_property.cc + * \brief Partition gragph property for oneDNN Quantized ElemwiseMul operator + * \author Xinyu Chen + */ + +#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ +#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ + +#if MXNET_USE_ONEDNN == 1 + +#include +#include +#include + +#include "operator/quantization/requantize-inl.h" +#include "operator/tensor/elemwise_binary_op-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_subgraph_base-inl.h" + +namespace mxnet { +namespace op { + +#define QUANTIZED_ElemwiseMul_NAME "_contrib_quantized_elemwise_mul" + +class ElemwiseMulPostQuantizeSelector : public SubgraphSelectorV2 { + public: + /*! \brief pattern match status */ + enum SelectStatus { + kFail = 0, + kStart, + kRequantize, + kSuccess, + }; + + private: + bool disable_all; + bool disable_float_output; + SelectStatus status; + std::vector matched_list; + + public: + explicit ElemwiseMulPostQuantizeSelector(const bool dis_all, const bool dis_float_output) + : disable_all(dis_all), disable_float_output(dis_float_output) {} + + bool Select(const BiDirectedNode& n) override { + const auto rawnode = n.node; + if ((!disable_all) && rawnode->op() == Op::Get(QUANTIZED_ElemwiseMul_NAME)) { + status = disable_all ? 
kSuccess : kStart; + matched_list.clear(); + matched_list.push_back(&n); + return true; + } + return false; + } + + bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override { + return false; + } + + bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override { + const auto raw_node = n.node; + const auto raw_new_node = new_node.node; + if (status == kFail || status == kSuccess || raw_new_node->is_variable()) + return false; + // If n isn't the last matched node, then we encoutered a internal + // branch, we should pop out the node behind n and stop fusion. + if (matched_list.back() != &n) { + if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) { + while (matched_list.back() != &n) { + matched_list.pop_back(); + } + } + + status = kSuccess; + return false; + } + + switch (status) { + case kStart: + if (raw_new_node->op() == Op::Get("_contrib_requantize")) { + auto const& param = nnvm::get(raw_new_node->attrs.parsed); + if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) { + matched_list.push_back(&new_node); + status = kRequantize; + return true; + } + } + case kRequantize: + if ((!disable_float_output) && (raw_new_node->op() == Op::Get("_contrib_dequantize"))) { + CHECK(raw_node->op() == Op::Get("_contrib_requantize")); + if (n.outputs.size() > 1) { + // check if requantize have other outputs than dequantize + // if it has we can't fuse dequantize into elemwise_mul + for (auto kv : n.outputs) { + const auto& node = kv.first; + if (node->op() != Op::Get("_contrib_dequantize")) { + status = kSuccess; + return false; + } + } + } + + matched_list.push_back(&new_node); + status = kSuccess; + return true; + } + default: + status = kSuccess; + return false; + } + } + + std::vector Filter(const std::vector& candidates) override { + if ((status != kSuccess) || (matched_list.size() <= 1)) { + return std::vector(0); + } else { + std::vector ret; + for (auto i : 
matched_list) { + auto non_const_i = const_cast(i); + if (std::find(candidates.begin(), candidates.end(), non_const_i) != candidates.end()) { + ret.push_back(non_const_i); + } + } + return ret; + } + } + + void Reset() override { + CHECK_GE(matched_list.size(), 1); + auto new_selector = ElemwiseMulPostQuantizeSelector(disable_all, disable_float_output); + new_selector.Select(*matched_list[0]); + *this = new_selector; + } +}; + +class ElemwiseMulPostQuantizeProperty : public SubgraphProperty { + public: + ElemwiseMulPostQuantizeProperty() { + disable_fuse_all = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_QEM_FUSE_ALL", false); + disable_float_output = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_QEM_FLOAT_OUTPUT", false); + } + + static SubgraphPropertyPtr Create() { + static const std::string& name = "oneDNN EltwiseMul post-quantization optimization pass"; + auto property = std::make_shared(); + property->SetAttr("property_name", name); + property->SetAttr("inference_only", true); + return property; + } + + nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol& sym, + const int subgraph_id = 0) const override { + nnvm::ObjectPtr em_node = nullptr; + nnvm::ObjectPtr requantize_node = nullptr; + nnvm::ObjectPtr dequantize_node = nullptr; + + DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr& node) { + if (node->is_variable()) + return; + if (node->op() == Op::Get(QUANTIZED_ElemwiseMul_NAME)) { + em_node = node; + } else if (node->op() == Op::Get("_contrib_requantize")) { + requantize_node = node; + } else if (node->op() == Op::Get("_contrib_dequantize")) { + dequantize_node = node; + } + }); + + CHECK_NOTNULL(em_node); + CHECK_NOTNULL(requantize_node); + auto const& requantize_param = nnvm::get(requantize_node->attrs.parsed); + CHECK(requantize_param.min_calib_range.has_value()); + CHECK(requantize_param.max_calib_range.has_value()); + + // When only fused quantized_elemwise_mul and requantize, set min/max_cablib_range, + // When fused quantized_elemwise_mul + requantize + dequantize, 
set dequantize flag to true. + if (dequantize_node != nullptr) { + em_node->attrs.dict["enable_float_output"] = "True"; + } else { + em_node->attrs.dict["min_calib_range"] = + std::to_string(requantize_param.min_calib_range.value()); + em_node->attrs.dict["max_calib_range"] = + std::to_string(requantize_param.max_calib_range.value()); + } + em_node->op()->attr_parser(&(em_node->attrs)); + return em_node; + } + + SubgraphSelectorV2Ptr CreateSubgraphSelectorV2() const override { + auto selector = + std::make_shared(disable_fuse_all, disable_float_output); + return selector; + } + + void ConnectSubgraphOutputs(const nnvm::ObjectPtr n, + std::vector* output_entries) const override { + for (size_t i = 0; i < output_entries->size(); ++i) { + auto entry_ptr = output_entries->at(i); + *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0}; + } + } + + private: + bool disable_fuse_all; + bool disable_float_output; +}; + +} // namespace op +} // namespace mxnet + +#endif // if MXNET_USE_ONEDNN == 1 +#endif // MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ diff --git a/src/operator/subgraph/dnnl/dnnl_fc-inl.h b/src/operator/subgraph/dnnl/dnnl_fc-inl.h index ba1beac46fc0..021ffc4360c7 100644 --- a/src/operator/subgraph/dnnl/dnnl_fc-inl.h +++ b/src/operator/subgraph/dnnl/dnnl_fc-inl.h @@ -19,14 +19,15 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_INL_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_INL_H_ + #if MXNET_USE_ONEDNN == 1 #include #include #include -#include "../../nn/dnnl/dnnl_fully_connected-inl.h" -#include "dnnl.hpp" +#include "operator/nn/dnnl/dnnl_fully_connected-inl.h" +#include "./dnnl.hpp" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_fc.cc b/src/operator/subgraph/dnnl/dnnl_fc.cc index 51989cad3595..77a0d64d9a45 100644 --- a/src/operator/subgraph/dnnl/dnnl_fc.cc +++ b/src/operator/subgraph/dnnl/dnnl_fc.cc @@ -29,15 +29,15 @@ #include #include -#include "../../nn/dnnl/dnnl_act-inl.h" -#include 
"../../nn/dnnl/dnnl_base-inl.h" -#include "../../nn/dnnl/dnnl_fully_connected-inl.h" -#include "../../nn/dnnl/dnnl_ops-inl.h" -#include "../../quantization/quantization_utils.h" -#include "../../tensor/matrix_op-inl.h" -#include "../common.h" -#include "dnnl_common.h" -#include "dnnl_fc-inl.h" +#include "operator/nn/dnnl/dnnl_act-inl.h" +#include "operator/nn/dnnl/dnnl_base-inl.h" +#include "operator/nn/dnnl/dnnl_fully_connected-inl.h" +#include "operator/nn/dnnl/dnnl_ops-inl.h" +#include "operator/quantization/quantization_utils.h" +#include "operator/tensor/matrix_op-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_common.h" +#include "./dnnl_fc-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h new file mode 100644 index 000000000000..692fd42bf0a4 --- /dev/null +++ b/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file dnnl_fc_post_quantize_property.cc + * \brief Partition gragph property for oneDNN Quantized FullyConnected operator + * \author Ciyong Chen + */ + +#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_POST_QUANTIZE_PROPERTY_H_ +#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_POST_QUANTIZE_PROPERTY_H_ + +#if MXNET_USE_ONEDNN == 1 + +#include +#include +#include + +#include "operator/nn/fully_connected-inl.h" +#include "operator/quantization/requantize-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_subgraph_base-inl.h" + +namespace mxnet { +namespace op { + +#define QUANTIZED_FC_NAME "_sg_onednn_fully_connected" + +class SgDNNLFCPostQuantizeSelector : public SubgraphSelectorV2 { + public: + /*! \brief pattern match status */ + enum SelectStatus { + kFail = 0, + kStart, + kRequantize, + kSuccess, + }; + + private: + bool disable_all; + bool disable_float_output; + SelectStatus status; + std::vector matched_list; + + public: + explicit SgDNNLFCPostQuantizeSelector(const bool dis_all, const bool dis_float_output) + : disable_all(dis_all), disable_float_output(dis_float_output) {} + + bool Select(const BiDirectedNode& n) override { + const auto rawnode = n.node; + if ((!disable_all) && rawnode->op() == Op::Get(QUANTIZED_FC_NAME)) { + status = disable_all ? kSuccess : kStart; + matched_list.clear(); + matched_list.push_back(&n); + return true; + } + return false; + } + + bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override { + return false; + } + + bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override { + const auto raw_node = n.node; + const auto raw_new_node = new_node.node; + if (status == kFail || status == kSuccess || raw_new_node->is_variable()) + return false; + // If n isn't the last matched node, then we encoutered a internal + // branch, we should pop out the node behind n and stop fusion. 
+ if (matched_list.back() != &n) { + if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) { + while (matched_list.back() != &n) { + matched_list.pop_back(); + } + } + + status = kSuccess; + return false; + } + + switch (status) { + case kStart: + if (raw_new_node->op() == Op::Get("_contrib_requantize")) { + auto const& param = nnvm::get(raw_new_node->attrs.parsed); + if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) { + matched_list.push_back(&new_node); + status = kRequantize; + return true; + } + } + case kRequantize: + if ((!disable_float_output) && (raw_new_node->op() == Op::Get("_contrib_dequantize"))) { + CHECK(raw_node->op() == Op::Get("_contrib_requantize")); + if (n.outputs.size() > 1) { + // check if requantize have other outputs than dequantize + // if it has we can't fuse dequantize into FC + for (auto kv : n.outputs) { + const auto& node = kv.first; + if (node->op() != Op::Get("_contrib_dequantize")) { + status = kSuccess; + return false; + } + } + } + matched_list.push_back(&new_node); + status = kSuccess; + return true; + } + default: + status = kSuccess; + return false; + } + } + + std::vector Filter(const std::vector& candidates) override { + if ((status != kSuccess) || (matched_list.size() <= 1)) { + return std::vector(0); + } else { + std::vector ret; + for (auto i : matched_list) { + auto non_const_i = const_cast(i); + if (std::find(candidates.begin(), candidates.end(), non_const_i) != candidates.end()) { + ret.push_back(non_const_i); + } + } + return ret; + } + } + + void Reset() override { + CHECK_GE(matched_list.size(), 1); + auto new_selector = SgDNNLFCPostQuantizeSelector(disable_all, disable_float_output); + new_selector.Select(*matched_list[0]); + *this = new_selector; + } +}; + +class SgDNNLFCPostQuantizeProperty : public SubgraphProperty { + public: + SgDNNLFCPostQuantizeProperty() { + disable_fuse_all = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_QFC_FUSE_ALL", false); + 
disable_float_output = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_QFC_FLOAT_OUTPUT", false); + } + + static SubgraphPropertyPtr Create() { + static const std::string& name = "oneDNN FullyConected post-quantization optimization pass"; + auto property = std::make_shared(); + property->SetAttr("property_name", name); + property->SetAttr("inference_only", true); + return property; + } + + nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol& sym, + const int subgraph_id = 0) const override { + nnvm::ObjectPtr fc_node = nullptr; + nnvm::ObjectPtr requantize_node = nullptr; + nnvm::ObjectPtr dequantize_node = nullptr; + + DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr& node) { + if (node->is_variable()) + return; + if (node->op() == Op::Get(QUANTIZED_FC_NAME)) { + fc_node = node; + } else if (node->op() == Op::Get("_contrib_requantize")) { + requantize_node = node; + } else if (node->op() == Op::Get("_contrib_dequantize")) { + dequantize_node = node; + } + }); + + CHECK_NOTNULL(fc_node); + CHECK_NOTNULL(requantize_node); + auto const& requantize_param = nnvm::get(requantize_node->attrs.parsed); + CHECK(requantize_param.min_calib_range.has_value()); + CHECK(requantize_param.max_calib_range.has_value()); + + // When only fused quantized_fullyconnected and requantize, set min/max_cablib_range, + // When fused quantized_fullyconnected + requantize + dequantize, set dequantize flag to true. 
+ if (dequantize_node != nullptr) { + fc_node->attrs.dict["enable_float_output"] = "True"; + } else { + fc_node->attrs.dict["min_calib_range"] = + std::to_string(requantize_param.min_calib_range.value()); + fc_node->attrs.dict["max_calib_range"] = + std::to_string(requantize_param.max_calib_range.value()); + } + fc_node->op()->attr_parser(&(fc_node->attrs)); + return fc_node; + } + + SubgraphSelectorV2Ptr CreateSubgraphSelectorV2() const override { + auto selector = + std::make_shared(disable_fuse_all, disable_float_output); + return selector; + } + + void ConnectSubgraphOutputs(const nnvm::ObjectPtr n, + std::vector* output_entries) const override { + for (size_t i = 0; i < output_entries->size(); ++i) { + auto entry_ptr = output_entries->at(i); + *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0}; + } + } + + private: + bool disable_fuse_all; + bool disable_float_output; +}; + +} // namespace op +} // namespace mxnet + +#endif // if MXNET_USE_ONEDNN == 1 +#endif // MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_POST_QUANTIZE_PROPERTY_H_ diff --git a/src/operator/subgraph/dnnl/dnnl_fc_property.h b/src/operator/subgraph/dnnl/dnnl_fc_property.h index 9884dc7168ee..0eeb6df9502e 100644 --- a/src/operator/subgraph/dnnl/dnnl_fc_property.h +++ b/src/operator/subgraph/dnnl/dnnl_fc_property.h @@ -25,15 +25,16 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_PROPERTY_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_PROPERTY_H_ + #if MXNET_USE_ONEDNN == 1 #include #include -#include "../../tensor/matrix_op-inl.h" -#include "../common.h" -#include "dnnl_fc-inl.h" -#include "dnnl_subgraph_base-inl.h" +#include "operator/tensor/matrix_op-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_fc-inl.h" +#include "./dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h new file mode 100644 index 000000000000..0b9dc1d2cfea --- /dev/null 
+++ b/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_MATMUL_POST_QUANTIZE_PROPERTY_H_ +#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_MATMUL_POST_QUANTIZE_PROPERTY_H_ + +#if MXNET_USE_ONEDNN == 1 + +#include +#include + +#include "operator/quantization/requantize-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_subgraph_base-inl.h" + +namespace mxnet { +namespace op { + +class SgDNNLMatmulPostQuantizeSelector : public SubgraphSelector { + public: + /*! 
\brief pattern match status */ + enum SelectStatus { + kFail = 0, + kStart, + kRequantize, + kSuccess, + }; + + private: + bool disable_all; + bool disable_float_output; + SelectStatus status; + std::vector matched_list; + + public: + explicit SgDNNLMatmulPostQuantizeSelector(const bool dis_all, const bool dis_float_output) + : disable_all(dis_all), disable_float_output(dis_float_output) {} + + bool Select(const nnvm::Node& n) override { + if ((!disable_all) && (n.op() == Op::Get("_sg_onednn_selfatt_qk") || + n.op() == Op::Get("_sg_onednn_selfatt_valatt") || + n.op() == Op::Get("_sg_onednn_batch_dot"))) { + status = disable_all ? kSuccess : kStart; + matched_list.clear(); + matched_list.push_back(&n); + return true; + } + return false; + } + + bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override { + return false; + } + + bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override { + if (status == kFail || status == kSuccess || new_node.is_variable()) + return false; + // If n isn't the last matched node, then we encoutered a internal + // branch, we should pop out the node behind n and stop fusion. 
+ if (matched_list.back() != &n) { + if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) { + while (matched_list.back() != &n) { + matched_list.pop_back(); + } + } + + status = kSuccess; + return false; + } + + switch (status) { + case kStart: + if (new_node.op() == Op::Get("_contrib_requantize")) { + auto const& param = nnvm::get(new_node.attrs.parsed); + if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) { + matched_list.push_back(&new_node); + status = kRequantize; + return true; + } + } + case kRequantize: + if ((!disable_float_output) && (new_node.op() == Op::Get("_contrib_dequantize"))) { + matched_list.push_back(&new_node); + status = kSuccess; + return true; + } + default: + status = kSuccess; + return false; + } + } + + std::vector Filter(const std::vector& candidates) override { + if ((status != kSuccess) || (matched_list.size() <= 1)) { + return std::vector(0); + } else { + std::vector ret; + for (auto i : matched_list) { + auto non_const_i = const_cast(i); + if (std::find(candidates.begin(), candidates.end(), non_const_i) != candidates.end()) { + ret.push_back(non_const_i); + } + } + return ret; + } + } + + void Reset() override { + CHECK_GE(matched_list.size(), 1); + auto new_selector = SgDNNLMatmulPostQuantizeSelector(disable_all, disable_float_output); + new_selector.Select(*matched_list[0]); + *this = new_selector; + } +}; + +class SgDNNLMatmulPostQuantizeProperty : public SubgraphProperty { + public: + SgDNNLMatmulPostQuantizeProperty() { + disable_fuse_all = dmlc::GetEnv("MXNET_DISABLE_DNNL_QMATMUL_FUSE_ALL", false); + disable_float_output = dmlc::GetEnv("MXNET_DISABLE_DNNL_QMATMUL_FLOAT_OUTPUT", false); + } + + static SubgraphPropertyPtr Create() { + static const std::string& name = "DNNL Matmul post-quantization optimization pass"; + auto property = std::make_shared(); + property->SetAttr("property_name", name); + property->SetAttr("inference_only", true); + return property; + } + + 
nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol& sym, + const int subgraph_id = 0) const override { + nnvm::ObjectPtr interleaved_node = nullptr; + nnvm::ObjectPtr requantize_node = nullptr; + nnvm::ObjectPtr dequantize_node = nullptr; + + DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr& node) { + if (node->is_variable()) + return; + if (node->op() == Op::Get("_sg_onednn_selfatt_qk") || + node->op() == Op::Get("_sg_onednn_selfatt_valatt") || + node->op() == Op::Get("_sg_onednn_batch_dot")) { + interleaved_node = node; + } else if (node->op() == Op::Get("_contrib_requantize")) { + requantize_node = node; + } else if (node->op() == Op::Get("_contrib_dequantize")) { + dequantize_node = node; + } + }); + + CHECK_NOTNULL(interleaved_node); + CHECK_NOTNULL(requantize_node); + auto const& requantize_param = nnvm::get(requantize_node->attrs.parsed); + CHECK(requantize_param.min_calib_range.has_value()); + CHECK(requantize_param.max_calib_range.has_value()); + + // When only fusing quantized_interleaved_matmul and requantize, set min/max_cablib_range, + // When fusing quantized_interleaved_matmul + requantize + dequantize, + // set dequantize flag to true. 
+ if (dequantize_node != nullptr) { + interleaved_node->attrs.dict["enable_float_output"] = "True"; + } else { + interleaved_node->attrs.dict["min_calib_range"] = + std::to_string(requantize_param.min_calib_range.value()); + interleaved_node->attrs.dict["max_calib_range"] = + std::to_string(requantize_param.max_calib_range.value()); + } + interleaved_node->op()->attr_parser(&(interleaved_node->attrs)); + return interleaved_node; + } + + SubgraphSelectorPtr CreateSubgraphSelector() const override { + auto selector = + std::make_shared(disable_fuse_all, disable_float_output); + return selector; + } + + private: + bool disable_fuse_all; + bool disable_float_output; +}; + +} // namespace op +} // namespace mxnet + +#endif // if MXNET_USE_ONEDNN == 1 +#endif // MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_MATMUL_POST_QUANTIZE_PROPERTY_H_ diff --git a/src/operator/subgraph/dnnl/dnnl_post_quantize_align_scale_property.h b/src/operator/subgraph/dnnl/dnnl_post_quantize_align_scale_property.h index a4cc724dd898..e23b950b5e3e 100644 --- a/src/operator/subgraph/dnnl/dnnl_post_quantize_align_scale_property.h +++ b/src/operator/subgraph/dnnl/dnnl_post_quantize_align_scale_property.h @@ -19,13 +19,14 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_ALIGN_SCALE_PROPERTY_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_ALIGN_SCALE_PROPERTY_H_ + #if MXNET_USE_ONEDNN == 1 #include #include -#include "../common.h" -#include "dnnl_subgraph_base-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_post_quantize_property.h index cddf4b447810..d9cb6c0a0d73 100644 --- a/src/operator/subgraph/dnnl/dnnl_post_quantize_property.h +++ b/src/operator/subgraph/dnnl/dnnl_post_quantize_property.h @@ -18,6 +18,7 @@ */ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_ #define 
MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_POST_QUANTIZE_PROPERTY_H_ + #if MXNET_USE_ONEDNN == 1 #include diff --git a/src/operator/subgraph/dnnl/dnnl_subgraph_base-inl.h b/src/operator/subgraph/dnnl/dnnl_subgraph_base-inl.h index 0cb8a11d643f..ce832fbd3239 100644 --- a/src/operator/subgraph/dnnl/dnnl_subgraph_base-inl.h +++ b/src/operator/subgraph/dnnl/dnnl_subgraph_base-inl.h @@ -18,9 +18,10 @@ */ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_SUBGRAPH_BASE_INL_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_SUBGRAPH_BASE_INL_H_ + #if MXNET_USE_ONEDNN == 1 -#include "../subgraph_property.h" +#include "operator/subgraph/subgraph_property.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_transformer-inl.h b/src/operator/subgraph/dnnl/dnnl_transformer-inl.h index b711e5364957..6f69064f762b 100644 --- a/src/operator/subgraph/dnnl/dnnl_transformer-inl.h +++ b/src/operator/subgraph/dnnl/dnnl_transformer-inl.h @@ -20,8 +20,8 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_INL_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_INL_H_ -#include "../../mshadow_op.h" -#include "../../mxnet_op.h" +#include "operator/mshadow_op.h" +#include "operator/mxnet_op.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_transformer.cc b/src/operator/subgraph/dnnl/dnnl_transformer.cc index 5871cb29db14..7df6677020a2 100644 --- a/src/operator/subgraph/dnnl/dnnl_transformer.cc +++ b/src/operator/subgraph/dnnl/dnnl_transformer.cc @@ -23,10 +23,10 @@ #include #include -#include "../../contrib/transformer-inl.h" -#include "../../quantization/quantization_utils.h" -#include "../../tensor/elemwise_unary_op.h" -#include "../common.h" +#include "operator/contrib/transformer-inl.h" +#include "operator/quantization/quantization_utils.h" +#include "operator/tensor/elemwise_unary_op.h" +#include "operator/subgraph/common.h" #include "./dnnl_transformer-inl.h" // 3 tensors within one (queries key values) = diff --git 
a/src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h b/src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h index 3be675cfc45d..e5ae54a9e126 100644 --- a/src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h +++ b/src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h @@ -19,19 +19,20 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_PROPERTY_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_QK_PROPERTY_H_ + #if MXNET_USE_ONEDNN == 1 #include #include #include -#include "../../contrib/transformer-inl.h" -#include "../../numpy/np_matrix_op-inl.h" -#include "../../tensor/matrix_op-inl.h" -#include "../common.h" -#include "dnnl_common.h" -#include "dnnl_subgraph_base-inl.h" -#include "dnnl_transformer-inl.h" +#include "operator/contrib/transformer-inl.h" +#include "operator/numpy/np_matrix_op-inl.h" +#include "operator/tensor/matrix_op-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_common.h" +#include "./dnnl_subgraph_base-inl.h" +#include "./dnnl_transformer-inl.h" /* custom_op diff --git a/src/operator/subgraph/dnnl/dnnl_transformer_valatt_property.h b/src/operator/subgraph/dnnl/dnnl_transformer_valatt_property.h index a62c10fdb0d1..374615b25fa4 100644 --- a/src/operator/subgraph/dnnl/dnnl_transformer_valatt_property.h +++ b/src/operator/subgraph/dnnl/dnnl_transformer_valatt_property.h @@ -19,20 +19,21 @@ #ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_VALATT_PROPERTY_H_ #define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_TRANSFORMER_VALATT_PROPERTY_H_ + #if MXNET_USE_ONEDNN == 1 #include #include #include -#include "../../contrib/transformer-inl.h" -#include "../../numpy/np_matrix_op-inl.h" -#include "../../swapaxis-inl.h" -#include "../../tensor/matrix_op-inl.h" -#include "../common.h" -#include "dnnl_common.h" -#include "dnnl_subgraph_base-inl.h" -#include "dnnl_transformer-inl.h" +#include "operator/contrib/transformer-inl.h" +#include "operator/numpy/np_matrix_op-inl.h" +#include "operator/swapaxis-inl.h" 
+#include "operator/tensor/matrix_op-inl.h" +#include "operator/subgraph/common.h" +#include "./dnnl_common.h" +#include "./dnnl_subgraph_base-inl.h" +#include "./dnnl_transformer-inl.h" /* custom_op From ac97edf88870d57c076fa8e9d0dc61686def1f19 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 25 Nov 2021 14:57:57 +0100 Subject: [PATCH 2/5] Conflict && simplyfy includes --- src/operator/nn/dnnl/dnnl_act.cc | 4 ++-- src/operator/nn/dnnl/dnnl_base.cc | 4 ++-- src/operator/nn/dnnl/dnnl_batch_dot-inl.h | 4 ++-- src/operator/nn/dnnl/dnnl_batch_dot.cc | 2 +- src/operator/nn/dnnl/dnnl_batch_norm-inl.h | 4 ++-- src/operator/nn/dnnl/dnnl_concat-inl.h | 4 ++-- src/operator/nn/dnnl/dnnl_concat.cc | 2 +- src/operator/nn/dnnl/dnnl_convolution-inl.h | 4 ++-- src/operator/nn/dnnl/dnnl_convolution.cc | 6 +++--- src/operator/nn/dnnl/dnnl_copy.cc | 4 ++-- src/operator/nn/dnnl/dnnl_deconvolution-inl.h | 4 ++-- src/operator/nn/dnnl/dnnl_deconvolution.cc | 2 +- src/operator/nn/dnnl/dnnl_fully_connected-inl.h | 2 +- src/operator/nn/dnnl/dnnl_fully_connected.cc | 2 +- src/operator/nn/dnnl/dnnl_layer_norm-inl.h | 4 ++-- src/operator/nn/dnnl/dnnl_layer_norm.cc | 2 +- src/operator/nn/dnnl/dnnl_log_softmax.cc | 4 ++-- src/operator/nn/dnnl/dnnl_lrn-inl.h | 2 +- src/operator/nn/dnnl/dnnl_pooling-inl.h | 2 +- src/operator/nn/dnnl/dnnl_pooling.cc | 2 +- src/operator/nn/dnnl/dnnl_reshape.cc | 6 +++--- src/operator/nn/dnnl/dnnl_rnn-inl.h | 2 +- src/operator/nn/dnnl/dnnl_rnn.cc | 2 +- src/operator/nn/dnnl/dnnl_slice-inl.h | 2 +- src/operator/nn/dnnl/dnnl_slice.cc | 6 +++--- src/operator/nn/dnnl/dnnl_softmax-inl.h | 4 ++-- src/operator/nn/dnnl/dnnl_softmax.cc | 2 +- src/operator/nn/dnnl/dnnl_softmax_output.cc | 4 ++-- src/operator/nn/dnnl/dnnl_stack.cc | 8 ++++---- src/operator/nn/dnnl/dnnl_sum.cc | 4 ++-- src/operator/nn/dnnl/dnnl_transpose-inl.h | 4 ++-- src/operator/nn/dnnl/dnnl_transpose.cc | 2 +- src/operator/quantization/dnnl/dnnl_quantized_conv.cc | 1 + 
src/operator/subgraph/dnnl/dnnl_batch_dot_property.h | 2 +- src/operator/subgraph/dnnl/dnnl_bn_relu_property.h | 2 +- src/operator/subgraph/dnnl/dnnl_conv.cc | 4 ++-- src/operator/subgraph/dnnl/dnnl_conv_property.h | 2 +- .../dnnl/dnnl_elemwisemul_post_quantize_property.h | 2 +- src/operator/subgraph/dnnl/dnnl_fc-inl.h | 2 +- src/operator/subgraph/dnnl/dnnl_fc.cc | 4 ++-- .../subgraph/dnnl/dnnl_fc_post_quantize_property.h | 2 +- src/operator/subgraph/dnnl/dnnl_fc_property.h | 4 ++-- .../subgraph/dnnl/dnnl_matmul_post_quantize_property.h | 2 +- .../dnnl/dnnl_post_quantize_align_scale_property.h | 2 +- src/operator/subgraph/dnnl/dnnl_transformer.cc | 2 +- src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h | 6 +++--- .../subgraph/dnnl/dnnl_transformer_valatt_property.h | 6 +++--- 47 files changed, 77 insertions(+), 76 deletions(-) diff --git a/src/operator/nn/dnnl/dnnl_act.cc b/src/operator/nn/dnnl/dnnl_act.cc index a89ed26f13a9..2cc8a3475731 100644 --- a/src/operator/nn/dnnl/dnnl_act.cc +++ b/src/operator/nn/dnnl/dnnl_act.cc @@ -36,8 +36,8 @@ #include #include "operator/operator_common.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_act-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_act-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_base.cc b/src/operator/nn/dnnl/dnnl_base.cc index 1e39d6642585..a4603b5711ad 100644 --- a/src/operator/nn/dnnl/dnnl_base.cc +++ b/src/operator/nn/dnnl/dnnl_base.cc @@ -23,8 +23,8 @@ #include "../../../common/exec_utils.h" #include "operator/operator_common.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_batch_dot-inl.h b/src/operator/nn/dnnl/dnnl_batch_dot-inl.h index 7da7395b6aa1..4117b1795ef8 100644 --- a/src/operator/nn/dnnl/dnnl_batch_dot-inl.h +++ b/src/operator/nn/dnnl/dnnl_batch_dot-inl.h @@ -32,8 +32,8 @@ #include #include "operator/tensor/dot-inl.h" -#include 
"./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_batch_dot.cc b/src/operator/nn/dnnl/dnnl_batch_dot.cc index ff45f19646b8..4534feafd1c0 100644 --- a/src/operator/nn/dnnl/dnnl_batch_dot.cc +++ b/src/operator/nn/dnnl/dnnl_batch_dot.cc @@ -24,7 +24,7 @@ #if MXNET_USE_ONEDNN == 1 -#include "./dnnl_batch_dot-inl.h" +#include "dnnl_batch_dot-inl.h" #include "operator/quantization/quantization_utils.h" namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_batch_norm-inl.h b/src/operator/nn/dnnl/dnnl_batch_norm-inl.h index f74329a935d8..ca644340a37f 100644 --- a/src/operator/nn/dnnl/dnnl_batch_norm-inl.h +++ b/src/operator/nn/dnnl/dnnl_batch_norm-inl.h @@ -32,8 +32,8 @@ #include #include "operator/nn/batch_norm-inl.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_concat-inl.h b/src/operator/nn/dnnl/dnnl_concat-inl.h index cfb54ee88683..297090818552 100644 --- a/src/operator/nn/dnnl/dnnl_concat-inl.h +++ b/src/operator/nn/dnnl/dnnl_concat-inl.h @@ -30,8 +30,8 @@ #include #include "operator/nn/concat-inl.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_concat.cc b/src/operator/nn/dnnl/dnnl_concat.cc index f8052fdc3bfe..83ba9df24543 100644 --- a/src/operator/nn/dnnl/dnnl_concat.cc +++ b/src/operator/nn/dnnl/dnnl_concat.cc @@ -24,7 +24,7 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "./dnnl_concat-inl.h" +#include "dnnl_concat-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_convolution-inl.h b/src/operator/nn/dnnl/dnnl_convolution-inl.h index 5af8d0db1f68..0c48d0d9faa8 100644 --- a/src/operator/nn/dnnl/dnnl_convolution-inl.h +++ 
b/src/operator/nn/dnnl/dnnl_convolution-inl.h @@ -31,8 +31,8 @@ #include #include "operator/nn/convolution-inl.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_convolution.cc b/src/operator/nn/dnnl/dnnl_convolution.cc index 905fa64a658f..fbe67dfb2e50 100644 --- a/src/operator/nn/dnnl/dnnl_convolution.cc +++ b/src/operator/nn/dnnl/dnnl_convolution.cc @@ -26,9 +26,9 @@ #if MXNET_USE_ONEDNN == 1 #include "operator/nn/convolution-inl.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_convolution-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_convolution-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_copy.cc b/src/operator/nn/dnnl/dnnl_copy.cc index 2b78103a02ee..0fa9dc1d7fb3 100644 --- a/src/operator/nn/dnnl/dnnl_copy.cc +++ b/src/operator/nn/dnnl/dnnl_copy.cc @@ -23,8 +23,8 @@ * \author */ -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" #if MXNET_USE_ONEDNN == 1 namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_deconvolution-inl.h b/src/operator/nn/dnnl/dnnl_deconvolution-inl.h index a15a301865f7..10784232ed4e 100644 --- a/src/operator/nn/dnnl/dnnl_deconvolution-inl.h +++ b/src/operator/nn/dnnl/dnnl_deconvolution-inl.h @@ -42,8 +42,8 @@ #include #include "operator/nn/deconvolution-inl.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_deconvolution.cc b/src/operator/nn/dnnl/dnnl_deconvolution.cc index b15947e3f81f..9140d198f9f2 100644 --- a/src/operator/nn/dnnl/dnnl_deconvolution.cc +++ b/src/operator/nn/dnnl/dnnl_deconvolution.cc @@ -24,7 +24,7 @@ #if MXNET_USE_ONEDNN == 1 #include "operator/nn/deconvolution-inl.h" -#include 
"./dnnl_deconvolution-inl.h" +#include "dnnl_deconvolution-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_fully_connected-inl.h b/src/operator/nn/dnnl/dnnl_fully_connected-inl.h index a71770f3e199..c30ad4b9c69b 100644 --- a/src/operator/nn/dnnl/dnnl_fully_connected-inl.h +++ b/src/operator/nn/dnnl/dnnl_fully_connected-inl.h @@ -32,7 +32,7 @@ #include #include "operator/nn/fully_connected-inl.h" -#include "./dnnl_base-inl.h" +#include "dnnl_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_fully_connected.cc b/src/operator/nn/dnnl/dnnl_fully_connected.cc index e2d65cb1b245..eca90b7cf4c6 100644 --- a/src/operator/nn/dnnl/dnnl_fully_connected.cc +++ b/src/operator/nn/dnnl/dnnl_fully_connected.cc @@ -24,7 +24,7 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "./dnnl_fully_connected-inl.h" +#include "dnnl_fully_connected-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_layer_norm-inl.h b/src/operator/nn/dnnl/dnnl_layer_norm-inl.h index d03bf798acac..c751930967b7 100644 --- a/src/operator/nn/dnnl/dnnl_layer_norm-inl.h +++ b/src/operator/nn/dnnl/dnnl_layer_norm-inl.h @@ -30,8 +30,8 @@ #include #include "operator/nn/layer_norm-inl.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_layer_norm.cc b/src/operator/nn/dnnl/dnnl_layer_norm.cc index 2c938db683e1..4108a620ba79 100644 --- a/src/operator/nn/dnnl/dnnl_layer_norm.cc +++ b/src/operator/nn/dnnl/dnnl_layer_norm.cc @@ -24,7 +24,7 @@ #if MXNET_USE_ONEDNN == 1 -#include "./dnnl_layer_norm-inl.h" +#include "dnnl_layer_norm-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_log_softmax.cc b/src/operator/nn/dnnl/dnnl_log_softmax.cc index 615c539de7aa..2fe3532c765f 100644 --- a/src/operator/nn/dnnl/dnnl_log_softmax.cc +++ b/src/operator/nn/dnnl/dnnl_log_softmax.cc @@ -23,8 
+23,8 @@ */ #include "operator/nn/softmax-inl.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" #if MXNET_USE_ONEDNN == 1 namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_lrn-inl.h b/src/operator/nn/dnnl/dnnl_lrn-inl.h index 90d43fc41bd6..913a6214611a 100644 --- a/src/operator/nn/dnnl/dnnl_lrn-inl.h +++ b/src/operator/nn/dnnl/dnnl_lrn-inl.h @@ -31,7 +31,7 @@ #include #include "operator/nn/lrn-inl.h" -#include "./dnnl_base-inl.h" +#include "dnnl_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_pooling-inl.h b/src/operator/nn/dnnl/dnnl_pooling-inl.h index fa65ba51919f..e01270a18314 100644 --- a/src/operator/nn/dnnl/dnnl_pooling-inl.h +++ b/src/operator/nn/dnnl/dnnl_pooling-inl.h @@ -30,7 +30,7 @@ #include #include "operator/nn/pooling-inl.h" -#include "./dnnl_base-inl.h" +#include "dnnl_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_pooling.cc b/src/operator/nn/dnnl/dnnl_pooling.cc index 418c832703ff..5f21a796d0dd 100644 --- a/src/operator/nn/dnnl/dnnl_pooling.cc +++ b/src/operator/nn/dnnl/dnnl_pooling.cc @@ -25,7 +25,7 @@ #if MXNET_USE_ONEDNN == 1 -#include "./dnnl_pooling-inl.h" +#include "dnnl_pooling-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_reshape.cc b/src/operator/nn/dnnl/dnnl_reshape.cc index 5602f78ef8e2..8481eaee51c1 100644 --- a/src/operator/nn/dnnl/dnnl_reshape.cc +++ b/src/operator/nn/dnnl/dnnl_reshape.cc @@ -25,9 +25,9 @@ #if MXNET_USE_ONEDNN == 1 #include "operator/tensor/elemwise_unary_op.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" -#include "./dnnl_reshape-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" +#include "dnnl_reshape-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_rnn-inl.h b/src/operator/nn/dnnl/dnnl_rnn-inl.h index 3fc779446889..f28753461e58 100644 --- a/src/operator/nn/dnnl/dnnl_rnn-inl.h +++ 
b/src/operator/nn/dnnl/dnnl_rnn-inl.h @@ -31,7 +31,7 @@ #include #include "operator/rnn-inl.h" -#include "./dnnl_base-inl.h" +#include "dnnl_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_rnn.cc b/src/operator/nn/dnnl/dnnl_rnn.cc index 22b9e27e556f..0877d10dd939 100644 --- a/src/operator/nn/dnnl/dnnl_rnn.cc +++ b/src/operator/nn/dnnl/dnnl_rnn.cc @@ -27,7 +27,7 @@ #include -#include "./dnnl_rnn-inl.h" +#include "dnnl_rnn-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_slice-inl.h b/src/operator/nn/dnnl/dnnl_slice-inl.h index a145c6f82e09..db4d562595e7 100644 --- a/src/operator/nn/dnnl/dnnl_slice-inl.h +++ b/src/operator/nn/dnnl/dnnl_slice-inl.h @@ -36,7 +36,7 @@ #include "operator/operator_common.h" #include "operator/tensor/slice-inl.h" -#include "./dnnl_base-inl.h" +#include "dnnl_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_slice.cc b/src/operator/nn/dnnl/dnnl_slice.cc index 32bef008d796..300813342555 100644 --- a/src/operator/nn/dnnl/dnnl_slice.cc +++ b/src/operator/nn/dnnl/dnnl_slice.cc @@ -25,9 +25,9 @@ #if MXNET_USE_ONEDNN == 1 -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" -#include "./dnnl_slice-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" +#include "dnnl_slice-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_softmax-inl.h b/src/operator/nn/dnnl/dnnl_softmax-inl.h index 17ae88c54263..eea520eef88c 100644 --- a/src/operator/nn/dnnl/dnnl_softmax-inl.h +++ b/src/operator/nn/dnnl/dnnl_softmax-inl.h @@ -36,8 +36,8 @@ #if MXNET_USE_ONEDNN == 1 #include -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" #include "operator/nn/softmax-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_softmax.cc b/src/operator/nn/dnnl/dnnl_softmax.cc index 72a25d4c85b9..dd519b200431 100644 --- a/src/operator/nn/dnnl/dnnl_softmax.cc +++ 
b/src/operator/nn/dnnl/dnnl_softmax.cc @@ -24,7 +24,7 @@ */ #if MXNET_USE_ONEDNN == 1 -#include "./dnnl_softmax-inl.h" +#include "dnnl_softmax-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_softmax_output.cc b/src/operator/nn/dnnl/dnnl_softmax_output.cc index 168c4152d90e..94b0029d9b06 100644 --- a/src/operator/nn/dnnl/dnnl_softmax_output.cc +++ b/src/operator/nn/dnnl/dnnl_softmax_output.cc @@ -26,8 +26,8 @@ #if MXNET_USE_ONEDNN == 1 #include "operator/softmax_output-inl.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_stack.cc b/src/operator/nn/dnnl/dnnl_stack.cc index aabb66e9da99..1951a62d1e29 100644 --- a/src/operator/nn/dnnl/dnnl_stack.cc +++ b/src/operator/nn/dnnl/dnnl_stack.cc @@ -21,11 +21,11 @@ * \file dnnl_stack.cc */ -#include "./dnnl_base-inl.h" -#include "./dnnl_concat-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_concat-inl.h" +#include "dnnl_ops-inl.h" -#include "../../tensor/matrix_op-inl.h" +#include "operator/tensor/matrix_op-inl.h" #if MXNET_USE_ONEDNN == 1 namespace mxnet { diff --git a/src/operator/nn/dnnl/dnnl_sum.cc b/src/operator/nn/dnnl/dnnl_sum.cc index 35ded9854083..c2626a26b4e0 100644 --- a/src/operator/nn/dnnl/dnnl_sum.cc +++ b/src/operator/nn/dnnl/dnnl_sum.cc @@ -25,8 +25,8 @@ #include #include "operator/operator_common.h" -#include "./dnnl_base-inl.h" -#include "./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_transpose-inl.h b/src/operator/nn/dnnl/dnnl_transpose-inl.h index 1e1cef116a54..721e45abf586 100644 --- a/src/operator/nn/dnnl/dnnl_transpose-inl.h +++ b/src/operator/nn/dnnl/dnnl_transpose-inl.h @@ -26,8 +26,8 @@ #define MXNET_OPERATOR_NN_DNNL_DNNL_TRANSPOSE_INL_H_ #if MXNET_USE_ONEDNN == 1 -#include "./dnnl_base-inl.h" -#include 
"./dnnl_ops-inl.h" +#include "dnnl_base-inl.h" +#include "dnnl_ops-inl.h" #include "operator/numpy/np_matrix_op-inl.h" diff --git a/src/operator/nn/dnnl/dnnl_transpose.cc b/src/operator/nn/dnnl/dnnl_transpose.cc index 29628adf4059..d78ae98f1a98 100644 --- a/src/operator/nn/dnnl/dnnl_transpose.cc +++ b/src/operator/nn/dnnl/dnnl_transpose.cc @@ -26,7 +26,7 @@ #if MXNET_USE_ONEDNN == 1 #include "operator/tensor/matrix_op-inl.h" -#include "./dnnl_transpose-inl.h" +#include "dnnl_transpose-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/quantization/dnnl/dnnl_quantized_conv.cc b/src/operator/quantization/dnnl/dnnl_quantized_conv.cc index 6f796ad799b8..158d0eadfaa7 100644 --- a/src/operator/quantization/dnnl/dnnl_quantized_conv.cc +++ b/src/operator/quantization/dnnl/dnnl_quantized_conv.cc @@ -30,6 +30,7 @@ #include "operator/nn/dnnl/dnnl_convolution-inl.h" #include "operator/tensor/matrix_op-inl.h" #include "operator/quantization/quantization_utils.h" + namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_batch_dot_property.h b/src/operator/subgraph/dnnl/dnnl_batch_dot_property.h index 238c6efd9691..d91b17ae2c1f 100644 --- a/src/operator/subgraph/dnnl/dnnl_batch_dot_property.h +++ b/src/operator/subgraph/dnnl/dnnl_batch_dot_property.h @@ -27,7 +27,7 @@ #include "operator/tensor/dot-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_subgraph_base-inl.h" +#include "dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_bn_relu_property.h b/src/operator/subgraph/dnnl/dnnl_bn_relu_property.h index 8603a6dbaa5c..d59b5e29f8d6 100644 --- a/src/operator/subgraph/dnnl/dnnl_bn_relu_property.h +++ b/src/operator/subgraph/dnnl/dnnl_bn_relu_property.h @@ -28,7 +28,7 @@ #include "operator/nn/dnnl/dnnl_act-inl.h" #include "operator/nn/dnnl/dnnl_batch_norm-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_subgraph_base-inl.h" +#include "dnnl_subgraph_base-inl.h" 
namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_conv.cc b/src/operator/subgraph/dnnl/dnnl_conv.cc index 2ce32dbfe134..ad7a68380fde 100644 --- a/src/operator/subgraph/dnnl/dnnl_conv.cc +++ b/src/operator/subgraph/dnnl/dnnl_conv.cc @@ -29,8 +29,8 @@ #include "operator/quantization/quantization_utils.h" #include "operator/tensor/matrix_op-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_common.h" -#include "./dnnl_conv-inl.h" +#include "dnnl_common.h" +#include "dnnl_conv-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_conv_property.h b/src/operator/subgraph/dnnl/dnnl_conv_property.h index 479cd21ecaa4..c5e027d5b123 100644 --- a/src/operator/subgraph/dnnl/dnnl_conv_property.h +++ b/src/operator/subgraph/dnnl/dnnl_conv_property.h @@ -31,7 +31,7 @@ #include "operator/nn/dnnl/dnnl_ops-inl.h" #include "operator/tensor/matrix_op-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_subgraph_base-inl.h" +#include "dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h index 4abc4e051986..82c77b66f546 100644 --- a/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h +++ b/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h @@ -35,7 +35,7 @@ #include "operator/quantization/requantize-inl.h" #include "operator/tensor/elemwise_binary_op-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_subgraph_base-inl.h" +#include "dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_fc-inl.h b/src/operator/subgraph/dnnl/dnnl_fc-inl.h index 021ffc4360c7..09ca2ca05307 100644 --- a/src/operator/subgraph/dnnl/dnnl_fc-inl.h +++ b/src/operator/subgraph/dnnl/dnnl_fc-inl.h @@ -27,7 +27,7 @@ #include #include "operator/nn/dnnl/dnnl_fully_connected-inl.h" 
-#include "./dnnl.hpp" +#include "dnnl.hpp" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_fc.cc b/src/operator/subgraph/dnnl/dnnl_fc.cc index 77a0d64d9a45..49887fec8684 100644 --- a/src/operator/subgraph/dnnl/dnnl_fc.cc +++ b/src/operator/subgraph/dnnl/dnnl_fc.cc @@ -36,8 +36,8 @@ #include "operator/quantization/quantization_utils.h" #include "operator/tensor/matrix_op-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_common.h" -#include "./dnnl_fc-inl.h" +#include "dnnl_common.h" +#include "dnnl_fc-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h index 692fd42bf0a4..e8bb29e0a0c4 100644 --- a/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h +++ b/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h @@ -35,7 +35,7 @@ #include "operator/nn/fully_connected-inl.h" #include "operator/quantization/requantize-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_subgraph_base-inl.h" +#include "dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_fc_property.h b/src/operator/subgraph/dnnl/dnnl_fc_property.h index 0eeb6df9502e..64fd507e743b 100644 --- a/src/operator/subgraph/dnnl/dnnl_fc_property.h +++ b/src/operator/subgraph/dnnl/dnnl_fc_property.h @@ -33,8 +33,8 @@ #include "operator/tensor/matrix_op-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_fc-inl.h" -#include "./dnnl_subgraph_base-inl.h" +#include "dnnl_fc-inl.h" +#include "dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h index 0b9dc1d2cfea..03b95d5749d3 100644 --- a/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h +++ b/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h @@ 
-27,7 +27,7 @@ #include "operator/quantization/requantize-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_subgraph_base-inl.h" +#include "dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_post_quantize_align_scale_property.h b/src/operator/subgraph/dnnl/dnnl_post_quantize_align_scale_property.h index e23b950b5e3e..62d88d6dc7b7 100644 --- a/src/operator/subgraph/dnnl/dnnl_post_quantize_align_scale_property.h +++ b/src/operator/subgraph/dnnl/dnnl_post_quantize_align_scale_property.h @@ -26,7 +26,7 @@ #include #include "operator/subgraph/common.h" -#include "./dnnl_subgraph_base-inl.h" +#include "dnnl_subgraph_base-inl.h" namespace mxnet { namespace op { diff --git a/src/operator/subgraph/dnnl/dnnl_transformer.cc b/src/operator/subgraph/dnnl/dnnl_transformer.cc index 7df6677020a2..6574be6e8424 100644 --- a/src/operator/subgraph/dnnl/dnnl_transformer.cc +++ b/src/operator/subgraph/dnnl/dnnl_transformer.cc @@ -27,7 +27,7 @@ #include "operator/quantization/quantization_utils.h" #include "operator/tensor/elemwise_unary_op.h" #include "operator/subgraph/common.h" -#include "./dnnl_transformer-inl.h" +#include "dnnl_transformer-inl.h" // 3 tensors within one (queries key values) = #define QKV_NUM 3 diff --git a/src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h b/src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h index e5ae54a9e126..c117cf67fecf 100644 --- a/src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h +++ b/src/operator/subgraph/dnnl/dnnl_transformer_qk_property.h @@ -30,9 +30,9 @@ #include "operator/numpy/np_matrix_op-inl.h" #include "operator/tensor/matrix_op-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_common.h" -#include "./dnnl_subgraph_base-inl.h" -#include "./dnnl_transformer-inl.h" +#include "dnnl_common.h" +#include "dnnl_subgraph_base-inl.h" +#include "dnnl_transformer-inl.h" /* custom_op diff --git 
a/src/operator/subgraph/dnnl/dnnl_transformer_valatt_property.h b/src/operator/subgraph/dnnl/dnnl_transformer_valatt_property.h index 374615b25fa4..35f02f620379 100644 --- a/src/operator/subgraph/dnnl/dnnl_transformer_valatt_property.h +++ b/src/operator/subgraph/dnnl/dnnl_transformer_valatt_property.h @@ -31,9 +31,9 @@ #include "operator/swapaxis-inl.h" #include "operator/tensor/matrix_op-inl.h" #include "operator/subgraph/common.h" -#include "./dnnl_common.h" -#include "./dnnl_subgraph_base-inl.h" -#include "./dnnl_transformer-inl.h" +#include "dnnl_common.h" +#include "dnnl_subgraph_base-inl.h" +#include "dnnl_transformer-inl.h" /* custom_op From 51cd121c2ba8fa80560f42555aba450651cbad6d Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 25 Nov 2021 15:02:18 +0100 Subject: [PATCH 3/5] Delete unused files --- .../dnnl_elemwisemul_post_quantize_property.h | 232 ------------------ .../dnnl/dnnl_fc_post_quantize_property.h | 231 ----------------- .../dnnl/dnnl_matmul_post_quantize_property.h | 203 --------------- 3 files changed, 666 deletions(-) delete mode 100644 src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h delete mode 100644 src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h delete mode 100644 src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h diff --git a/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h deleted file mode 100644 index 82c77b66f546..000000000000 --- a/src/operator/subgraph/dnnl/dnnl_elemwisemul_post_quantize_property.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file dnnl_elemwisemul_post_quantize_property.cc - * \brief Partition gragph property for oneDNN Quantized ElemwiseMul operator - * \author Xinyu Chen - */ - -#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ -#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ - -#if MXNET_USE_ONEDNN == 1 - -#include -#include -#include - -#include "operator/quantization/requantize-inl.h" -#include "operator/tensor/elemwise_binary_op-inl.h" -#include "operator/subgraph/common.h" -#include "dnnl_subgraph_base-inl.h" - -namespace mxnet { -namespace op { - -#define QUANTIZED_ElemwiseMul_NAME "_contrib_quantized_elemwise_mul" - -class ElemwiseMulPostQuantizeSelector : public SubgraphSelectorV2 { - public: - /*! \brief pattern match status */ - enum SelectStatus { - kFail = 0, - kStart, - kRequantize, - kSuccess, - }; - - private: - bool disable_all; - bool disable_float_output; - SelectStatus status; - std::vector matched_list; - - public: - explicit ElemwiseMulPostQuantizeSelector(const bool dis_all, const bool dis_float_output) - : disable_all(dis_all), disable_float_output(dis_float_output) {} - - bool Select(const BiDirectedNode& n) override { - const auto rawnode = n.node; - if ((!disable_all) && rawnode->op() == Op::Get(QUANTIZED_ElemwiseMul_NAME)) { - status = disable_all ? 
kSuccess : kStart; - matched_list.clear(); - matched_list.push_back(&n); - return true; - } - return false; - } - - bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override { - return false; - } - - bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override { - const auto raw_node = n.node; - const auto raw_new_node = new_node.node; - if (status == kFail || status == kSuccess || raw_new_node->is_variable()) - return false; - // If n isn't the last matched node, then we encoutered a internal - // branch, we should pop out the node behind n and stop fusion. - if (matched_list.back() != &n) { - if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) { - while (matched_list.back() != &n) { - matched_list.pop_back(); - } - } - - status = kSuccess; - return false; - } - - switch (status) { - case kStart: - if (raw_new_node->op() == Op::Get("_contrib_requantize")) { - auto const& param = nnvm::get(raw_new_node->attrs.parsed); - if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) { - matched_list.push_back(&new_node); - status = kRequantize; - return true; - } - } - case kRequantize: - if ((!disable_float_output) && (raw_new_node->op() == Op::Get("_contrib_dequantize"))) { - CHECK(raw_node->op() == Op::Get("_contrib_requantize")); - if (n.outputs.size() > 1) { - // check if requantize have other outputs than dequantize - // if it has we can't fuse dequantize into elemwise_mul - for (auto kv : n.outputs) { - const auto& node = kv.first; - if (node->op() != Op::Get("_contrib_dequantize")) { - status = kSuccess; - return false; - } - } - } - - matched_list.push_back(&new_node); - status = kSuccess; - return true; - } - default: - status = kSuccess; - return false; - } - } - - std::vector Filter(const std::vector& candidates) override { - if ((status != kSuccess) || (matched_list.size() <= 1)) { - return std::vector(0); - } else { - std::vector ret; - for (auto i : 
matched_list) { - auto non_const_i = const_cast(i); - if (std::find(candidates.begin(), candidates.end(), non_const_i) != candidates.end()) { - ret.push_back(non_const_i); - } - } - return ret; - } - } - - void Reset() override { - CHECK_GE(matched_list.size(), 1); - auto new_selector = ElemwiseMulPostQuantizeSelector(disable_all, disable_float_output); - new_selector.Select(*matched_list[0]); - *this = new_selector; - } -}; - -class ElemwiseMulPostQuantizeProperty : public SubgraphProperty { - public: - ElemwiseMulPostQuantizeProperty() { - disable_fuse_all = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_QEM_FUSE_ALL", false); - disable_float_output = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_QEM_FLOAT_OUTPUT", false); - } - - static SubgraphPropertyPtr Create() { - static const std::string& name = "oneDNN EltwiseMul post-quantization optimization pass"; - auto property = std::make_shared(); - property->SetAttr("property_name", name); - property->SetAttr("inference_only", true); - return property; - } - - nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol& sym, - const int subgraph_id = 0) const override { - nnvm::ObjectPtr em_node = nullptr; - nnvm::ObjectPtr requantize_node = nullptr; - nnvm::ObjectPtr dequantize_node = nullptr; - - DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr& node) { - if (node->is_variable()) - return; - if (node->op() == Op::Get(QUANTIZED_ElemwiseMul_NAME)) { - em_node = node; - } else if (node->op() == Op::Get("_contrib_requantize")) { - requantize_node = node; - } else if (node->op() == Op::Get("_contrib_dequantize")) { - dequantize_node = node; - } - }); - - CHECK_NOTNULL(em_node); - CHECK_NOTNULL(requantize_node); - auto const& requantize_param = nnvm::get(requantize_node->attrs.parsed); - CHECK(requantize_param.min_calib_range.has_value()); - CHECK(requantize_param.max_calib_range.has_value()); - - // When only fused quantized_elemwise_mul and requantize, set min/max_cablib_range, - // When fused quantized_elemwise_mul + requantize + dequantize, 
set dequantize flag to true. - if (dequantize_node != nullptr) { - em_node->attrs.dict["enable_float_output"] = "True"; - } else { - em_node->attrs.dict["min_calib_range"] = - std::to_string(requantize_param.min_calib_range.value()); - em_node->attrs.dict["max_calib_range"] = - std::to_string(requantize_param.max_calib_range.value()); - } - em_node->op()->attr_parser(&(em_node->attrs)); - return em_node; - } - - SubgraphSelectorV2Ptr CreateSubgraphSelectorV2() const override { - auto selector = - std::make_shared(disable_fuse_all, disable_float_output); - return selector; - } - - void ConnectSubgraphOutputs(const nnvm::ObjectPtr n, - std::vector* output_entries) const override { - for (size_t i = 0; i < output_entries->size(); ++i) { - auto entry_ptr = output_entries->at(i); - *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0}; - } - } - - private: - bool disable_fuse_all; - bool disable_float_output; -}; - -} // namespace op -} // namespace mxnet - -#endif // if MXNET_USE_ONEDNN == 1 -#endif // MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_ diff --git a/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h deleted file mode 100644 index e8bb29e0a0c4..000000000000 --- a/src/operator/subgraph/dnnl/dnnl_fc_post_quantize_property.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file dnnl_fc_post_quantize_property.cc - * \brief Partition gragph property for oneDNN Quantized FullyConnected operator - * \author Ciyong Chen - */ - -#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_POST_QUANTIZE_PROPERTY_H_ -#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_POST_QUANTIZE_PROPERTY_H_ - -#if MXNET_USE_ONEDNN == 1 - -#include -#include -#include - -#include "operator/nn/fully_connected-inl.h" -#include "operator/quantization/requantize-inl.h" -#include "operator/subgraph/common.h" -#include "dnnl_subgraph_base-inl.h" - -namespace mxnet { -namespace op { - -#define QUANTIZED_FC_NAME "_sg_onednn_fully_connected" - -class SgDNNLFCPostQuantizeSelector : public SubgraphSelectorV2 { - public: - /*! \brief pattern match status */ - enum SelectStatus { - kFail = 0, - kStart, - kRequantize, - kSuccess, - }; - - private: - bool disable_all; - bool disable_float_output; - SelectStatus status; - std::vector matched_list; - - public: - explicit SgDNNLFCPostQuantizeSelector(const bool dis_all, const bool dis_float_output) - : disable_all(dis_all), disable_float_output(dis_float_output) {} - - bool Select(const BiDirectedNode& n) override { - const auto rawnode = n.node; - if ((!disable_all) && rawnode->op() == Op::Get(QUANTIZED_FC_NAME)) { - status = disable_all ? 
kSuccess : kStart; - matched_list.clear(); - matched_list.push_back(&n); - return true; - } - return false; - } - - bool SelectInput(const BiDirectedNode& n, const BiDirectedNode& new_node) override { - return false; - } - - bool SelectOutput(const BiDirectedNode& n, const BiDirectedNode& new_node) override { - const auto raw_node = n.node; - const auto raw_new_node = new_node.node; - if (status == kFail || status == kSuccess || raw_new_node->is_variable()) - return false; - // If n isn't the last matched node, then we encoutered a internal - // branch, we should pop out the node behind n and stop fusion. - if (matched_list.back() != &n) { - if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) { - while (matched_list.back() != &n) { - matched_list.pop_back(); - } - } - - status = kSuccess; - return false; - } - - switch (status) { - case kStart: - if (raw_new_node->op() == Op::Get("_contrib_requantize")) { - auto const& param = nnvm::get(raw_new_node->attrs.parsed); - if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) { - matched_list.push_back(&new_node); - status = kRequantize; - return true; - } - } - case kRequantize: - if ((!disable_float_output) && (raw_new_node->op() == Op::Get("_contrib_dequantize"))) { - CHECK(raw_node->op() == Op::Get("_contrib_requantize")); - if (n.outputs.size() > 1) { - // check if requantize have other outputs than dequantize - // if it has we can't fuse dequantize into FC - for (auto kv : n.outputs) { - const auto& node = kv.first; - if (node->op() != Op::Get("_contrib_dequantize")) { - status = kSuccess; - return false; - } - } - } - matched_list.push_back(&new_node); - status = kSuccess; - return true; - } - default: - status = kSuccess; - return false; - } - } - - std::vector Filter(const std::vector& candidates) override { - if ((status != kSuccess) || (matched_list.size() <= 1)) { - return std::vector(0); - } else { - std::vector ret; - for (auto i : matched_list) { - auto 
non_const_i = const_cast(i); - if (std::find(candidates.begin(), candidates.end(), non_const_i) != candidates.end()) { - ret.push_back(non_const_i); - } - } - return ret; - } - } - - void Reset() override { - CHECK_GE(matched_list.size(), 1); - auto new_selector = SgDNNLFCPostQuantizeSelector(disable_all, disable_float_output); - new_selector.Select(*matched_list[0]); - *this = new_selector; - } -}; - -class SgDNNLFCPostQuantizeProperty : public SubgraphProperty { - public: - SgDNNLFCPostQuantizeProperty() { - disable_fuse_all = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_QFC_FUSE_ALL", false); - disable_float_output = dmlc::GetEnv("MXNET_DISABLE_ONEDNN_QFC_FLOAT_OUTPUT", false); - } - - static SubgraphPropertyPtr Create() { - static const std::string& name = "oneDNN FullyConected post-quantization optimization pass"; - auto property = std::make_shared(); - property->SetAttr("property_name", name); - property->SetAttr("inference_only", true); - return property; - } - - nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol& sym, - const int subgraph_id = 0) const override { - nnvm::ObjectPtr fc_node = nullptr; - nnvm::ObjectPtr requantize_node = nullptr; - nnvm::ObjectPtr dequantize_node = nullptr; - - DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr& node) { - if (node->is_variable()) - return; - if (node->op() == Op::Get(QUANTIZED_FC_NAME)) { - fc_node = node; - } else if (node->op() == Op::Get("_contrib_requantize")) { - requantize_node = node; - } else if (node->op() == Op::Get("_contrib_dequantize")) { - dequantize_node = node; - } - }); - - CHECK_NOTNULL(fc_node); - CHECK_NOTNULL(requantize_node); - auto const& requantize_param = nnvm::get(requantize_node->attrs.parsed); - CHECK(requantize_param.min_calib_range.has_value()); - CHECK(requantize_param.max_calib_range.has_value()); - - // When only fused quantized_fullyconnected and requantize, set min/max_cablib_range, - // When fused quantized_fullyconnected + requantize + dequantize, set dequantize flag to true. 
- if (dequantize_node != nullptr) { - fc_node->attrs.dict["enable_float_output"] = "True"; - } else { - fc_node->attrs.dict["min_calib_range"] = - std::to_string(requantize_param.min_calib_range.value()); - fc_node->attrs.dict["max_calib_range"] = - std::to_string(requantize_param.max_calib_range.value()); - } - fc_node->op()->attr_parser(&(fc_node->attrs)); - return fc_node; - } - - SubgraphSelectorV2Ptr CreateSubgraphSelectorV2() const override { - auto selector = - std::make_shared(disable_fuse_all, disable_float_output); - return selector; - } - - void ConnectSubgraphOutputs(const nnvm::ObjectPtr n, - std::vector* output_entries) const override { - for (size_t i = 0; i < output_entries->size(); ++i) { - auto entry_ptr = output_entries->at(i); - *entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0}; - } - } - - private: - bool disable_fuse_all; - bool disable_float_output; -}; - -} // namespace op -} // namespace mxnet - -#endif // if MXNET_USE_ONEDNN == 1 -#endif // MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_FC_POST_QUANTIZE_PROPERTY_H_ diff --git a/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h b/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h deleted file mode 100644 index 03b95d5749d3..000000000000 --- a/src/operator/subgraph/dnnl/dnnl_matmul_post_quantize_property.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_MATMUL_POST_QUANTIZE_PROPERTY_H_ -#define MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_MATMUL_POST_QUANTIZE_PROPERTY_H_ - -#if MXNET_USE_ONEDNN == 1 - -#include -#include - -#include "operator/quantization/requantize-inl.h" -#include "operator/subgraph/common.h" -#include "dnnl_subgraph_base-inl.h" - -namespace mxnet { -namespace op { - -class SgDNNLMatmulPostQuantizeSelector : public SubgraphSelector { - public: - /*! \brief pattern match status */ - enum SelectStatus { - kFail = 0, - kStart, - kRequantize, - kSuccess, - }; - - private: - bool disable_all; - bool disable_float_output; - SelectStatus status; - std::vector matched_list; - - public: - explicit SgDNNLMatmulPostQuantizeSelector(const bool dis_all, const bool dis_float_output) - : disable_all(dis_all), disable_float_output(dis_float_output) {} - - bool Select(const nnvm::Node& n) override { - if ((!disable_all) && (n.op() == Op::Get("_sg_onednn_selfatt_qk") || - n.op() == Op::Get("_sg_onednn_selfatt_valatt") || - n.op() == Op::Get("_sg_onednn_batch_dot"))) { - status = disable_all ? 
kSuccess : kStart; - matched_list.clear(); - matched_list.push_back(&n); - return true; - } - return false; - } - - bool SelectInput(const nnvm::Node& n, const nnvm::Node& new_node) override { - return false; - } - - bool SelectOutput(const nnvm::Node& n, const nnvm::Node& new_node) override { - if (status == kFail || status == kSuccess || new_node.is_variable()) - return false; - // If n isn't the last matched node, then we encoutered a internal - // branch, we should pop out the node behind n and stop fusion. - if (matched_list.back() != &n) { - if (std::find(matched_list.begin(), matched_list.end(), &n) != matched_list.end()) { - while (matched_list.back() != &n) { - matched_list.pop_back(); - } - } - - status = kSuccess; - return false; - } - - switch (status) { - case kStart: - if (new_node.op() == Op::Get("_contrib_requantize")) { - auto const& param = nnvm::get(new_node.attrs.parsed); - if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) { - matched_list.push_back(&new_node); - status = kRequantize; - return true; - } - } - case kRequantize: - if ((!disable_float_output) && (new_node.op() == Op::Get("_contrib_dequantize"))) { - matched_list.push_back(&new_node); - status = kSuccess; - return true; - } - default: - status = kSuccess; - return false; - } - } - - std::vector Filter(const std::vector& candidates) override { - if ((status != kSuccess) || (matched_list.size() <= 1)) { - return std::vector(0); - } else { - std::vector ret; - for (auto i : matched_list) { - auto non_const_i = const_cast(i); - if (std::find(candidates.begin(), candidates.end(), non_const_i) != candidates.end()) { - ret.push_back(non_const_i); - } - } - return ret; - } - } - - void Reset() override { - CHECK_GE(matched_list.size(), 1); - auto new_selector = SgDNNLMatmulPostQuantizeSelector(disable_all, disable_float_output); - new_selector.Select(*matched_list[0]); - *this = new_selector; - } -}; - -class SgDNNLMatmulPostQuantizeProperty : public 
SubgraphProperty { - public: - SgDNNLMatmulPostQuantizeProperty() { - disable_fuse_all = dmlc::GetEnv("MXNET_DISABLE_DNNL_QMATMUL_FUSE_ALL", false); - disable_float_output = dmlc::GetEnv("MXNET_DISABLE_DNNL_QMATMUL_FLOAT_OUTPUT", false); - } - - static SubgraphPropertyPtr Create() { - static const std::string& name = "DNNL Matmul post-quantization optimization pass"; - auto property = std::make_shared(); - property->SetAttr("property_name", name); - property->SetAttr("inference_only", true); - return property; - } - - nnvm::ObjectPtr CreateSubgraphNode(const nnvm::Symbol& sym, - const int subgraph_id = 0) const override { - nnvm::ObjectPtr interleaved_node = nullptr; - nnvm::ObjectPtr requantize_node = nullptr; - nnvm::ObjectPtr dequantize_node = nullptr; - - DFSVisit(sym.outputs, [&](const nnvm::ObjectPtr& node) { - if (node->is_variable()) - return; - if (node->op() == Op::Get("_sg_onednn_selfatt_qk") || - node->op() == Op::Get("_sg_onednn_selfatt_valatt") || - node->op() == Op::Get("_sg_onednn_batch_dot")) { - interleaved_node = node; - } else if (node->op() == Op::Get("_contrib_requantize")) { - requantize_node = node; - } else if (node->op() == Op::Get("_contrib_dequantize")) { - dequantize_node = node; - } - }); - - CHECK_NOTNULL(interleaved_node); - CHECK_NOTNULL(requantize_node); - auto const& requantize_param = nnvm::get(requantize_node->attrs.parsed); - CHECK(requantize_param.min_calib_range.has_value()); - CHECK(requantize_param.max_calib_range.has_value()); - - // When only fusing quantized_interleaved_matmul and requantize, set min/max_cablib_range, - // When fusing quantized_interleaved_matmul + requantize + dequantize, - // set dequantize flag to true. 
- if (dequantize_node != nullptr) { - interleaved_node->attrs.dict["enable_float_output"] = "True"; - } else { - interleaved_node->attrs.dict["min_calib_range"] = - std::to_string(requantize_param.min_calib_range.value()); - interleaved_node->attrs.dict["max_calib_range"] = - std::to_string(requantize_param.max_calib_range.value()); - } - interleaved_node->op()->attr_parser(&(interleaved_node->attrs)); - return interleaved_node; - } - - SubgraphSelectorPtr CreateSubgraphSelector() const override { - auto selector = - std::make_shared(disable_fuse_all, disable_float_output); - return selector; - } - - private: - bool disable_fuse_all; - bool disable_float_output; -}; - -} // namespace op -} // namespace mxnet - -#endif // if MXNET_USE_ONEDNN == 1 -#endif // MXNET_OPERATOR_SUBGRAPH_DNNL_DNNL_MATMUL_POST_QUANTIZE_PROPERTY_H_ From 47bf12777e22e2d8216d0dcafc89158ba71dcfb2 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 9 Dec 2021 11:13:45 +0100 Subject: [PATCH 4/5] Add missed headers --- src/operator/nn/dnnl/dnnl_convolution.cc | 2 ++ src/operator/nn/dnnl/dnnl_rnn.cc | 1 + 2 files changed, 3 insertions(+) diff --git a/src/operator/nn/dnnl/dnnl_convolution.cc b/src/operator/nn/dnnl/dnnl_convolution.cc index fbe67dfb2e50..bb9c736566c8 100644 --- a/src/operator/nn/dnnl/dnnl_convolution.cc +++ b/src/operator/nn/dnnl/dnnl_convolution.cc @@ -30,6 +30,8 @@ #include "dnnl_convolution-inl.h" #include "dnnl_ops-inl.h" +#include + namespace mxnet { namespace op { diff --git a/src/operator/nn/dnnl/dnnl_rnn.cc b/src/operator/nn/dnnl/dnnl_rnn.cc index 0877d10dd939..0d65eb99350d 100644 --- a/src/operator/nn/dnnl/dnnl_rnn.cc +++ b/src/operator/nn/dnnl/dnnl_rnn.cc @@ -26,6 +26,7 @@ #if MXNET_USE_ONEDNN == 1 #include +#include #include "dnnl_rnn-inl.h" From 00f7c686fbcd3a50488d0393c0df75250507ad48 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Thu, 9 Dec 2021 11:34:24 +0100 Subject: [PATCH 5/5] Move header before --- src/operator/nn/dnnl/dnnl_convolution.cc | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/nn/dnnl/dnnl_convolution.cc b/src/operator/nn/dnnl/dnnl_convolution.cc index bb9c736566c8..072c15791df8 100644 --- a/src/operator/nn/dnnl/dnnl_convolution.cc +++ b/src/operator/nn/dnnl/dnnl_convolution.cc @@ -25,13 +25,13 @@ #if MXNET_USE_ONEDNN == 1 +#include + #include "operator/nn/convolution-inl.h" #include "dnnl_base-inl.h" #include "dnnl_convolution-inl.h" #include "dnnl_ops-inl.h" -#include - namespace mxnet { namespace op {