From e3aeef9bda5f7646bd38d183f8b74186e4806b73 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Thu, 25 Feb 2021 10:14:08 +0300 Subject: [PATCH] [CPU] MVN node migration on nGraph. (#12) --- .../src/mkldnn_plugin/CMakeLists.txt | 2 +- .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 132 ++++++------- .../src/mkldnn_plugin/mkldnn_node.cpp | 2 +- .../mkldnn_plugin/nodes/mkldnn_mvn_node.cpp | 178 +++++++++--------- .../src/mkldnn_plugin/nodes/mkldnn_mvn_node.h | 25 ++- 5 files changed, 169 insertions(+), 170 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index c8d970de620b01..9c81cbf96871b3 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -41,7 +41,7 @@ set(LAYERS ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_split_node.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tensoriterator_node.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tile_node.cpp -# ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_mvn_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_mvn_node.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_scatter_update_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_interpolate_node.cpp diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index d011c8e36b07ae..534c339ec308ae 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -124,9 +124,8 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { // FuseFullyConnectedAndSimpleOperation(graph); // graph.RemoveDroppedNodes(); -// TODO [NM]: transformation should be implemented w/o using of CNNLayer -// FuseMVNAndSimpleOperation(graph); -// graph.RemoveDroppedNodes(); + FuseMVNAndSimpleOperation(graph); + graph.RemoveDroppedNodes(); FuseInterpolateAndSimpleOperation(graph); graph.RemoveDroppedNodes(); @@ -1347,73 +1346,66 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG } void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { -// auto& graphNodes = graph.GetNodes(); -// -// auto isSutableParentNode = [](MKLDNNNodePtr node) { -// bool isSutableMVN = (node->getType() == MVN) && (node->inDims[0].ndims() == 4 || node->inDims[0].ndims() == 5); -// -// if (isSutableMVN) { -// auto *mvnLayer = dynamic_cast(node->getCnnLayer().get()); -// if (mvnLayer == nullptr) -// THROW_IE_EXCEPTION << "Cannot get MVN layer " << node->getName(); -// -// return node->getChildEdges().size() == 1 && mvnLayer->across_channels == 0 && mvnLayer->normalize == 1; -// } else { -// return false; -// } -// }; -// -// auto isSutableChildNode = [](MKLDNNNodePtr node) { -// if (!node->getCnnLayer()) -// return false; -// -// if (node->getType() == Quantize) { -// auto* quantizeNode = dynamic_cast(node.get()); -// if (quantizeNode == nullptr) -// THROW_IE_EXCEPTION << "Cannot get quantize layer " << node->getName(); -// return !quantizeNode->isBinarization(); -// } else if (node->getType() == Eltwise) { -// auto* eltwiseNode = dynamic_cast(node.get()); -// if (eltwiseNode == nullptr) -// THROW_IE_EXCEPTION << "Cannot get eltwise node " << node->getName(); -// -// return ((eltwiseNode->getOpType() == MulAdd) || -// (eltwiseNode->getOpType() == Prelu) || -// eltwiseNode->getOpType() == Relu); -// } -// -// return false; -// }; -// -// auto parent = graphNodes.begin(); -// while (parent != graphNodes.end()) { -// auto parentNode = *parent; -// if (!isSutableParentNode(parentNode)) { -// parent++; -// continue; -// } -// -// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); -// if (!isSutableChildNode(childNode)) { -// parent++; -// continue; -// } -// -// parentNode->fuseWith(childNode); -// -// if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { -// auto parentEdges = childNode->parentEdges; -// for (auto &parentEdge : parentEdges) { -// auto p_edge = parentEdge.lock(); -// if (p_edge->getParent()->getType() == MVN) -// continue; -// -// removeEdge(graph, p_edge); -// } -// } -// -// graph.DropNode(childNode); -// } + auto& graphNodes = graph.GetNodes(); + + auto isSutableParentNode = [](MKLDNNNodePtr node) { + bool isSutableMVN = (node->getType() == MVN) && (node->inDims[0].ndims() == 4 || node->inDims[0].ndims() == 5); + + if (isSutableMVN) { + auto mvnNode = std::dynamic_pointer_cast(node); + if (mvnNode == nullptr) + THROW_IE_EXCEPTION << "CPU node with name '" << node->getName() << "' is not a MVN node."; + + return mvnNode->getChildEdges().size() == 1 && !mvnNode->getAcrossChannels() && mvnNode->getNormalizeVariance(); + } else { + return false; + } + }; + + auto isSutableChildNode = [](MKLDNNNodePtr node) { + if (node->getType() == Quantize) { + auto* quantizeNode = dynamic_cast(node.get()); + if (quantizeNode == nullptr) + THROW_IE_EXCEPTION << "CPU node with name '" << node->getName() << "' is not a Quantize node."; + return !quantizeNode->isBinarization(); + } else if (node->getType() == Eltwise) { + return ((node->getAlgorithm() == EltwiseMulAdd) || + (node->getAlgorithm() == EltwisePrelu) || + (node->getAlgorithm() == EltwiseRelu)); + } + + return false; + }; + + auto parent = graphNodes.begin(); + while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSutableParentNode(parentNode)) { + parent++; + continue; + } + + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!isSutableChildNode(childNode)) { + parent++; + continue; + } + + parentNode->fuseWith(childNode); + + if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { + auto parentEdges = childNode->parentEdges; + for (auto &parentEdge : parentEdges) { + auto p_edge = parentEdge.lock(); + if (p_edge->getParent()->getType() == MVN) + continue; + + removeEdge(graph, p_edge); + } + } + + graph.DropNode(childNode); + } } void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 34a553c0c23f59..6e53360dc7f20e 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -167,7 +167,7 @@ static const InferenceEngine::details::caseless_unordered_map // { "MemoryInput", MemoryInput}, // for construction from name ctor, arbitrary name is used // { "Memory", MemoryOutput }, // for construction from layer ctor // { "Convert", Convert }, -// { "MVN", MVN}, + { "MVN", MVN}, // { "Normalize", Normalize}, // { "ScatterUpdate", ScatterUpdate}, // { "ScatterElementsUpdate", ScatterElementsUpdate}, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index 3a526e994c876e..324f210b4c6b06 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -4,18 +4,15 @@ #include "mkldnn_mvn_node.h" -#include "mkldnn_quantize_node.h" -#include -#include "mkldnn_eltwise_node.h" -#include +#include #include #include -#include + +#include "mkldnn_quantize_node.h" +#include "mkldnn_eltwise_node.h" #include #include "utils/bfloat16.hpp" -#include #include "ie_parallel.hpp" -#include #include "emitters/jit_load_store_emitters.hpp" #include "emitters/jit_bf16_emitters.hpp" @@ -606,69 +603,72 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator ////////////////////////////////////////////////////////////////////////////////// MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache), epsMode_(insideSqrt) {} - -void MKLDNNMVNNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - + : MKLDNNNode(op, eng, cache) { std::string errPrefix = "MVN node with name '" + getName() + "' "; - auto cnnLayer = getCnnLayer(); - if (cnnLayer == nullptr) - THROW_IE_EXCEPTION << errPrefix << "does not have CNN layer."; - - if (getParentEdges().size() > 2) - THROW_IE_EXCEPTION << errPrefix << "has incorrect number of input edges."; - - if (getChildEdges().empty()) + if (op->get_output_size() != 1) THROW_IE_EXCEPTION << errPrefix << "has incorrect number of output edges."; - const auto& numOfDims = getParentEdgeAt(0)->getDims().ndims(); - if (numOfDims < 1 || numOfDims > 5) - THROW_IE_EXCEPTION << errPrefix << "doesn't support input with size of dimensions: " << numOfDims; + const auto& inDataShapeSize = op->input_value(0).get_shape().size(); + if (inDataShapeSize < 1 || inDataShapeSize > 5) + THROW_IE_EXCEPTION_WITH_STATUS(NOT_IMPLEMENTED) << errPrefix << "doesn't support input with size of dimensions: " << inDataShapeSize; + + if (auto mvnOp = ngraph::as_type_ptr(op)) { + if (mvnOp->get_input_size() != 2) + THROW_IE_EXCEPTION << errPrefix << "has incorrect number of input edges."; + + normalizeVariance_ = mvnOp->get_normalize_variance(); + epsValue_ = mvnOp->get_eps(); + auto epsMode = mvnOp->get_eps_mode(); + if (epsMode == ngraph::op::MVNEpsMode::INSIDE_SQRT) { + epsMode_ = INSIDE_SQRT; + } else if (epsMode == ngraph::op::MVNEpsMode::OUTSIDE_SQRT) { + epsMode_ = INSIDE_SQRT; + } else { + THROW_IE_EXCEPTION_WITH_STATUS(NOT_IMPLEMENTED) << errPrefix << "does not support epsilon mode: " << epsMode; + } - across_channels = false; - if (getParentEdges().size() == 1) { - across_channels = cnnLayer->GetParamAsBool("across_channels"); + acrossChannels_ = false; + if (inDataShapeSize == mvnOp->input_value(1).get_shape()[0] + 1 || inDataShapeSize == 1) + acrossChannels_ = true; + } else if (auto mvnOp = ngraph::as_type_ptr(op)) { + if (mvnOp->get_input_size() != 1) + THROW_IE_EXCEPTION << errPrefix << "has incorrect number of input edges."; + + normalizeVariance_ = mvnOp->get_normalize_variance(); + epsValue_ = mvnOp->get_eps(); + epsMode_ = INSIDE_SQRT; + acrossChannels_ = mvnOp->get_across_channels(); } else { - if (numOfDims == getParentEdgeAt(1)->getDims().size() + 1 || numOfDims == 1) - across_channels = true; - } - normalize_variance = cnnLayer->GetParamAsBool("normalize_variance", true); - eps = cnnLayer->GetParamAsFloat("eps"); - auto epsMode = cnnLayer->GetParamAsString("eps_mode", ""); - if (details::CaselessEq()(epsMode, "inside_sqrt")) { - epsMode_ = insideSqrt; - } else if (details::CaselessEq()(epsMode, "outside_sqrt")) { - epsMode_ = outsideSqrt; + THROW_IE_EXCEPTION_WITH_STATUS(NOT_IMPLEMENTED) + << "CPU MVN node doesn't support ngraph operation '" << op->get_type_name() << "' with name '" << op->get_friendly_name() << "'"; } } +void MKLDNNMVNNode::getSupportedDescriptors() { +} + void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; setPostOps(attr, true); - Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); + Precision inputPrecision = getOriginalInputPrecisions()[0]; if (getParentEdgeAt(0)->getDims().ndims() < 3 || getParentEdgeAt(0)->getDims().ndims() > 5 - || across_channels != 0 || normalize_variance != 1) { + || acrossChannels_ || !normalizeVariance_) { if (!isFloatCompatible(inputPrecision)) { inputPrecision = Precision::FP32; } } - Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision(); + Precision outputPrecision = getOriginalOutputPrecisions()[0]; if (!mayiuse(avx512_core)) { if (outputPrecision == Precision::BF16) outputPrecision = Precision::FP32; } if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } + outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisions()[0]; } // ref with float planar and no fusion @@ -688,7 +688,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { (getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1) && !getParentEdgeAt(0)->getParent()->isConstant(); - const size_t inputsNum = getCnnLayer()->insData.size(); + const size_t inputsNum = getParentEdges().size(); InferenceEngine::LayerConfig config; config.dynBatchSupport = false; config.inConfs.resize(inputsNum); @@ -698,7 +698,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 0 : -1; if (inputsNum == 2) { - const auto& dims = getCnnLayer()->insData[1].lock()->getTensorDesc().getDims(); + const auto dims = getParentEdgeAt(1)->getDims().ToSizeVector(); config.inConfs[1].desc = TensorDesc(Precision::I32, dims, TensorDesc::getLayoutByDims(dims)); @@ -759,7 +759,7 @@ std::tuple MKLDNNMVNNode::get5dShapes(co case 3 : { shapes = std::make_tuple(dims[0], dims[1], 1, dims[2], 1); break; } case 4 : { shapes = std::make_tuple(dims[0], dims[1], 1, dims[2], dims[3]); break; } case 5 : { shapes = std::make_tuple(dims[0], dims[1], dims[2], dims[3], dims[4]); break; } - default : { THROW_IE_EXCEPTION << "MVN layer with name '" << getCnnLayer()->name << "' doesn't support planar layout with rank: " << dims.size(); } + default : { THROW_IE_EXCEPTION << "MVN layer with name '" << getName() << "' doesn't support planar layout with rank: " << dims.size(); } } return shapes; } @@ -781,8 +781,8 @@ void MKLDNNMVNNode::createPrimitive() { jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc)); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc)); jcp.planar_layout = MKLDNNMemory::GetPlainLayout(getChildEdgeAt(0)->getDims()) == selectedPD->getConfig().inConfs[0].desc.getLayout(); - jcp.normalize_variance = normalize_variance; - jcp.across_channels = across_channels; + jcp.normalize_variance = normalizeVariance_; + jcp.across_channels = acrossChannels_; SizeVector in_dims = getParentEdgeAt(0)->getDims().ToSizeVector(); int N = 0; std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = get5dShapes(in_dims); @@ -792,7 +792,7 @@ void MKLDNNMVNNode::createPrimitive() { jcp.normalize_variance = false; mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); - if (normalize_variance) { + if (normalizeVariance_) { jcp.normalize_variance = true; mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); } @@ -801,7 +801,7 @@ void MKLDNNMVNNode::createPrimitive() { jcp.normalize_variance = false; mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); - if (normalize_variance) { + if (normalizeVariance_) { jcp.normalize_variance = true; mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); } @@ -810,7 +810,7 @@ void MKLDNNMVNNode::createPrimitive() { jcp.normalize_variance = false; mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); - if (normalize_variance) { + if (normalizeVariance_) { jcp.normalize_variance = true; mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32(jcp)); } @@ -854,8 +854,8 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { auto dim = getParentEdgeAt(0)->getDesc().getDims(); if (mayiuse(cpu::x64::sse41)) { - if (!mvn_mean_kernel || (normalize_variance && !mvn_variance_kernel) || !mvn_kernel) { - THROW_IE_EXCEPTION << "MVN layer with name '" << getCnnLayer()->name << "' doesn't create kernel to execute on sse41 above platform."; + if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) { + THROW_IE_EXCEPTION << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform."; } Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); if (layout == C || layout == NC || layout == CHW || layout == NCHW || layout == NCDHW) { @@ -890,7 +890,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si for (size_t b = 0lu; b < N; b++) { size_t cb = b * C3; - if (across_channels) { + if (acrossChannels_) { // Calculate mean value for one instance in batch // Parallel sum for each channel float C3inv = 1.f / static_cast(C3); @@ -911,7 +911,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si // calculate variance value for one instance in batch // parallel sum for each channel - if (normalize_variance) { + if (normalizeVariance_) { float variance_temp = 0.0f; variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float { float variance_internal = 0.0f; @@ -927,10 +927,10 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si }); float variance = 1.f; - if (epsMode_ == insideSqrt) - variance /= sqrtf(variance_temp * C3inv + eps); - else if (epsMode_ == outsideSqrt) - variance /= sqrtf(variance_temp * C3inv) + eps; + if (epsMode_ == INSIDE_SQRT) + variance /= sqrtf(variance_temp * C3inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance /= sqrtf(variance_temp * C3inv) + epsValue_; // mvn for one instance in batch parallel_for(C, [&](int c) { size_t cc = cb + c * C2; @@ -979,17 +979,17 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si mean *= C2inv; - if (normalize_variance) { + if (normalizeVariance_) { // variance for this channel float variance = 0.f; arg.mean = static_cast(&mean); arg.variance = static_cast(&variance); (*mvn_variance_kernel)(&arg); - if (epsMode_ == insideSqrt) - variance = 1.f / sqrtf(variance * C2inv + eps); - else if (epsMode_ == outsideSqrt) - variance = 1.f / (sqrtf(variance * C2inv) + eps); + if (epsMode_ == INSIDE_SQRT) + variance = 1.f / sqrtf(variance * C2inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance = 1.f / (sqrtf(variance * C2inv) + epsValue_); // mvn for this channel (*mvn_kernel)(&arg); @@ -1015,7 +1015,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si for (size_t b = 0lu; b < N; b++) { size_t cb = b * C3; - if (across_channels) { + if (acrossChannels_) { // Parallel sum for each channel for mean float C3inv = 1.f / static_cast(C3); float mean_temp = 0.0f; @@ -1031,7 +1031,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si float mean = mean_temp * C3inv; - if (normalize_variance) { + if (normalizeVariance_) { // parallel sum for each channel for variance float variance_temp = 0.0f; variance_temp = parallel_sum(C, variance_temp, [&](size_t c)->float { @@ -1044,10 +1044,10 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si }); float variance = 1.f; - if (epsMode_ == insideSqrt) - variance = 1.f / sqrtf(variance_temp * C3inv + eps); - else if (epsMode_ == outsideSqrt) - variance = 1.f / (sqrtf(variance_temp * C3inv) + eps); + if (epsMode_ == INSIDE_SQRT) + variance = 1.f / sqrtf(variance_temp * C3inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance = 1.f / (sqrtf(variance_temp * C3inv) + epsValue_); parallel_for(C, [&](int c) { size_t cc = cb + c * C2; @@ -1074,17 +1074,17 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si } mean *= C2inv; - if (normalize_variance) { + if (normalizeVariance_) { // variance for this channel float variance = 0.f; for (size_t sp = 0lu; sp < C2; sp++) { variance += (src_data_ptr[cc + sp] - mean) * (src_data_ptr[cc + sp] - mean); } - if (epsMode_ == insideSqrt) - variance = 1.f / sqrtf(variance * C2inv + eps); - else if (epsMode_ == outsideSqrt) - variance = 1.f / (sqrtf(variance * C2inv) + eps); + if (epsMode_ == INSIDE_SQRT) + variance = 1.f / sqrtf(variance * C2inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance = 1.f / (sqrtf(variance * C2inv) + epsValue_); // mvn for this channel for (size_t sp = 0lu; sp < C2; sp++) { @@ -1126,7 +1126,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si size_t C5 = C * D * H * W; size_t threads_num = parallel_get_num_threads(); - size_t aux_buffer_size = across_channels ? blk_size : rnd_up(C, blk_size); + size_t aux_buffer_size = acrossChannels_ ? blk_size : rnd_up(C, blk_size); std::vector mean_buffer(aux_buffer_size * threads_num); std::vector variance_buffer(aux_buffer_size * threads_num); @@ -1135,7 +1135,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si for (size_t b = 0lu; b < N; b++) { size_t b_offset = is_nhwc ? b * C5 : b * C3; - if (across_channels) { + if (acrossChannels_) { // mean for this instance in batch float C5inv = 1.f / static_cast(C5); float mean_temp = 0.0f; @@ -1172,7 +1172,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si }); float mean = mean_temp * C5inv; - if (normalize_variance) { + if (normalizeVariance_) { // variance: sum((x-mean)*(x-mean)) for one instance in batch float variance_temp = 0.0f; variance_temp = parallel_sum3d(CB, D, H, variance_temp, [&](size_t cb, size_t d, size_t h)->float { @@ -1200,10 +1200,10 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si }); float variance = 1.f; - if (epsMode_ == insideSqrt) - variance /= sqrtf(variance_temp * C5inv + eps); - else if (epsMode_ == outsideSqrt) - variance /= sqrtf(variance_temp * C5inv) + eps; + if (epsMode_ == INSIDE_SQRT) + variance /= sqrtf(variance_temp * C5inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance /= sqrtf(variance_temp * C5inv) + epsValue_; // mvn for one instance in batch parallel_for3d(CB, D, H, [&](size_t cb, size_t d, size_t h) { size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size @@ -1265,7 +1265,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si for (size_t c = 0; c < C; c++) mean_buffer[c] *= size_inv; - if (normalize_variance) { + if (normalizeVariance_) { for (int i = 0; i < variance_buffer.size(); i++) variance_buffer[i] = 0.f; @@ -1291,10 +1291,10 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si variance_buffer[c] += variance_buffer[c + aux_buffer_size * i]; } for (size_t c = 0; c < C; c++) { - if (epsMode_ == insideSqrt) - variance_buffer[c] = 1.f / sqrtf(variance_buffer[c] * size_inv + eps); - else if (epsMode_ == outsideSqrt) - variance_buffer[c] = 1.f / (sqrtf(variance_buffer[c] * size_inv) + eps); + if (epsMode_ == INSIDE_SQRT) + variance_buffer[c] = 1.f / sqrtf(variance_buffer[c] * size_inv + epsValue_); + else if (epsMode_ == OUTSIDE_SQRT) + variance_buffer[c] = 1.f / (sqrtf(variance_buffer[c] * size_inv) + epsValue_); } parallel_for2d(D, H, [&](size_t d, size_t h) { @@ -1317,7 +1317,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si } }); } else { - // normalize_variance == false + // normalizeVariance_ == false parallel_for2d(D, H, [&](size_t d, size_t h) { for (size_t cb = 0; cb < CB; cb++) { size_t src_offset = is_nhwc ? b_offset + d * C1 + h * C0 + cb * blk_size diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h index 00a8056e3f771e..41e4f9269098ee 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h @@ -1,10 +1,9 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once -#include #include #include #include @@ -87,6 +86,14 @@ class MKLDNNMVNNode : public MKLDNNNode { static bool checkAxesSuitability(const std::shared_ptr&); + inline bool getAcrossChannels() const { + return acrossChannels_; + }; + + inline bool getNormalizeVariance() const { + return normalizeVariance_; + }; + private: void mvn_pln(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims); @@ -98,15 +105,15 @@ class MKLDNNMVNNode : public MKLDNNNode { std::tuple get5dShapes(const InferenceEngine::SizeVector& dims); - bool across_channels = false; - bool normalize_variance = true; - float eps = 1e-9f; + bool acrossChannels_ = false; + bool normalizeVariance_ = true; + float epsValue_ = 1e-9f; // Defines way to add epsilon: inside sqrt or outside. - enum epsType { - insideSqrt, - outsideSqrt + enum MVNEpsMode { + INSIDE_SQRT, + OUTSIDE_SQRT }; - epsType epsMode_; + MVNEpsMode epsMode_; InferenceEngine::Precision input_prec, output_prec; size_t src_data_size, dst_data_size;