Revert "[CPU] Add explicit storage for MemoryNode"
This reverts commit 0923a30.
AlexPeskov committed Jun 25, 2020
1 parent 0923a30 commit 5f74a34
Showing 4 changed files with 24 additions and 70 deletions.
4 changes: 1 addition & 3 deletions inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
@@ -10,7 +10,6 @@
#include "mkldnn_async_infer_request.h"
#include "mkldnn_infer_request.h"
#include "mkldnn_memory_state.h"
#include "nodes/mkldnn_memory_node.hpp"
#include "bf16transformer.h"
#include <ie_util_internal.hpp>
#include <graph_tools.hpp>
@@ -160,8 +159,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
     if (_graphs.size() == 1) {
         for (auto &node : _graphs.begin()->get()->GetNodes()) {
             if (node->getType() == MemoryInput) {
-                auto memoryNode = dynamic_cast<MKLDNNMemoryInputNode*>(node.get());
-                auto state_store = memoryNode->getStore();
+                auto state_store = node->getChildEdgeAt(0)->getMemoryPtr();
                 auto state_name = node->getName();
 
                 // Remove suffix with pair ID. Internal information.
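With the explicit store gone, the execution network no longer needs the concrete MKLDNNMemoryInputNode type (hence the dropped include): the state blob is whatever memory object already hangs on the node's first child edge. A minimal standalone sketch of that idea, assuming nothing from the plugin: state exposed by aliasing a shared buffer instead of copying into a private store (Blob, the state name, and all values are illustrative stand-ins):

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

using Blob = std::vector<float>;
using BlobPtr = std::shared_ptr<Blob>;

int main() {
    // The "child edge" owns the tensor the MemoryInput node produces.
    BlobPtr edge_memory = std::make_shared<Blob>(4, 0.0f);

    // The state registry keeps an alias, not a copy: writes through the
    // edge are immediately visible to whoever queries the state.
    std::map<std::string, BlobPtr> states;
    states["r_1"] = edge_memory;   // ~ node->getChildEdgeAt(0)->getMemoryPtr()

    (*edge_memory)[0] = 42.0f;     // the network updates its hidden state
    std::cout << states.at("r_1")->at(0) << "\n";   // prints 42
}

Because the registry and the edge share one buffer, state accessors observe every write without an extra copy; the cost is that the buffer's lifetime must now be pinned, which is exactly what the workaround in the next file restores.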
4 changes: 4 additions & 0 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -631,6 +631,10 @@ void MKLDNNGraph::AllocateWithReuse() {
             isConst |= isConstOutput(edge);
             isOutput |= edge->getChild()->getType() == Output;
             isInput |= edge->getParent()->getType() == Input;
+
+            // WA. MemoryOutput will keep data in that edge
+            // So need to make it immortal..
+            isConst |= edge->getParent()->getType() == MemoryInput;
         }
 
         if (reuse_io_tensors) {
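This restored workaround is what pins the state's lifetime: AllocateWithReuse() lets ordinary edges share recycled buffers, but a recurrent state written by MemoryOutput must survive across infer calls, so edges fed by a MemoryInput are flagged like constants and kept out of the reuse pool. A toy illustration of that exclusion rule (EdgeBox, the flag name, and the byte counts are made up for the example):

#include <cstddef>
#include <iostream>
#include <vector>

struct EdgeBox {
    std::size_t bytes;
    bool immortal = false;   // constants and, per the WA, MemoryInput outputs
};

int main() {
    std::vector<EdgeBox> boxes = {
        {1024, false},   // ordinary activation: eligible for slab reuse
        {256,  true},    // state edge: must keep its own storage
    };

    std::size_t reusable = 0, pinned = 0;
    for (const auto& b : boxes)
        (b.immortal ? pinned : reusable) += b.bytes;

    std::cout << "reusable: " << reusable
              << " B, pinned: " << pinned << " B\n";
}

The trade-off of making these edges "immortal" is footprint: pinned edges never join the shared slab, which the reverted explicit-storage approach had avoided by keeping state in a separate MKLDNNMemory object instead.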
78 changes: 18 additions & 60 deletions inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp
@@ -31,91 +31,49 @@ void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() {
         return;
 
     InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
-    // if (precision != InferenceEngine::Precision::FP32)
-    //     precision = InferenceEngine::Precision::FP32;
+    if (precision != InferenceEngine::Precision::FP32)
+        precision = InferenceEngine::Precision::FP32;
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
     InferenceEngine::LayerConfig config;
     config.dynBatchSupport = true;
     config.inConfs.resize(1);
     config.inConfs[0].inPlace = -1;
     config.inConfs[0].constant = false;
-    config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims()));
+    config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format::any);
     supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, memory::format::any);
 }
 
+const MKLDNNEdgePtr MKLDNNMemoryOutputNode::getChildEdgeAt(size_t idx) const {
+    if (inputNode != nullptr) {
+        return inputNode->getChildEdgeAt(idx);
+    }
+    return MKLDNNNode::getChildEdgeAt(idx);
+}
+
 void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) {
     auto& srcMemory = getParentEdgeAt(0)->getMemory();
 
-    auto inputMemoryNode = dynamic_cast<MKLDNNMemoryInputNode*>(inputNode);
-    IE_ASSERT(inputMemoryNode != nullptr);
-    inputMemoryNode->storeState(srcMemory);
+    const float *src_ptr = reinterpret_cast<const float*>(srcMemory.GetData()) +
+            srcMemory.GetDescriptor().data.layout_desc.blocking.offset_padding;
+    float *dst_ptr = reinterpret_cast<float*>(getChildEdgeAt(0)->getMemory().GetData()) +
+            getChildEdgeAt(0)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+    // TODO: this can be eliminated by completely removing MKLDNN memory output NODE, to fuse it with output of prev layer
+    memcpy(dst_ptr, src_ptr, srcMemory.GetSize());
 }
 
 #if defined (COMPILED_CPU_MKLDNN_INPUT_NODE)
 MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
-        : MKLDNNInputNode(layer, eng, cache), MKLDNNMemoryNode(layer), dataStore(new MKLDNNMemory{eng}) {
+        : MKLDNNInputNode(layer, eng, cache), MKLDNNMemoryNode(layer) {
     if (created()) {
         holder = MKLDNNMemoryNodeVirtualEdge::registerInput(this);
     }
 }
 
-void MKLDNNMemoryInputNode::createPrimitive() {
-    MKLDNNInputNode::createPrimitive();
-
-    auto mem_desc = getChildEdgeAt(0)->getMemoryPtr()->GetDescriptor();
-    dataStore->Create(mem_desc);
-
-    // default memory state is zero filled
-    dataStore->FillZero();
-}
-
-/**
- * Copy data from one tensor into other.
- * As is. Assume that data is dense tensor with same layout.
- * @param dst destination memory object
- * @param src source memory object
- */
-inline
-static void simple_copy(MKLDNNMemory& dst, const MKLDNNMemory& src) {
-    auto getDataWithOff = [] (const MKLDNNMemory& mem) {
-        auto elemSize = MKLDNNExtensionUtils::sizeOfDataType(mem.GetDataType());
-        return static_cast<uint8_t*>(mem.GetData()) +
-                mem.GetDescriptor().data.layout_desc.blocking.offset_padding * elemSize;
-    };
-
-    auto srcPtr = getDataWithOff(src);
-    auto dstPtr = getDataWithOff(dst);
-    auto srcSizeInByte = src.GetSize();
-    auto dstSizeInByte = dst.GetSize();
-
-    IE_ASSERT(srcSizeInByte == dstSizeInByte) << "Memory objects are not compatible. Has different sizes.";
-
-    memcpy(dstPtr, srcPtr, srcSizeInByte);
-}
-
 MKLDNNMemoryInputNode::~MKLDNNMemoryInputNode() {
     MKLDNNMemoryNodeVirtualEdge::remove(this, holder);
 }
 
-MKLDNNMemoryPtr MKLDNNMemoryInputNode::getStore() {
-    return dataStore;
-}
-
-void MKLDNNMemoryInputNode::storeState(const MKLDNNMemory &new_state) {
-    // TODO: Should be next one call:
-    //     dataStore.SetData(new_state, false);
-    // But because of performance reason we use simple manual copy
-    simple_copy(*dataStore, new_state);
-}
-
-void MKLDNNMemoryInputNode::execute(mkldnn::stream strm) {
-    auto dst_mem = getChildEdgeAt(0)->getMemory();
-    // TODO: Should be simple call of:
-    //     dst_mem.SetData(dataStore, false);
-    // But because of performance reason we use simple manual copy
-    simple_copy(dst_mem, *dataStore);
-}
-
 MKLDNNMemoryNodeVirtualEdge::Holder* MKLDNNMemoryNodeVirtualEdge::registerInput(MKLDNNMemoryInputNode * node) {
     std::lock_guard<std::mutex> lock{MKLDNNMemoryNodeVirtualEdge::holderMutex};
     // in case of output already registered
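The restored MKLDNNMemoryOutputNode::execute() assumes a dense FP32 tensor (hence the forced-FP32 precision reinstated above) and copies raw bytes from the parent edge into the child edge resolved through the input sibling, offsetting both pointers by the layout's leading padding, which mkldnn's blocking.offset_padding expresses in elements, not bytes. A self-contained sketch of that copy discipline (dense_copy and the sample offsets are illustrative, not the plugin's API):

#include <cstddef>
#include <cstring>
#include <iostream>
#include <vector>

// Copy a dense FP32 tensor, skipping each buffer's leading padding.
// Offsets are in elements, matching blocking.offset_padding's convention.
static void dense_copy(float* dst, std::size_t dst_off,
                       const float* src, std::size_t src_off,
                       std::size_t size_in_bytes) {
    std::memcpy(dst + dst_off, src + src_off, size_in_bytes);
}

int main() {
    std::vector<float> src = {-1.0f /* padding */, 1.0f, 2.0f, 3.0f};
    std::vector<float> dst(4, 0.0f);
    dense_copy(dst.data(), 1, src.data(), 1, 3 * sizeof(float));
    std::cout << dst[1] << " " << dst[2] << " " << dst[3] << "\n";  // 1 2 3
}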
8 changes: 1 addition & 7 deletions inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp
@@ -69,6 +69,7 @@ class MKLDNNMemoryOutputNode : public MKLDNNNode, public MKLDNNMemoryNode {
     ~MKLDNNMemoryOutputNode() override;
     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
+    const MKLDNNEdgePtr getChildEdgeAt(size_t idx) const override;
     void createPrimitive() override {}
     void execute(mkldnn::stream strm) override;
     bool created() const override {
@@ -78,7 +79,6 @@ class MKLDNNMemoryOutputNode : public MKLDNNNode, public MKLDNNMemoryNode {
     void setInputNode(MKLDNNNode* node) override {
         inputNode = node;
     }
-
 private:
     /**
      * @brief keeps reference to input sibling node
@@ -97,15 +97,9 @@ class MKLDNNMemoryInputNode : public MKLDNNInputNode, public MKLDNNMemoryNode {
     bool created() const override {
         return getType() == MemoryInput;
     }
-    void execute(mkldnn::stream strm) override;
-
-    void createPrimitive() override;
 
     void setInputNode(MKLDNNNode* node) override {}
-    void storeState(const MKLDNNMemory& mem);
-    MKLDNNMemoryPtr getStore();
 private:
-    MKLDNNMemoryPtr dataStore;
     static Register<MKLDNNMemoryInputNode> reg;
     MKLDNNMemoryNodeVirtualEdge::Holder* holder = nullptr;
 };
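After the revert the pairing machinery itself is unchanged: MemoryInput and MemoryOutput nodes that share a state id still find each other through MKLDNNMemoryNodeVirtualEdge's registry (see registerInput in the .cpp diff), and the output node then resolves getChildEdgeAt() through its input sibling. A simplified standalone model of such a registry, where every name is a stand-in rather than the plugin's API:

#include <iostream>
#include <map>
#include <string>

struct InNode {};

struct OutNode {
    InNode* sibling = nullptr;   // set when the paired input registers
};

// Outputs park themselves under their state id until the matching
// input shows up (mirrors the "output already registered" branch).
std::map<std::string, OutNode*> waiting_outputs;

void registerOutput(const std::string& id, OutNode* out) {
    waiting_outputs[id] = out;
}

void registerInput(const std::string& id, InNode* in) {
    auto it = waiting_outputs.find(id);
    if (it != waiting_outputs.end()) {
        it->second->sibling = in;    // connect the virtual edge
        waiting_outputs.erase(it);
    }
}

int main() {
    OutNode out;
    InNode in;
    registerOutput("state_0", &out);
    registerInput("state_0", &in);
    std::cout << std::boolalpha << (out.sibling == &in) << "\n";  // true
}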
