[ARM CPU] Fix dnnl separation #2

Closed
wants to merge 10 commits into from
737 changes: 251 additions & 486 deletions src/plugins/intel_cpu/src/nodes/deconv.cpp

Large diffs are not rendered by default.
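
Since the deconv.cpp rewrite itself is collapsed, the following is only a rough sketch of the control flow it is expected to implement on top of the new executor interface shown below. The helper name pickDeconvExecutor and the local variables are assumptions for illustration; only DeconvAttrs, DeconvExecutor, AclDeconvExecutorBuilder and Deconvolution::execPtrDeconv appear in the actual diffs.

// Sketch only; the real logic lives in the collapsed deconv.cpp diff.
// prepareParams() is expected to pick an executor through the builder interface
// introduced in this PR, and execute() then forwards the memory objects to it.
std::shared_ptr<DeconvExecutor> pickDeconvExecutor(const DeconvAttrs& attrs,            // hypothetical helper
                                                   const std::vector<MemoryDescPtr>& srcDescs,
                                                   const std::vector<MemoryDescPtr>& dstDescs,
                                                   const dnnl::primitive_attr& attr) {
    AclDeconvExecutorBuilder builder;                 // on x86 a oneDNN-based builder would be tried instead
    if (!builder.isSupported(attrs, srcDescs, dstDescs))
        return nullptr;                               // caller keeps the legacy oneDNN path
    auto executor = builder.makeExecutor();
    if (!executor->init(attrs, srcDescs, dstDescs, attr))
        return nullptr;
    return executor;                                  // stored in Deconvolution::execPtrDeconv
}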

46 changes: 7 additions & 39 deletions src/plugins/intel_cpu/src/nodes/deconv.h
@@ -9,7 +9,7 @@
#include <memory>
#include <string>
#include <vector>
#include "common/dnnl_executor.h"
#include "executors/deconv_list.hpp"

namespace ov {
namespace intel_cpu {
@@ -20,6 +20,7 @@ class Deconvolution : public Node {
Deconvolution(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);

void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) override;
void createPrimitive() override;
@@ -41,7 +42,7 @@ class Deconvolution : public Node {
bool canFuse(const NodePtr& node) const override;

const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
const std::vector<ptrdiff_t>& getStride() const { return stride; }
const std::vector<ptrdiff_t>& getStride() const { return deconvAttrs.stride; }

void prepareParams() override;
void execute(dnnl::stream strm) override;
@@ -58,46 +59,14 @@ class Deconvolution : public Node {

private:
using executorPtr = std::shared_ptr<DnnlExecutor>;
executorPtr execPtr = nullptr;

class DeconvExecutorDefault : public DnnlExecutor {
public:
DeconvExecutorDefault(const dnnl::convolution_backward_data::primitive_desc& pd,
const dnnl::memory::desc& inMemDesc,
const dnnl::memory::desc& weightMemDesc,
const dnnl::memory::desc& outMemDesc,
const dnnl::engine& engine);
};

class DeconvExecutorInt8 : public DnnlExecutor {
public:
DeconvExecutorInt8(const dnnl::deconvolution_forward::primitive_desc& pd,
const dnnl::memory::desc& inMemDesc,
const dnnl::memory::desc& weightMemDesc,
const dnnl::memory::desc& outMemDesc,
const dnnl::engine& engine);
};
std::shared_ptr<DeconvExecutor> execPtrDeconv = nullptr;

// have to hold reference (shared_ptr) to forward convolution primitive_desc
// since backward one uses the reference to it as a hint
std::vector<dnnl::convolution_forward::primitive_desc> fwdConvPD;

bool withGroups = false;
bool isDW = false;
bool isInt8 = false;
bool autoPad = false;
bool externOutShape = false;
size_t groupNum = 1;
size_t IC = 0;
size_t OC = 0;
std::vector<ptrdiff_t> kernel;
std::vector<ptrdiff_t> stride;
std::vector<ptrdiff_t> dilation;
ov::CoordinateDiff paddingL;
ov::CoordinateDiff paddingR;
ov::CoordinateDiff outputPadding;
std::vector<int32_t> lastOutputSpatialDims;
VectorDims int8WeightDims;
VectorDims expectedBiasDims {};
bool useACL = false;
DeconvAttrs deconvAttrs;

Shape inShape;

@@ -112,7 +81,6 @@ class Deconvolution : public Node {
void initPaddingR(const Shape &inShape, const Shape &outShape);
std::vector<int32_t> readOutputSpatialDims() const;
std::pair<VectorDims, VectorDims> makeDummyInOutShape();
bool withBiases = false;
size_t biasPort;

std::string errorPrefix;
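Most of the per-node members deleted above are consolidated into the shared DeconvAttrs structure consumed by the executors. DeconvAttrs itself is defined in executors/deconv.hpp, which is not part of this page; the snippet below is only a sketch of what it presumably carries, inferred from the deleted fields and from the deconvAttrs.* accesses in acl_deconv.cpp further down.

// Presumed shape of DeconvAttrs (see executors/deconv.hpp, not shown in this diff).
struct DeconvAttrs {
    std::vector<ptrdiff_t> kernel;
    std::vector<ptrdiff_t> stride;
    std::vector<ptrdiff_t> dilation;   // presumably kept in oneDNN zero-based form, hence the +1 in acl_deconv.cpp
    ov::CoordinateDiff paddingL;
    ov::CoordinateDiff paddingR;
    ov::CoordinateDiff outputPadding;
    bool withBiases = false;
    // remaining fields omitted; the full definition lives in executors/deconv.hpp
};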
128 changes: 128 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
@@ -0,0 +1,128 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_deconv.hpp"
#include "acl_utils.hpp"
#include "ie_parallel.hpp"

namespace ov {
namespace intel_cpu {

using namespace arm_compute;

//FIXME: add context
AclDeconvExecutor::AclDeconvExecutor() : DeconvExecutor() {}

bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
const std::vector<MemoryDescPtr>& srcDescs,
const std::vector<MemoryDescPtr>& dstDescs,
const dnnl::primitive_attr &attr) {
this->deconvAttrs = deconvAttrs;
auto srcDims = srcDescs[0]->getShape().getStaticDims();
auto weiDims = srcDescs[1]->getShape().getStaticDims();
// swap the input and output channel dimensions to align with ACL:
// the weights tensor shape is changed because ACL expects a [W, H, I, O] tensor while OV uses [I, O, H, W]
std::swap(weiDims[0], weiDims[1]);
auto dstDims = dstDescs[0]->getShape().getStaticDims();

VectorDims biasDims;
TensorInfo biasTensorInfo;
if (deconvAttrs.withBiases) {
biasDims = srcDescs[2]->getShape().getStaticDims();
// the bias precision is i32 in OV, but ACL requires the bias to use the same precision as the input
biasTensorInfo = TensorInfo(shapeCast(biasDims), 1,
precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[2]));
}

TensorInfo srcTensorInfo = TensorInfo(shapeCast(srcDims), 1,
precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0]));
TensorInfo weiTensorInfo = TensorInfo(shapeCast(weiDims), 1,
precisionToAclDataType(srcDescs[1]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[1]));
TensorInfo dstTensorInfo = TensorInfo(shapeCast(dstDims), 1,
precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0]));

unsigned int pad_l = deconvAttrs.paddingL.at(1);
unsigned int pad_r = deconvAttrs.paddingR.at(1);
unsigned int pad_t = deconvAttrs.paddingL.at(0);
unsigned int pad_b = deconvAttrs.paddingR.at(0);
unsigned int stride_x = deconvAttrs.stride.at(1);
unsigned int stride_y = deconvAttrs.stride.at(0);
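// deconvAttrs.dilation is presumably stored in oneDNN form (zero-based, 0 == no dilation),
// while ACL's Size2D expects the actual dilation factor, hence the +1 below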
unsigned int dilation_x = deconvAttrs.dilation.at(1) + 1;
unsigned int dilation_y = deconvAttrs.dilation.at(0) + 1;

arm_compute::PadStrideInfo deconv_info(stride_x, stride_y, pad_l, pad_r, pad_t, pad_b, arm_compute::DimensionRoundingType::FLOOR);
arm_compute::Size2D dilation(dilation_x, dilation_y);

arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
&weiTensorInfo,
deconvAttrs.withBiases ? &biasTensorInfo : nullptr,
&dstTensorInfo,
deconv_info);
if (!status) {
DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
return false;
}

srcTensor.allocator()->init(srcTensorInfo);
weiTensor.allocator()->init(weiTensorInfo);
dstTensor.allocator()->init(dstTensorInfo);
if (deconvAttrs.withBiases)
biasTensor.allocator()->init(biasTensorInfo);

deconv = std::make_unique<arm_compute::NEDeconvolutionLayer>();
deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiases ? &biasTensor : nullptr, &dstTensor, deconv_info);

return true;
}

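// helper: transposes the first two dimensions of a 4D tensor (permutation {1, 0, 2, 3});
// used in exec() below to swap the I and O dimensions of the weights before handing them to ACL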
static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
const auto src_data = reinterpret_cast<float*>(srcMemPtr->GetPtr());

const int DIM0 = srcMemPtr->getStaticDims()[0];
const int DIM1 = srcMemPtr->getStaticDims()[1];
const int DIM2 = srcMemPtr->getStaticDims()[2];
const int DIM3 = srcMemPtr->getStaticDims()[3];

parallel_for3d(DIM0, DIM1, DIM2, [&](const int dim0, const int dim1, const int dim2) {
for (int dim3 = 0; dim3 < DIM3; ++dim3) {
const int src_off = dim0 * DIM1 * DIM2 * DIM3 +
dim1 * DIM2 * DIM3 +
dim2 * DIM3 +
dim3;
const int dst_off = dim1 * DIM0 * DIM2 * DIM3 +
dim0 * DIM2 * DIM3 +
dim2 * DIM3 +
dim3;

dst_data[dst_off] = src_data[src_off];
}
});
}

void AclDeconvExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst,
const void *post_ops_data_, const dnnl::stream &strm) {
// the weights tensor is transposed here because ACL expects a [W, H, I, O] tensor while OV uses [I, O, H, W]
std::vector<float> weiBuffer(src[1]->getStaticDims()[0] *
src[1]->getStaticDims()[1] *
src[1]->getStaticDims()[2] *
src[1]->getStaticDims()[3]);
transpose_to_1023(src[1], weiBuffer);

srcTensor.allocator()->import_memory(src[0]->GetPtr());
dstTensor.allocator()->import_memory(dst[0]->GetPtr());
weiTensor.allocator()->import_memory(weiBuffer.data());
if (deconvAttrs.withBiases)
biasTensor.allocator()->import_memory(src[2]->GetPtr());

deconv->run();

srcTensor.allocator()->free();
dstTensor.allocator()->free();
weiTensor.allocator()->free();
if (deconvAttrs.withBiases)
biasTensor.allocator()->free();
}

} // namespace intel_cpu
} // namespace ov
112 changes: 112 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
@@ -0,0 +1,112 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "nodes/executors/deconv.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "utils/debug_capabilities.h"

namespace ov {
namespace intel_cpu {

class AclDeconvExecutor : public DeconvExecutor {
public:
AclDeconvExecutor();

bool init(const DeconvAttrs& deconvAttrs,
const std::vector<MemoryDescPtr>& srcDescs,
const std::vector<MemoryDescPtr>& dstDescs,
const dnnl::primitive_attr &attr) override;
void exec(const std::vector<MemoryCPtr>& src,
const std::vector<MemoryPtr>& dst,
const void *post_ops_data_,
const dnnl::stream &strm) override;

impl_desc_type getImplType() const override {
return implType;
}

private:
DeconvAttrs deconvAttrs;
impl_desc_type implType = impl_desc_type::acl;

arm_compute::Tensor srcTensor;
arm_compute::Tensor weiTensor;
arm_compute::Tensor biasTensor;
arm_compute::Tensor dstTensor;
std::unique_ptr<arm_compute::NEDeconvolutionLayer> deconv = nullptr;
};

class AclDeconvExecutorBuilder : public DeconvExecutorBuilder {
public:
bool isSupported(const DeconvAttrs& deconvAttrs,
const std::vector<MemoryDescPtr>& srcDescs,
const std::vector<MemoryDescPtr>& dstDescs) const override {
if (srcDescs[0]->getShape().getDims().size() != 2 &&
srcDescs[1]->getShape().getDims().size() != 2 &&
dstDescs[0]->getShape().getDims().size() != 2) {
DEBUG_LOG("AclDeconvExecutor does not support dimension:",
" src[0]=", srcDescs[0]->getPrecision(),
" src[1]=", srcDescs[1]->getPrecision(),
" dst[0]=", dstDescs[0]->getPrecision());
return false;
}
if ((srcDescs[0]->getPrecision() != InferenceEngine::Precision::FP32 &&
srcDescs[1]->getPrecision() != InferenceEngine::Precision::FP32 &&
dstDescs[0]->getPrecision() != InferenceEngine::Precision::FP32) &&
(srcDescs[0]->getPrecision() != InferenceEngine::Precision::FP16 &&
srcDescs[1]->getPrecision() != InferenceEngine::Precision::FP16 &&
dstDescs[0]->getPrecision() != InferenceEngine::Precision::FP16)) {
DEBUG_LOG("AclDeconvExecutor does not support precisions:",
" src[0]=", srcDescs[0]->getPrecision(),
" src[1]=", srcDescs[1]->getPrecision(),
" dst[0]=", dstDescs[0]->getPrecision());
return false;
}
if (deconvAttrs.withBiases &&
srcDescs[2]->getPrecision() != srcDescs[0]->getPrecision()) {
DEBUG_LOG("AclDeconvExecutor does not support precisions:",
" src[2]=", srcDescs[2]->getPrecision());
return false;
}

if (!(srcDescs[0]->hasLayoutType(LayoutType::ncsp) &&
srcDescs[1]->hasLayoutType(LayoutType::ncsp) &&
dstDescs[0]->hasLayoutType(LayoutType::ncsp)) &&
!(srcDescs[0]->hasLayoutType(LayoutType::nspc) &&
srcDescs[1]->hasLayoutType(LayoutType::nspc) &&
dstDescs[0]->hasLayoutType(LayoutType::nspc))) {
DEBUG_LOG("AclDeconvExecutor does not support layouts:",
" src[0]=", srcDescs[0]->serializeFormat(),
" src[1]=", srcDescs[1]->serializeFormat(),
" dst=", dstDescs[0]->serializeFormat());
return false;
}
if (deconvAttrs.withBiases &&
!(srcDescs[0]->hasLayoutType(LayoutType::ncsp) &&
srcDescs[1]->hasLayoutType(LayoutType::ncsp) &&
srcDescs[2]->hasLayoutType(LayoutType::ncsp) &&
dstDescs[0]->hasLayoutType(LayoutType::ncsp)) &&
!(srcDescs[0]->hasLayoutType(LayoutType::nspc) &&
srcDescs[1]->hasLayoutType(LayoutType::nspc) &&
srcDescs[2]->hasLayoutType(LayoutType::nspc) &&
dstDescs[0]->hasLayoutType(LayoutType::nspc))) {
DEBUG_LOG("AclDeconvExecutor does not support layouts:",
" src[0]=", srcDescs[0]->serializeFormat(),
" src[1]=", srcDescs[1]->serializeFormat(),
" src[2]=", srcDescs[2]->serializeFormat(),
" dst=", dstDescs[0]->serializeFormat());
return false;
}
return true;
}

DeconvExecutorPtr makeExecutor() const override {
return std::make_shared<AclDeconvExecutor>();
}
};

} // namespace intel_cpu
} // namespace ov
67 changes: 67 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/deconv.cpp
@@ -0,0 +1,67 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "deconv.hpp"

namespace ov {
namespace intel_cpu {

using namespace InferenceEngine;

DeconvExecutor::DeconvExecutor() {}

size_t DeconvKey::hash() const {
using namespace dnnl::impl;
using namespace dnnl::impl::primitive_hashing;

size_t seed = 0;

for (const auto& ptr : {inp0, inp1, bias, out}) {
if (ptr) {
seed = hash_combine(seed, get_md_hash(*ptr->getDnnlDesc().get()));
}
}

seed = get_vector_hash(seed, stride);
seed = get_vector_hash(seed, dilation);
seed = get_vector_hash(seed, paddingL);
seed = get_vector_hash(seed, paddingR);

seed = hash_combine(seed, isInt8);

seed = hash_combine(seed, get_attr_hash(*attr.get()));
seed = hash_combine(seed, implType);
return seed;
}

bool DeconvKey::operator==(const DeconvKey &rhs) const {
bool retVal = true;
if (inp0 != rhs.inp0) {
retVal = retVal && inp0 && rhs.inp0 && inp0->getDnnlDesc() == rhs.inp0->getDnnlDesc();
}
if (inp1 != rhs.inp1) {
retVal = retVal && inp1 && rhs.inp1 && inp1->getDnnlDesc() == rhs.inp1->getDnnlDesc();
}

if (bias != rhs.bias) {
retVal = retVal && bias && rhs.bias && bias->getDnnlDesc() == rhs.bias->getDnnlDesc();
}

if (out != rhs.out) {
retVal = retVal && out && rhs.out && out->getDnnlDesc() == rhs.out->getDnnlDesc();
}

retVal = retVal && stride == rhs.stride;
retVal = retVal && dilation == rhs.dilation;
retVal = retVal && paddingL == rhs.paddingL;
retVal = retVal && paddingR == rhs.paddingR;

retVal = retVal && isInt8 == rhs.isInt8;

retVal = retVal && *attr.get() == *rhs.attr.get() && implType == rhs.implType;
return retVal;
}

} // namespace intel_cpu
} // namespace ov
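
DeconvKey's hash() and operator== exist so that an executor or primitive built for a given shape/attribute combination can be looked up in the plugin's parameter cache instead of being recreated on every prepareParams() call. The actual call site is in the collapsed deconv.cpp diff; the sketch below only illustrates the usual intel_cpu getParamsCache()->getOrCreate() pattern, with the key field order inferred from hash() above and buildDnnlDeconvPrimitive as a hypothetical helper.

// Sketch only: how DeconvKey is typically consumed by the node's prepareParams().
DeconvKey key = {srcDesc, weiDesc, biasDesc, dstDesc,
                 deconvAttrs.stride, deconvAttrs.dilation,
                 deconvAttrs.paddingL, deconvAttrs.paddingR,
                 isInt8, attr, implType};

auto builder = [&](const DeconvKey& k) {
    return buildDnnlDeconvPrimitive(k);          // hypothetical helper that creates the primitive
};

auto cache = context->getParamsCache();          // MultiCache shared through GraphContext
auto result = cache->getOrCreate(key, builder);  // hash() and operator== are used here
auto execPtr = result.first;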