forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CPU] Add Roll support (openvinotoolkit#5112)
- Loading branch information
Showing
11 changed files
with
463 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
209 changes: 209 additions & 0 deletions
209
inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
// Copyright (C) 2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include <string> | ||
#include <vector> | ||
#include <cmath> | ||
#include <mkldnn_extension_utils.h> | ||
|
||
#include "mkldnn_roll_node.h" | ||
#include "ie_parallel.hpp" | ||
#include "ie_precision.hpp" | ||
#include "mkldnn/ie_mkldnn.h" | ||
#include "utils/general_utils.h" | ||
#include "common/cpu_memcpy.h" | ||
|
||
using namespace mkldnn; | ||
using namespace MKLDNNPlugin; | ||
using namespace InferenceEngine; | ||
|
||
// Validates the Roll layer configuration at construction time:
//  - exactly three inputs (data, shift, axes);
//  - 'data' precision whose byte size is listed in supportedPrecisionSizes;
//  - 'data' rank >= 1 and identical input/output dims;
//  - 'axes' and 'shift' inputs of integer precision (I32/I64) with rank <= 1.
// Throws via IE_THROW() on any violation.
MKLDNNRollNode::MKLDNNRollNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
        MKLDNNNode(layer, eng, cache) {
    layerErrorPrefix = "Roll layer with name '" + layer->name + "'";
    if (layer->insData.size() != numberOfInputs) {
        IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!";
    }

    /* Data */
    auto data = layer->insData[DATA_INDEX].lock();
    if (data == nullptr) {
        IE_THROW() << layerErrorPrefix << " has nullable data";
    }

    const auto &dataTensor = data->getTensorDesc();
    shape = dataTensor.getDims();
    const auto &dataPrecision = dataTensor.getPrecision();

    // execute() dispatches by precision byte size, so only the size is checked here.
    if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == supportedPrecisionSizes.end()) {
        // Fixed missing leading space so the message reads "...' has unsupported precision".
        IE_THROW() << layerErrorPrefix << " has unsupported precision: " << dataPrecision.name();
    }

    if (shape.size() < 1) {
        IE_THROW() << layerErrorPrefix << " doesn't support 'data' input tensor with rank: " << shape.size();
    }
    numOfDims = shape.size();

    // Roll never changes the shape: input and output dims must match exactly.
    if (shape != layer->outData[0]->getTensorDesc().getDims()) {
        IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions";
    }

    /* Axes */
    auto axesData = layer->insData[AXES_INDEX].lock();
    if (axesData == nullptr) {
        IE_THROW() << layerErrorPrefix << " has nullable 'axes' data";
    }
    const auto& axesTensor = axesData->getTensorDesc();
    const auto& axesTensorPrec = axesData->getTensorDesc().getPrecision();
    if (axesTensorPrec != Precision::I32 && axesTensorPrec != Precision::I64) {
        IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name();
    }

    // 'axes' must be a scalar or a 1D vector.
    const auto axesTensorRank = axesTensor.getDims().size();
    if (axesTensorRank > 1) {
        IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank;
    }

    /* Shift */
    auto shiftData = layer->insData[SHIFT_INDEX].lock();
    if (shiftData == nullptr) {
        IE_THROW() << layerErrorPrefix << " has nullable 'shift' data";
    }
    const auto& shiftTensor = shiftData->getTensorDesc();
    const auto& shiftTensorPrec = shiftData->getTensorDesc().getPrecision();
    if (shiftTensorPrec != Precision::I32 && shiftTensorPrec != Precision::I64) {
        IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name();
    }

    // 'shift' must be a scalar or a 1D vector.
    const auto shiftTensorRank = shiftTensor.getDims().size();
    if (shiftTensorRank > 1) {
        IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank;
    }
}
void MKLDNNRollNode::getSupportedDescriptors() {} | ||
|
||
void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { | ||
if (!supportedPrimitiveDescriptors.empty()) | ||
return; | ||
|
||
auto inputData = getCnnLayer()->insData[0].lock(); | ||
|
||
if (inputData == nullptr) { | ||
IE_THROW() << layerErrorPrefix << " has nullable 'data'"; | ||
} | ||
|
||
InferenceEngine::Precision precision = inputData->getPrecision(); | ||
|
||
auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); | ||
|
||
auto srcDims = getParentEdgeAt(0)->getDims(); | ||
|
||
auto dataMemoryFormat = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims()); | ||
InferenceEngine::LayerConfig config; | ||
config.dynBatchSupport = false; | ||
|
||
auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig { | ||
InferenceEngine::DataConfig dataConfig; | ||
dataConfig.inPlace = -1; | ||
dataConfig.constant = false; | ||
dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims)); | ||
return dataConfig; | ||
}; | ||
|
||
config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), dataType)); | ||
config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), memory::data_type::s32)); | ||
config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getDims(), memory::data_type::s32)); | ||
|
||
config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), dataType)); | ||
|
||
supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, dataMemoryFormat}); | ||
} | ||
|
||
|
||
// Dispatches rollImpl() by the byte size of the 'data' precision, so every
// precision of a given width (e.g. I8/U8, I16/FP16/BF16, I32/FP32) shares one
// template instantiation — Roll only moves bytes, it never interprets values.
void MKLDNNRollNode::execute(mkldnn::stream strm) {
    const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getDesc().getPrecision();
    // Plain value: the original bound a const reference to the temporary size().
    const auto dataTypeSize = dataPrecision.size();
    switch (dataTypeSize) {
        case sizeof(PrecisionTrait<Precision::I8>::value_type): {
            rollImpl<PrecisionTrait<Precision::I8>::value_type>();
            break;
        }
        case sizeof(PrecisionTrait<Precision::I16>::value_type): {
            rollImpl<PrecisionTrait<Precision::I16>::value_type>();
            break;
        }
        case sizeof(PrecisionTrait<Precision::I32>::value_type): {
            rollImpl<PrecisionTrait<Precision::I32>::value_type>();
            break;
        }
        default:
            // Fixed missing leading space after the layer-name prefix.
            IE_THROW() << layerErrorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
    }
}
|
||
// Translates a source flat offset into the destination flat offset after
// rolling one dimension by dimShift.
//   dataOffset  - flat element offset being relocated
//   dimShift    - normalized shift along this dimension, in [0, dimSize)
//   segmentSize - stride of this dimension (elements per index step)
//   dimSize     - extent of this dimension
size_t MKLDNNRollNode::calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize) {
    // Index of the element along this dimension.
    size_t pos = dataOffset / segmentSize % dimSize;
    // NOTE: 'shift' may wrap below zero in unsigned arithmetic when the element
    // rolls past the end of the dimension. That is intentional: unsigned
    // arithmetic is modulo 2^N, so the addition below still yields the correct
    // (smaller) offset. Do not "fix" this with signed types without care.
    size_t shift = (pos + dimShift) % dimSize - pos;
    return dataOffset + shift * segmentSize;
}
|
||
template <typename DataType> | ||
void MKLDNNRollNode::rollImpl() { | ||
const auto dataEdge = getParentEdgeAt(DATA_INDEX); | ||
const auto axesEdge = getParentEdgeAt(AXES_INDEX); | ||
const auto shiftsEdge = getParentEdgeAt(SHIFT_INDEX); | ||
|
||
const auto *axes = reinterpret_cast<const int32_t*>(axesEdge->getMemoryPtr()->GetPtr()); | ||
const auto *shifts = reinterpret_cast<const int32_t*>(shiftsEdge->getMemoryPtr()->GetPtr()); | ||
|
||
const auto *input = reinterpret_cast<const DataType*>(dataEdge->getMemoryPtr()->GetPtr()); | ||
auto *output = reinterpret_cast<DataType*>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); | ||
std::vector<size_t> shiftsVector(numOfDims, 0); | ||
|
||
const size_t axesLength = axesEdge->getDims()[0]; | ||
for (size_t dim = 0; dim < axesLength ; ++dim) { | ||
int32_t currentAxis = axes[dim] < 0 ? axes[dim] + numOfDims : axes[dim]; | ||
int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim]; | ||
int32_t dimSize = shape[currentAxis]; | ||
shiftsVector[currentAxis] = (shiftSum % dimSize + dimSize) % dimSize; | ||
} | ||
|
||
const size_t blockSize = shape.back(); | ||
const size_t totalElements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>()); | ||
const size_t leftBlockSize = blockSize - shiftsVector.back(); | ||
const size_t rightBlockSize = blockSize - leftBlockSize; | ||
const size_t elementSize = sizeof(DataType); | ||
|
||
const size_t nIterations = totalElements / blockSize; | ||
const auto strides = dataEdge->getDesc().getBlockingDesc().getStrides(); | ||
parallel_for(nIterations, [&](size_t iter) { | ||
size_t start = iter * blockSize; | ||
size_t leftBlockStartOffset = start; | ||
size_t rightBlockStartOffset = start + leftBlockSize; | ||
|
||
for (int dim = numOfDims - 1; dim >= 0; --dim) { | ||
leftBlockStartOffset = calculateShiftOffset(leftBlockStartOffset, shiftsVector[dim], strides[dim], shape[dim]); | ||
rightBlockStartOffset = calculateShiftOffset(rightBlockStartOffset, shiftsVector[dim], strides[dim], shape[dim]); | ||
} | ||
|
||
if (leftBlockSize > 0) | ||
cpu_memcpy(output + leftBlockStartOffset, | ||
input + start, | ||
leftBlockSize * elementSize); | ||
|
||
|
||
if (rightBlockSize > 0) | ||
cpu_memcpy(output + rightBlockStartOffset, | ||
input + (start + leftBlockSize), | ||
rightBlockSize * elementSize); | ||
}); | ||
} | ||
|
||
bool MKLDNNRollNode::created() const { | ||
return getType() == Roll; | ||
} | ||
|
||
void MKLDNNRollNode::createPrimitive() {} | ||
|
||
// Precision byte sizes accepted by the size-based dispatch in execute():
// 1 (I8/U8), 2 (I16 and other 16-bit types), 4 (I32/FP32).
const std::vector<size_t> MKLDNNRollNode::supportedPrecisionSizes = {1, 2, 4};

// Registers the node factory for the Roll operation type.
REG_MKLDNN_PRIM_FOR(MKLDNNRollNode, Roll)
41 changes: 41 additions & 0 deletions
41
inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <ie_common.h> | ||
#include <mkldnn_node.h> | ||
#include <string> | ||
|
||
namespace MKLDNNPlugin { | ||
|
||
// CPU plugin node implementing the Roll operation: cyclically shifts tensor
// elements along the given axes by the given shift amounts.
class MKLDNNRollNode : public MKLDNNNode {
public:
    MKLDNNRollNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
    ~MKLDNNRollNode() override = default;

    void getSupportedDescriptors() override;
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

private:
    // Maps a flat source offset to its rolled destination offset for one dimension.
    size_t calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize);

    // Typed implementation, instantiated per data-precision byte size.
    template <typename DataType>
    void rollImpl();

    // Dims of the 'data' input (equal to the output dims).
    std::vector<size_t> shape;
    // Precision byte sizes this node supports (defined in the .cpp).
    const static std::vector<size_t> supportedPrecisionSizes;
    // "Roll layer with name '...'" prefix used in every error message.
    std::string layerErrorPrefix;
    // Rank of 'shape', cached at construction.
    size_t numOfDims;

    // Fixed input port order: data, shift, axes.
    const size_t DATA_INDEX = 0ul;
    const size_t SHIFT_INDEX = 1ul;
    const size_t AXES_INDEX = 2ul;
    const size_t numberOfInputs = 3ul;
};
|
||
} // namespace MKLDNNPlugin |
96 changes: 96 additions & 0 deletions
96
...nce-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include <vector> | ||
|
||
#include "single_layer_tests/roll.hpp" | ||
#include "common_test_utils/test_constants.hpp" | ||
|
||
using namespace LayerTestsDefinitions; | ||
|
||
namespace {

// Data precisions exercised by every test case below; Roll dispatches by
// precision byte size, so this covers the 1/2/4-byte paths.
const std::vector<InferenceEngine::Precision> inputPrecision = {
        InferenceEngine::Precision::I8,
        InferenceEngine::Precision::U8,
        InferenceEngine::Precision::I16,
        InferenceEngine::Precision::I32,
        InferenceEngine::Precision::FP32,
        InferenceEngine::Precision::BF16
};

// Degenerate case: zero shifts must leave the tensor untouched.
const auto testCase2DZeroShifts = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{17, 19}), // Input shape
        ::testing::ValuesIn(inputPrecision),            // Precision
        ::testing::Values(std::vector<int64_t>{0, 0}),  // Shift
        ::testing::Values(std::vector<int64_t>{0, 1}),  // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase1D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{16}),     // Input shape
        ::testing::ValuesIn(inputPrecision),            // Precision
        ::testing::Values(std::vector<int64_t>{5}),     // Shift
        ::testing::Values(std::vector<int64_t>{0}),     // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase2D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{600, 450}),   // Input shape
        ::testing::ValuesIn(inputPrecision),                // Precision
        ::testing::Values(std::vector<int64_t>{300, 250}),  // Shift
        ::testing::Values(std::vector<int64_t>{0, 1}),      // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase3D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 320, 320}),    // Input shape
        ::testing::ValuesIn(inputPrecision),                    // Precision
        ::testing::Values(std::vector<int64_t>{160, 160}),      // Shift
        ::testing::Values(std::vector<int64_t>{1, 2}),          // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

// Axes given out of order and counted from the back.
const auto testCaseNegativeUnorderedAxes4D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{3, 11, 6, 4}),    // Input shape
        ::testing::ValuesIn(inputPrecision),                    // Precision
        ::testing::Values(std::vector<int64_t>{7, 3}),          // Shift
        ::testing::Values(std::vector<int64_t>{-3, -2}),        // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

// Repeated axes: shifts for the same axis must accumulate.
// NOTE(review): named "5D" but the input shape {2, 16, 32, 32} is 4D — the
// name (and the matching INSTANTIATE below) looks stale; confirm and rename.
const auto testCaseRepeatingAxes5D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 16, 32, 32}),                      // Input shape
        ::testing::ValuesIn(inputPrecision),                                        // Precision
        ::testing::Values(std::vector<int64_t>{16, 15, 10, 2, 1, 7, 2, 8, 1, 1}),   // Shift
        ::testing::Values(std::vector<int64_t>{-1, -2, -3, 1, 0, 3, 3, 2, -2, -3}), // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

// Negative shifts roll in the opposite direction.
const auto testCaseNegativeShifts6D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{4, 16, 3, 6, 5, 2}),          // Input shape
        ::testing::ValuesIn(inputPrecision),                                // Precision
        ::testing::Values(std::vector<int64_t>{-2, -15, -2, -1, -4, -1}),   // Shift
        ::testing::Values(std::vector<int64_t>{0, 1, 2, 3, 4, 5}),          // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseUnordNegAxesAndShifts10D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 2, 4, 2, 3, 6, 3, 2, 3, 2}),   // Input shape
        ::testing::ValuesIn(inputPrecision),                                    // Precision
        ::testing::Values(std::vector<int64_t>{-2, -1, 1, 1, 1, -2}),           // Shift
        ::testing::Values(std::vector<int64_t>{-6, -4, -3, 1, -10, -2}),        // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

// NOTE(review): "negative_unordered_axes_5d" instantiates the repeating-axes
// case and "negative_shifts_6d"/"5d" labels don't all match their shapes;
// renaming would change reported test IDs, so flagging instead of fixing here.
// INSTANTIATE_TEST_CASE_P is deprecated in newer googletest in favor of
// INSTANTIATE_TEST_SUITE_P — keep as-is while the pinned gtest requires it.
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_2d_zero_shifts, RollLayerTest, testCase2DZeroShifts, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_1d, RollLayerTest, testCase1D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_2d, RollLayerTest, testCase2D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_3d, RollLayerTest, testCase3D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_4d, RollLayerTest, testCaseNegativeUnorderedAxes4D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_5d, RollLayerTest, testCaseRepeatingAxes5D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_shifts_6d, RollLayerTest, testCaseNegativeShifts6D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_unord_neg_shifts_and_axes_10d, RollLayerTest, testCaseUnordNegAxesAndShifts10D, RollLayerTest::getTestCaseName);

}  // namespace
15 changes: 15 additions & 0 deletions
15
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/roll.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "shared_test_classes/single_layer/roll.hpp" | ||
|
||
namespace LayerTestsDefinitions { | ||
|
||
// Runs the shared Roll single-layer test flow: builds the function, infers on
// the plugin, and compares against the reference implementation.
// (Dropped the stray ';' after the macro body — it was a redundant empty declaration.)
TEST_P(RollLayerTest, CompareWithRefs) {
    Run();
}
|
||
} // namespace LayerTestsDefinitions |
Oops, something went wrong.