Skip to content

Commit

Permalink
[CPU] Add Roll support
Browse files Browse the repository at this point in the history
  • Loading branch information
apertovs committed Apr 19, 2021
1 parent 26801c1 commit c0ab191
Show file tree
Hide file tree
Showing 8 changed files with 432 additions and 0 deletions.
1 change: 1 addition & 0 deletions inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,4 @@ MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum);
MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum);
MKLDNN_EXTENSION_NODE(RollImpl, Roll);
233 changes: 233 additions & 0 deletions inference-engine/src/mkldnn_plugin/nodes/roll.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <string>
#include <vector>
#include <cmath>

#include "ie_parallel.hpp"
#include "ie_precision.hpp"
#include "mkldnn/ie_mkldnn.h"
#include "utils/general_utils.h"
#include "common/cpu_memcpy.h"


namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

class RollImpl: public ExtLayerBase {
public:
    /// Validates the Roll layer (input count, tensor ranks, precisions) and
    /// registers the single supported configuration. Following the
    /// ExtLayerBase convention, validation failures are captured into
    /// errorMsg instead of escaping the constructor.
    explicit RollImpl(const CNNLayer* layer) {
        try {
            layerName = layer->name;
            const std::string layerErrorPrefix = "Roll layer with name '" + layerName + "'";
            if (layer->insData.size() != numberOfInputs) {
                IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!";
            }

            /* Data */
            auto data = layer->insData[DATA_INDEX].lock();
            if (data == nullptr) {
                IE_THROW() << layerErrorPrefix << " has nullable data";
            }
            const auto& dataTensor = data->getTensorDesc();
            const auto& dataShape = dataTensor.getDims();
            // Roll needs at least one axis to rotate along; scalars are rejected.
            if (dataShape.size() < 1) {
                IE_THROW() << layerErrorPrefix << " doesn't support 'data' input tensor with rank: " << dataShape.size();
            }
            numOfDims = dataShape.size();

            const auto& dataPrecision = dataTensor.getPrecision();
            if (!MKLDNNPlugin::one_of(dataPrecision, Precision::I8, Precision::U8, Precision::I16, Precision::I32, Precision::FP32, Precision::I64,
                                      Precision::U64, Precision::BF16)) {
                IE_THROW() << layerErrorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
            }

            // Roll is a pure element permutation: output shape must match input shape.
            if (dataShape != layer->outData[0]->getTensorDesc().getDims()) {
                IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions";
            }

            /* Axes */
            auto axesData = layer->insData[AXES_INDEX].lock();
            if (axesData == nullptr) {
                IE_THROW() << layerErrorPrefix << " has nullable 'axes' data";
            }
            const auto& axesTensor = axesData->getTensorDesc();
            const auto& axesTensorPrec = axesData->getTensorDesc().getPrecision();
            if (axesTensorPrec != Precision::I32 && axesTensorPrec != Precision::I64) {
                IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name();
            }

            // 'axes' must be a scalar or a 1D tensor.
            const auto axesTensorRank = axesTensor.getDims().size();
            if (axesTensorRank > 1) {
                IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank;
            }

            /* Shift */
            auto shiftData = layer->insData[SHIFT_INDEX].lock();
            if (shiftData == nullptr) {
                IE_THROW() << layerErrorPrefix << " has nullable 'shift' data";
            }
            const auto& shiftTensor = shiftData->getTensorDesc();
            const auto& shiftTensorPrec = shiftData->getTensorDesc().getPrecision();
            if (shiftTensorPrec != Precision::I32 && shiftTensorPrec != Precision::I64) {
                IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name();
            }

            // 'shift' must be a scalar or a 1D tensor.
            const auto shiftTensorRank = shiftTensor.getDims().size();
            if (shiftTensorRank > 1) {
                IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank;
            }

            shape = dataShape;

            // Single supported configuration: planar layouts, 'shift'/'axes'
            // forced to I32 since the kernel reads them as int32_t.
            LayerConfig config;
            for (size_t i = 0; i < layer->insData.size(); i++) {
                DataConfig inConfig;
                inConfig.inPlace = -1;
                inConfig.constant = false;
                auto inputData = layer->insData[i].lock();
                if (inputData == nullptr) {
                    IE_THROW() << layerErrorPrefix << " has nullable input data at " << i;
                }
                Precision inPrecision = i > DATA_INDEX ? Precision(Precision::I32) : inputData->getTensorDesc().getPrecision();
                const SizeVector& inDims = inputData->getTensorDesc().getDims();
                inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));
                config.inConfs.push_back(inConfig);
            }
            DataConfig outConfig;
            outConfig.inPlace = -1;
            outConfig.constant = false;
            Precision outPrecision = data->getTensorDesc().getPrecision();
            const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims();
            outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));

            config.outConfs.push_back(outConfig);

            config.dynBatchSupport = false;
            confs.push_back(config);
        } catch (InferenceEngine::Exception& ex) {
            errorMsg = ex.what();
        }
    }

    /// Dispatches to the typed kernel according to the 'data' precision.
    /// @return OK on success, GENERAL_ERROR (with resp->msg filled) on an
    ///         unsupported precision.
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc* resp) noexcept override {
        const auto& dataPrecision = inputs[DATA_INDEX]->getTensorDesc().getPrecision();
        switch (dataPrecision) {
            case Precision::I8: {
                rollImpl<int8_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::U8: {
                rollImpl<uint8_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::I16: {
                rollImpl<int16_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::I32: {
                rollImpl<int32_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::FP32: {
                rollImpl<float>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::I64: {
                rollImpl<int64_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::U64: {
                rollImpl<uint64_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::BF16: {
                // FIX: BF16 is accepted by the constructor's precision check but
                // was missing here, so valid BF16 inputs failed at runtime. Roll
                // only relocates elements, so a bit-exact 16-bit copy is correct.
                rollImpl<int16_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            default: {
                if (resp) {
                    // Local renamed (was errorMsg) to avoid shadowing the
                    // ExtLayerBase::errorMsg member.
                    const std::string msg = layerName + " has unsupported 'data' input precision: " + dataPrecision.name();
                    // FIX: std::string::copy does not null-terminate the
                    // destination; terminate explicitly after the copied span.
                    const size_t copied = msg.copy(resp->msg, sizeof(resp->msg) - 1);
                    resp->msg[copied] = '\0';
                }
                return GENERAL_ERROR;
            }
        }
        return OK;
    }

private:
    /// Moves a linear buffer offset forward by dimShift positions (modulo
    /// dimSize) along the axis whose inner-element count is segmentSize.
    /// 'shift' may wrap as an unsigned value when the rolled position is
    /// behind the current one; the final addition wraps back correctly
    /// (well-defined unsigned modular arithmetic).
    size_t calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize) {
        size_t pos = dataOffset / segmentSize % dimSize;
        size_t shift = (pos + dimShift) % dimSize - pos;
        return dataOffset + shift * segmentSize;
    }

    /// Rolls the input into the output. Each innermost row is split into a
    /// "left" part (blockSize - innermostShift elements) and a "right" part
    /// (innermostShift elements) that are memcpy'd to their rolled
    /// destinations; outer axes contribute via calculateShiftOffset.
    /// 'axes'/'shifts' are read as int32_t — the layer config requests I32.
    template<typename DataType>
    void rollImpl(const Blob::CPtr& inputBlob, const Blob::CPtr& shiftsBlob, const Blob::CPtr& axesBlob, const Blob::Ptr& outputBlob) {
        const auto* axes = axesBlob->cbuffer().as<const int32_t*>() + axesBlob->getTensorDesc().getBlockingDesc().getOffsetPadding();
        const auto* shifts = shiftsBlob->cbuffer().as<const int32_t*>() + shiftsBlob->getTensorDesc().getBlockingDesc().getOffsetPadding();

        const auto* input =
            inputBlob->cbuffer().as<const DataType*>() + inputBlob->getTensorDesc().getBlockingDesc().getOffsetPadding();
        auto* output = outputBlob->buffer().as<DataType*>() + outputBlob->getTensorDesc().getBlockingDesc().getOffsetPadding();

        // Accumulate shifts per axis (axes may repeat and may be negative);
        // normalize every accumulated shift into [0, dimSize).
        std::vector<size_t> shiftsVector(numOfDims, 0);
        for (size_t dim = 0; dim < axesBlob->size(); ++dim) {
            int32_t currentAxis = axes[dim] < 0 ? axes[dim] + numOfDims : axes[dim];
            int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim];
            int32_t dimSize = shape[currentAxis];
            shiftsVector[currentAxis] = (shiftSum % dimSize + dimSize) % dimSize;
        }

        const size_t blockSize = shape.back();
        const size_t totalElements = inputBlob->size();
        const size_t leftBlockSize = blockSize - shiftsVector.back();
        const size_t rightBlockSize = blockSize - leftBlockSize;
        const size_t elementSize = sizeof(DataType);

        size_t start = 0;
        while (start < totalElements) {
            // Destination offsets for both halves of the current row. Applying
            // calculateShiftOffset over every axis (innermost included) moves
            // the left half forward by the innermost shift and wraps the right
            // half back to the row start, on top of the outer-axis rotation.
            size_t leftBlockStartOffset = start;
            size_t rightBlockStartOffset = start + leftBlockSize;

            size_t segmentSize = 1;
            for (int dim = numOfDims - 1; dim >= 0; --dim) {
                leftBlockStartOffset = calculateShiftOffset(leftBlockStartOffset, shiftsVector[dim], segmentSize, shape[dim]);
                rightBlockStartOffset = calculateShiftOffset(rightBlockStartOffset, shiftsVector[dim], segmentSize, shape[dim]);
                segmentSize *= shape[dim];
            }

            if (leftBlockSize > 0)
                cpu_memcpy(output + leftBlockStartOffset,
                           input + start,
                           leftBlockSize * elementSize);

            if (rightBlockSize > 0)
                cpu_memcpy(output + rightBlockStartOffset,
                           input + (start + leftBlockSize),
                           rightBlockSize * elementSize);

            start += blockSize;
        }
    }

    // Input edge indices and the expected number of input edges.
    const size_t DATA_INDEX = 0ul;
    const size_t SHIFT_INDEX = 1ul;
    const size_t AXES_INDEX = 2ul;
    const size_t numberOfInputs = 3ul;

    size_t numOfDims;            // rank of the 'data' tensor
    std::vector<size_t> shape;   // dims of the 'data' tensor

    std::string layerName;       // used only for error reporting
};

// Register RollImpl with the extension-layer factory under the "Roll" op type.
REG_FACTORY_FOR(RollImpl, Roll);

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "single_layer_tests/roll.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {

// Precisions exercised by the smoke tests; the CPU kernel also supports
// I64/U64/BF16, which are covered elsewhere.
const std::vector<InferenceEngine::Precision> inputPrecision = {
        InferenceEngine::Precision::I8,
        InferenceEngine::Precision::U8,
        InferenceEngine::Precision::I16,
        InferenceEngine::Precision::I32,
        InferenceEngine::Precision::FP32,
};

const auto testCase1D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{16}),                        // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{5}),                        // Shift
        ::testing::Values(std::vector<int64_t>{0}),                        // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase2D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{600, 450}),                  // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{300, 250}),                 // Shift
        ::testing::Values(std::vector<int64_t>{0, 1}),                     // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase3D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 320, 320}),               // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{160, 160}),                 // Shift
        ::testing::Values(std::vector<int64_t>{1, 2}),                     // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseNegativeUnorderedAxes4D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{3, 11, 6, 4}),               // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{7, 3}),                     // Shift
        ::testing::Values(std::vector<int64_t>{-3, -2}),                   // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

// NOTE(review): named "5D" but the shape has 4 dims — presumably a naming
// slip in the original suite; verify whether a 5D shape was intended.
const auto testCaseRepeatingAxes5D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 16, 32, 32}),             // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{16, 15, 10, 1, 2, 8, 7, 1, 1}),   // Shift
        ::testing::Values(std::vector<int64_t>{-1, -2, -3, 0, 1, 2, 3, -2, -3}), // Axes (repeating)
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseNegativeShifts6D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{4, 16, 3, 6, 5, 2}),         // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{-2, -15, -2, -1, -4, -1}),  // Shift
        ::testing::Values(std::vector<int64_t>{0, 1, 2, 3, 4, 5}),         // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseUnordNegAxesAndShifts10D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 2, 4, 2, 3, 6, 3, 2, 3, 2}),    // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{-2, -1, 1, 1, 1, -2}),      // Shift
        ::testing::Values(std::vector<int64_t>{-6, -4, -3, 1, -10, -2}),   // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_1d, RollLayerTest, testCase1D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_2d, RollLayerTest, testCase2D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_3d, RollLayerTest, testCase3D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_4d, RollLayerTest, testCaseNegativeUnorderedAxes4D, RollLayerTest::getTestCaseName);
// FIX: this instantiation covers testCaseRepeatingAxes5D but was labelled
// "negative_unordered_axes_5d"; renamed to match the case it runs.
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_repeating_axes_5d, RollLayerTest, testCaseRepeatingAxes5D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_shifts_6d, RollLayerTest, testCaseNegativeShifts6D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_unord_neg_shifts_and_axes_10d, RollLayerTest, testCaseUnordNegAxesAndShifts10D, RollLayerTest::getTestCaseName);

}  // namespace
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "shared_test_classes/single_layer/roll.hpp"

namespace LayerTestsDefinitions {

// Parameterized smoke test: builds the Roll subgraph from the test params
// and compares plugin output against the reference implementation.
// FIX: dropped the stray semicolon after the TEST_P body (-Wextra-semi).
TEST_P(RollLayerTest, CompareWithRefs) {
    Run();
}

}  // namespace LayerTestsDefinitions
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <tuple>
#include <string>

#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"

namespace LayerTestsDefinitions {

// Parameter tuple consumed by RollLayerTest.
// (typedef replaced with the equivalent C++11 alias declaration.)
using rollParams = std::tuple<
        InferenceEngine::SizeVector,   // Input shapes
        InferenceEngine::Precision,    // Input precision
        std::vector<int64_t>,          // Shift
        std::vector<int64_t>,          // Axes
        std::string>;                  // Device name

// Shared fixture for the Roll single-layer tests; SetUp builds the test
// subgraph from the parameter tuple.
class RollLayerTest : public testing::WithParamInterface<rollParams>, virtual public LayerTestsUtils::LayerTestsCommon {
public:
    // Produces a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<rollParams> obj);

protected:
    void SetUp() override;
};

}  // namespace LayerTestsDefinitions
Loading

0 comments on commit c0ab191

Please sign in to comment.