Skip to content

Commit

Permalink
[CPU] Add Roll support (openvinotoolkit#5112)
Browse files Browse the repository at this point in the history
  • Loading branch information
apertovs authored May 3, 2021
1 parent 0808975 commit 5d8f209
Show file tree
Hide file tree
Showing 11 changed files with 463 additions and 3 deletions.
1 change: 1 addition & 0 deletions inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "ReduceSum", ReduceSum},
{ "ReduceSumSquare", ReduceSumSquare},
{ "Erf", Eltwise },
{ "Roll", Roll },
};

Type TypeFromName(const std::string type) {
Expand Down
5 changes: 4 additions & 1 deletion inference-engine/src/mkldnn_plugin/mkldnn_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ enum Type {
ReduceOr,
ReduceProd,
ReduceSum,
ReduceSumSquare
ReduceSumSquare,
Roll
};

Type TypeFromName(const std::string type);
Expand Down Expand Up @@ -206,6 +207,8 @@ static std::string NameFromType(Type type) {
return "ReduceSum";
case ReduceSumSquare:
return "ReduceSumSquare";
case Roll:
return "Roll";
default:
return "Unknown";
}
Expand Down
209 changes: 209 additions & 0 deletions inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <string>
#include <vector>
#include <cmath>
#include <algorithm>
#include <numeric>
#include <mkldnn_extension_utils.h>

#include "mkldnn_roll_node.h"
#include "ie_parallel.hpp"
#include "ie_precision.hpp"
#include "mkldnn/ie_mkldnn.h"
#include "utils/general_utils.h"
#include "common/cpu_memcpy.h"

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;

// Validates the Roll layer's three inputs ('data', 'shift', 'axes') and caches
// the data shape; throws with a descriptive message on any malformed input.
MKLDNNRollNode::MKLDNNRollNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
        MKLDNNNode(layer, eng, cache) {
    layerErrorPrefix = "Roll layer with name '" + layer->name + "'";
    if (layer->insData.size() != numberOfInputs) {
        IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!";
    }

    /* Data */
    auto data = layer->insData[DATA_INDEX].lock();
    if (data == nullptr) {
        IE_THROW() << layerErrorPrefix << " has nullable data";
    }

    const auto &dataTensor = data->getTensorDesc();
    shape = dataTensor.getDims();
    const auto &dataPrecision = dataTensor.getPrecision();

    // Roll only relocates elements, so any precision whose byte width is
    // supported (1, 2 or 4 bytes) is acceptable.
    if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == supportedPrecisionSizes.end())
        IE_THROW() << layerErrorPrefix << " has unsupported precision: " << dataPrecision.name();  // fixed: leading space for consistent message

    // Scalars cannot be rolled: rank must be at least 1.
    if (shape.empty()) {
        IE_THROW() << layerErrorPrefix << " doesn't support 'data' input tensor with rank: " << shape.size();
    }
    numOfDims = shape.size();

    // Roll is shape-preserving: the output dims must equal the input dims.
    if (shape != layer->outData[0]->getTensorDesc().getDims()) {
        IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions";
    }

    /* Axes */
    auto axesData = layer->insData[AXES_INDEX].lock();
    if (axesData == nullptr) {
        IE_THROW() << layerErrorPrefix << " has nullable 'axes' data";
    }
    const auto& axesTensor = axesData->getTensorDesc();
    const auto& axesTensorPrec = axesTensor.getPrecision();
    if (axesTensorPrec != Precision::I32 && axesTensorPrec != Precision::I64) {
        IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name();
    }

    // 'axes' must be a scalar or a 1D vector.
    const auto axesTensorRank = axesTensor.getDims().size();
    if (axesTensorRank > 1) {
        IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank;
    }

    /* Shift */
    auto shiftData = layer->insData[SHIFT_INDEX].lock();
    if (shiftData == nullptr) {
        IE_THROW() << layerErrorPrefix << " has nullable 'shift' data";
    }
    const auto& shiftTensor = shiftData->getTensorDesc();
    const auto& shiftTensorPrec = shiftTensor.getPrecision();
    if (shiftTensorPrec != Precision::I32 && shiftTensorPrec != Precision::I64) {
        IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name();
    }

    // 'shift' must be a scalar or a 1D vector.
    const auto shiftTensorRank = shiftTensor.getDims().size();
    if (shiftTensorRank > 1) {
        IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank;
    }
}
// No mkldnn descriptors are needed: Roll is executed by the reference kernel (rollImpl).
void MKLDNNRollNode::getSupportedDescriptors() {}

// Registers the single supported primitive configuration: plain layouts on all
// ports, 'data'/output in the layer's precision, 'shift'/'axes' requested as s32.
void MKLDNNRollNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    auto inputData = getCnnLayer()->insData[0].lock();

    if (inputData == nullptr) {
        IE_THROW() << layerErrorPrefix << " has nullable 'data'";
    }

    InferenceEngine::Precision precision = inputData->getPrecision();

    auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    // Note: the unused 'srcDims' local from the original draft was removed.
    auto dataMemoryFormat = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims());
    InferenceEngine::LayerConfig config;
    config.dynBatchSupport = false;

    // Builds a non-inplace, non-constant port config with a plain memory format.
    auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig {
        InferenceEngine::DataConfig dataConfig;
        dataConfig.inPlace = -1;
        dataConfig.constant = false;
        dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims));
        return dataConfig;
    };

    // 'shift' and 'axes' are forced to s32 so rollImpl can read them as int32_t.
    config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), dataType));
    config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), memory::data_type::s32));
    config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getDims(), memory::data_type::s32));

    config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), dataType));

    supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, dataMemoryFormat});
}


// Dispatches to the typed reference kernel. Roll only moves bytes, so the
// element BYTE SIZE — not the exact precision — selects the instantiation:
// 1 byte covers I8/U8, 2 bytes I16/BF16, 4 bytes I32/FP32.
void MKLDNNRollNode::execute(mkldnn::stream strm) {
    const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getDesc().getPrecision();
    const auto& dataTypeSize = dataPrecision.size();
    switch (dataTypeSize) {
        case sizeof(PrecisionTrait<Precision::I8>::value_type): {
            rollImpl<PrecisionTrait<Precision::I8>::value_type>();
            break;
        }
        case sizeof(PrecisionTrait<Precision::I16>::value_type): {
            rollImpl<PrecisionTrait<Precision::I16>::value_type>();
            break;
        }
        case sizeof(PrecisionTrait<Precision::I32>::value_type): {
            rollImpl<PrecisionTrait<Precision::I32>::value_type>();
            break;
        }
        default:
            // Fixed: added the missing leading space so the message reads
            // "Roll layer with name 'X' has unsupported ..." like all siblings.
            IE_THROW() << layerErrorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
    }
}

// Maps a flat data offset to the flat offset it must be written to after the
// given dimension is rolled by dimShift. segmentSize is that dimension's
// stride; dimSize its extent.
size_t MKLDNNRollNode::calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize) {
    // Coordinate of this offset along the dimension being rolled.
    const size_t currentPosition = (dataOffset / segmentSize) % dimSize;
    // Destination coordinate after applying the (pre-normalized) shift, with wrap-around.
    const size_t shiftedPosition = (currentPosition + dimShift) % dimSize;
    // Rebase the offset from the old coordinate to the new one. Unsigned modular
    // arithmetic keeps this correct even when shiftedPosition < currentPosition.
    return dataOffset - currentPosition * segmentSize + shiftedPosition * segmentSize;
}

// Reference Roll kernel: writes 'data' cyclically shifted along the requested
// axes into the output. DataType is any type with the matching byte width
// (selected in execute()) since the operation only relocates elements.
template <typename DataType>
void MKLDNNRollNode::rollImpl() {
    const auto dataEdge = getParentEdgeAt(DATA_INDEX);
    const auto axesEdge = getParentEdgeAt(AXES_INDEX);
    const auto shiftsEdge = getParentEdgeAt(SHIFT_INDEX);

    // Both auxiliary inputs were configured as s32 in
    // initSupportedPrimitiveDescriptors, so reading them as int32_t is safe.
    const auto *axes = reinterpret_cast<const int32_t*>(axesEdge->getMemoryPtr()->GetPtr());
    const auto *shifts = reinterpret_cast<const int32_t*>(shiftsEdge->getMemoryPtr()->GetPtr());

    const auto *input = reinterpret_cast<const DataType*>(dataEdge->getMemoryPtr()->GetPtr());
    auto *output = reinterpret_cast<DataType*>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
    // Effective shift per dimension, normalized into [0, dimSize).
    std::vector<size_t> shiftsVector(numOfDims, 0);

    // Accumulate shifts per axis: axes may be negative and may repeat, in which
    // case their shifts add up (see the repeating-axes test case).
    const size_t axesLength = axesEdge->getDims()[0];
    for (size_t dim = 0; dim < axesLength ; ++dim) {
        int32_t currentAxis = axes[dim] < 0 ? axes[dim] + numOfDims : axes[dim];
        int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim];
        int32_t dimSize = shape[currentAxis];
        // Double-modulo normalizes negative sums into [0, dimSize).
        shiftsVector[currentAxis] = (shiftSum % dimSize + dimSize) % dimSize;
    }

    // Process the tensor one innermost-dimension row at a time. The innermost
    // shift splits each source row into two contiguous pieces; each piece is
    // copied whole after its destination offset is adjusted for the shifts of
    // every dimension via calculateShiftOffset.
    const size_t blockSize = shape.back();
    const size_t totalElements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
    const size_t leftBlockSize = blockSize - shiftsVector.back();
    const size_t rightBlockSize = blockSize - leftBlockSize;
    const size_t elementSize = sizeof(DataType);

    const size_t nIterations = totalElements / blockSize;
    const auto strides = dataEdge->getDesc().getBlockingDesc().getStrides();
    parallel_for(nIterations, [&](size_t iter) {
        size_t start = iter * blockSize;
        size_t leftBlockStartOffset = start;
        size_t rightBlockStartOffset = start + leftBlockSize;

        // Fold in every dimension's shift (innermost to outermost).
        for (int dim = numOfDims - 1; dim >= 0; --dim) {
            leftBlockStartOffset = calculateShiftOffset(leftBlockStartOffset, shiftsVector[dim], strides[dim], shape[dim]);
            rightBlockStartOffset = calculateShiftOffset(rightBlockStartOffset, shiftsVector[dim], strides[dim], shape[dim]);
        }

        // First piece of the source row: lands shifted forward in its destination row.
        if (leftBlockSize > 0)
            cpu_memcpy(output + leftBlockStartOffset,
                       input + start,
                       leftBlockSize * elementSize);

        // Second piece: wraps around to the start of the destination row.
        if (rightBlockSize > 0)
            cpu_memcpy(output + rightBlockStartOffset,
                       input + (start + leftBlockSize),
                       rightBlockSize * elementSize);
    });
}

// Reports whether the factory successfully resolved this node to the Roll type.
bool MKLDNNRollNode::created() const {
    const bool isRollNode = (getType() == Roll);
    return isRollNode;
}

// Nothing to prepare: the reference kernel reads edge memory directly at execute time.
void MKLDNNRollNode::createPrimitive() {}

// Accepted element byte widths; the precision itself is irrelevant to Roll.
const std::vector<size_t> MKLDNNRollNode::supportedPrecisionSizes = {1, 2, 4};

REG_MKLDNN_PRIM_FOR(MKLDNNRollNode, Roll)
41 changes: 41 additions & 0 deletions inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>

namespace MKLDNNPlugin {

// CPU plugin node for the Roll operation: cyclically shifts the 'data' tensor
// along the axes given by the 'axes' input by the amounts given by the 'shift'
// input. Reference implementation over plain memory layouts.
class MKLDNNRollNode : public MKLDNNNode {
public:
    MKLDNNRollNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
    ~MKLDNNRollNode() override = default;

    void getSupportedDescriptors() override;
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

private:
    // Maps a flat offset to its destination offset after rolling one dimension
    // by dimShift (segmentSize is that dimension's stride, dimSize its extent).
    size_t calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize);

    // Typed reference kernel; DataType is chosen by element byte width in execute().
    template <typename DataType>
    void rollImpl();

    std::vector<size_t> shape;   // dims of the 'data' input (identical to the output dims)
    const static std::vector<size_t> supportedPrecisionSizes;   // allowed element byte widths
    std::string layerErrorPrefix;   // "Roll layer with name '<name>'" used in error messages
    size_t numOfDims;   // rank of 'data'

    // Input port indices and the expected number of inputs.
    const size_t DATA_INDEX = 0ul;
    const size_t SHIFT_INDEX = 1ul;
    const size_t AXES_INDEX = 2ul;
    const size_t numberOfInputs = 3ul;
};

} // namespace MKLDNNPlugin
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "single_layer_tests/roll.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {

// Roll is precision-agnostic up to element byte width, so every case below is
// run across all of these precisions.
const std::vector<InferenceEngine::Precision> inputPrecision = {
        InferenceEngine::Precision::I8,
        InferenceEngine::Precision::U8,
        InferenceEngine::Precision::I16,
        InferenceEngine::Precision::I32,
        InferenceEngine::Precision::FP32,
        InferenceEngine::Precision::BF16
};

const auto testCase2DZeroShifts = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{17, 19}), // Input shape
        ::testing::ValuesIn(inputPrecision),            // Precision
        ::testing::Values(std::vector<int64_t>{0, 0}),  // Shift
        ::testing::Values(std::vector<int64_t>{0, 1}),  // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase1D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{16}),     // Input shape
        ::testing::ValuesIn(inputPrecision),            // Precision
        ::testing::Values(std::vector<int64_t>{5}),     // Shift
        ::testing::Values(std::vector<int64_t>{0}),     // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase2D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{600, 450}),   // Input shape
        ::testing::ValuesIn(inputPrecision),                // Precision
        ::testing::Values(std::vector<int64_t>{300, 250}),  // Shift
        ::testing::Values(std::vector<int64_t>{0, 1}),      // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase3D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 320, 320}),    // Input shape
        ::testing::ValuesIn(inputPrecision),                    // Precision
        ::testing::Values(std::vector<int64_t>{160, 160}),      // Shift
        ::testing::Values(std::vector<int64_t>{1, 2}),          // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseNegativeUnorderedAxes4D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{3, 11, 6, 4}),    // Input shape
        ::testing::ValuesIn(inputPrecision),                    // Precision
        ::testing::Values(std::vector<int64_t>{7, 3}),          // Shift
        ::testing::Values(std::vector<int64_t>{-3, -2}),        // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

// Renamed from "...5D": the input shape is 4-D; axes repeat (their shifts accumulate).
const auto testCaseRepeatingAxes4D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 16, 32, 32}),                      // Input shape
        ::testing::ValuesIn(inputPrecision),                                        // Precision
        ::testing::Values(std::vector<int64_t>{16, 15, 10, 2, 1, 7, 2, 8, 1, 1}),   // Shift
        ::testing::Values(std::vector<int64_t>{-1, -2, -3, 1, 0, 3, 3, 2, -2, -3}), // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseNegativeShifts6D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{4, 16, 3, 6, 5, 2}),          // Input shape
        ::testing::ValuesIn(inputPrecision),                                // Precision
        ::testing::Values(std::vector<int64_t>{-2, -15, -2, -1, -4, -1}),   // Shift
        ::testing::Values(std::vector<int64_t>{0, 1, 2, 3, 4, 5}),          // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseUnordNegAxesAndShifts10D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 2, 4, 2, 3, 6, 3, 2, 3, 2}),   // Input shape
        ::testing::ValuesIn(inputPrecision),                                    // Precision
        ::testing::Values(std::vector<int64_t>{-2, -1, 1, 1, 1, -2}),           // Shift
        ::testing::Values(std::vector<int64_t>{-6, -4, -3, 1, -10, -2}),        // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_2d_zero_shifts, RollLayerTest, testCase2DZeroShifts, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_1d, RollLayerTest, testCase1D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_2d, RollLayerTest, testCase2D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_3d, RollLayerTest, testCase3D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_4d, RollLayerTest, testCaseNegativeUnorderedAxes4D, RollLayerTest::getTestCaseName);
// Fixed: was misnamed "negative_unordered_axes_5d" (duplicating the 4d case's intent)
// while actually covering repeating axes on a 4-D input.
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_repeating_axes_4d, RollLayerTest, testCaseRepeatingAxes4D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_shifts_6d, RollLayerTest, testCaseNegativeShifts6D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_unord_neg_shifts_and_axes_10d, RollLayerTest, testCaseUnordNegAxesAndShifts10D, RollLayerTest::getTestCaseName);

} // namespace
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "shared_test_classes/single_layer/roll.hpp"

namespace LayerTestsDefinitions {

// Runs the shared single-layer Roll test flow (build function, infer, compare
// against the reference). Fixed: removed the stray semicolon after the body.
TEST_P(RollLayerTest, CompareWithRefs) {
    Run();
}

} // namespace LayerTestsDefinitions
Loading

0 comments on commit 5d8f209

Please sign in to comment.