Skip to content

Commit

Permalink
[CPU] Add Roll support
Browse files Browse the repository at this point in the history
  • Loading branch information
apertovs committed Apr 19, 2021
1 parent 26801c1 commit c0ab191
Show file tree
Hide file tree
Showing 8 changed files with 432 additions and 0 deletions.
1 change: 1 addition & 0 deletions inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,4 @@ MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum);
MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum);
MKLDNN_EXTENSION_NODE(RollImpl, Roll);
233 changes: 233 additions & 0 deletions inference-engine/src/mkldnn_plugin/nodes/roll.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <string>
#include <vector>
#include <cmath>

#include "ie_parallel.hpp"
#include "ie_precision.hpp"
#include "mkldnn/ie_mkldnn.h"
#include "utils/general_utils.h"
#include "common/cpu_memcpy.h"


namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

class RollImpl: public ExtLayerBase {
public:
    /// Validates the Roll layer (input count, tensor ranks, precisions) and
    /// registers the single supported configuration. Following the
    /// ExtLayerBase convention, validation failures are captured into
    /// errorMsg instead of escaping the constructor.
    explicit RollImpl(const CNNLayer* layer) {
        try {
            layerName = layer->name;
            const std::string layerErrorPrefix = "Roll layer with name '" + layerName + "'";
            if (layer->insData.size() != numberOfInputs) {
                IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!";
            }

            /* Data */
            auto data = layer->insData[DATA_INDEX].lock();
            if (data == nullptr) {
                IE_THROW() << layerErrorPrefix << " has nullable data";
            }
            const auto& dataTensor = data->getTensorDesc();
            const auto& dataShape = dataTensor.getDims();
            // Roll needs at least one axis to rotate along; scalars are rejected.
            if (dataShape.size() < 1) {
                IE_THROW() << layerErrorPrefix << " doesn't support 'data' input tensor with rank: " << dataShape.size();
            }
            numOfDims = dataShape.size();

            const auto& dataPrecision = dataTensor.getPrecision();
            if (!MKLDNNPlugin::one_of(dataPrecision, Precision::I8, Precision::U8, Precision::I16, Precision::I32, Precision::FP32, Precision::I64,
                                      Precision::U64, Precision::BF16)) {
                IE_THROW() << layerErrorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
            }

            // Roll is a pure element permutation: output shape must match input shape.
            if (dataShape != layer->outData[0]->getTensorDesc().getDims()) {
                IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions";
            }

            /* Axes */
            auto axesData = layer->insData[AXES_INDEX].lock();
            if (axesData == nullptr) {
                IE_THROW() << layerErrorPrefix << " has nullable 'axes' data";
            }
            const auto& axesTensor = axesData->getTensorDesc();
            const auto& axesTensorPrec = axesData->getTensorDesc().getPrecision();
            if (axesTensorPrec != Precision::I32 && axesTensorPrec != Precision::I64) {
                IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name();
            }

            // 'axes' must be a scalar or a 1D tensor.
            const auto axesTensorRank = axesTensor.getDims().size();
            if (axesTensorRank > 1) {
                IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank;
            }

            /* Shift */
            auto shiftData = layer->insData[SHIFT_INDEX].lock();
            if (shiftData == nullptr) {
                IE_THROW() << layerErrorPrefix << " has nullable 'shift' data";
            }
            const auto& shiftTensor = shiftData->getTensorDesc();
            const auto& shiftTensorPrec = shiftData->getTensorDesc().getPrecision();
            if (shiftTensorPrec != Precision::I32 && shiftTensorPrec != Precision::I64) {
                IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name();
            }

            // 'shift' must be a scalar or a 1D tensor.
            const auto shiftTensorRank = shiftTensor.getDims().size();
            if (shiftTensorRank > 1) {
                IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank;
            }

            shape = dataShape;

            // Single supported configuration: planar layouts, 'shift'/'axes'
            // forced to I32 since the kernel reads them as int32_t.
            LayerConfig config;
            for (size_t i = 0; i < layer->insData.size(); i++) {
                DataConfig inConfig;
                inConfig.inPlace = -1;
                inConfig.constant = false;
                auto inputData = layer->insData[i].lock();
                if (inputData == nullptr) {
                    IE_THROW() << layerErrorPrefix << " has nullable input data at " << i;
                }
                Precision inPrecision = i > DATA_INDEX ? Precision(Precision::I32) : inputData->getTensorDesc().getPrecision();
                const SizeVector& inDims = inputData->getTensorDesc().getDims();
                inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));
                config.inConfs.push_back(inConfig);
            }
            DataConfig outConfig;
            outConfig.inPlace = -1;
            outConfig.constant = false;
            Precision outPrecision = data->getTensorDesc().getPrecision();
            const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims();
            outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));

            config.outConfs.push_back(outConfig);

            config.dynBatchSupport = false;
            confs.push_back(config);
        } catch (InferenceEngine::Exception& ex) {
            errorMsg = ex.what();
        }
    }

    /// Dispatches to the typed kernel according to the 'data' precision.
    /// @return OK on success, GENERAL_ERROR (with resp->msg filled) on an
    ///         unsupported precision.
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc* resp) noexcept override {
        const auto& dataPrecision = inputs[DATA_INDEX]->getTensorDesc().getPrecision();
        switch (dataPrecision) {
            case Precision::I8: {
                rollImpl<int8_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::U8: {
                rollImpl<uint8_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::I16: {
                rollImpl<int16_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::I32: {
                rollImpl<int32_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::FP32: {
                rollImpl<float>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::I64: {
                rollImpl<int64_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::U64: {
                rollImpl<uint64_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            case Precision::BF16: {
                // FIX: BF16 is accepted by the constructor's precision check but
                // was missing here, so valid BF16 inputs failed at runtime. Roll
                // only relocates elements, so a bit-exact 16-bit copy is correct.
                rollImpl<int16_t>(inputs[DATA_INDEX], inputs[SHIFT_INDEX], inputs[AXES_INDEX], outputs[0]);
                break;
            }
            default: {
                if (resp) {
                    // Local renamed (was errorMsg) to avoid shadowing the
                    // ExtLayerBase::errorMsg member.
                    const std::string msg = layerName + " has unsupported 'data' input precision: " + dataPrecision.name();
                    // FIX: std::string::copy does not null-terminate the
                    // destination; terminate explicitly after the copied span.
                    const size_t copied = msg.copy(resp->msg, sizeof(resp->msg) - 1);
                    resp->msg[copied] = '\0';
                }
                return GENERAL_ERROR;
            }
        }
        return OK;
    }

private:
    /// Moves a linear buffer offset forward by dimShift positions (modulo
    /// dimSize) along the axis whose inner-element count is segmentSize.
    /// 'shift' may wrap as an unsigned value when the rolled position is
    /// behind the current one; the final addition wraps back correctly
    /// (well-defined unsigned modular arithmetic).
    size_t calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize) {
        size_t pos = dataOffset / segmentSize % dimSize;
        size_t shift = (pos + dimShift) % dimSize - pos;
        return dataOffset + shift * segmentSize;
    }

    /// Rolls the input into the output. Each innermost row is split into a
    /// "left" part (blockSize - innermostShift elements) and a "right" part
    /// (innermostShift elements) that are memcpy'd to their rolled
    /// destinations; outer axes contribute via calculateShiftOffset.
    /// 'axes'/'shifts' are read as int32_t — the layer config requests I32.
    template<typename DataType>
    void rollImpl(const Blob::CPtr& inputBlob, const Blob::CPtr& shiftsBlob, const Blob::CPtr& axesBlob, const Blob::Ptr& outputBlob) {
        const auto* axes = axesBlob->cbuffer().as<const int32_t*>() + axesBlob->getTensorDesc().getBlockingDesc().getOffsetPadding();
        const auto* shifts = shiftsBlob->cbuffer().as<const int32_t*>() + shiftsBlob->getTensorDesc().getBlockingDesc().getOffsetPadding();

        const auto* input =
            inputBlob->cbuffer().as<const DataType*>() + inputBlob->getTensorDesc().getBlockingDesc().getOffsetPadding();
        auto* output = outputBlob->buffer().as<DataType*>() + outputBlob->getTensorDesc().getBlockingDesc().getOffsetPadding();

        // Accumulate shifts per axis (axes may repeat and may be negative);
        // normalize every accumulated shift into [0, dimSize).
        std::vector<size_t> shiftsVector(numOfDims, 0);
        for (size_t dim = 0; dim < axesBlob->size(); ++dim) {
            int32_t currentAxis = axes[dim] < 0 ? axes[dim] + numOfDims : axes[dim];
            int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim];
            int32_t dimSize = shape[currentAxis];
            shiftsVector[currentAxis] = (shiftSum % dimSize + dimSize) % dimSize;
        }

        const size_t blockSize = shape.back();
        const size_t totalElements = inputBlob->size();
        const size_t leftBlockSize = blockSize - shiftsVector.back();
        const size_t rightBlockSize = blockSize - leftBlockSize;
        const size_t elementSize = sizeof(DataType);

        size_t start = 0;
        while (start < totalElements) {
            // Destination offsets for both halves of the current row. Applying
            // calculateShiftOffset over every axis (innermost included) moves
            // the left half forward by the innermost shift and wraps the right
            // half back to the row start, on top of the outer-axis rotation.
            size_t leftBlockStartOffset = start;
            size_t rightBlockStartOffset = start + leftBlockSize;

            size_t segmentSize = 1;
            for (int dim = numOfDims - 1; dim >= 0; --dim) {
                leftBlockStartOffset = calculateShiftOffset(leftBlockStartOffset, shiftsVector[dim], segmentSize, shape[dim]);
                rightBlockStartOffset = calculateShiftOffset(rightBlockStartOffset, shiftsVector[dim], segmentSize, shape[dim]);
                segmentSize *= shape[dim];
            }

            if (leftBlockSize > 0)
                cpu_memcpy(output + leftBlockStartOffset,
                           input + start,
                           leftBlockSize * elementSize);

            if (rightBlockSize > 0)
                cpu_memcpy(output + rightBlockStartOffset,
                           input + (start + leftBlockSize),
                           rightBlockSize * elementSize);

            start += blockSize;
        }
    }

    // Input edge indices and the expected number of input edges.
    const size_t DATA_INDEX = 0ul;
    const size_t SHIFT_INDEX = 1ul;
    const size_t AXES_INDEX = 2ul;
    const size_t numberOfInputs = 3ul;

    size_t numOfDims;            // rank of the 'data' tensor
    std::vector<size_t> shape;   // dims of the 'data' tensor

    std::string layerName;       // used only for error reporting
};

// Register RollImpl with the extension-layer factory under the "Roll" op type.
REG_FACTORY_FOR(RollImpl, Roll);

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "single_layer_tests/roll.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {

// Precisions exercised by the smoke tests; the CPU kernel also supports
// I64/U64/BF16, which are covered elsewhere.
const std::vector<InferenceEngine::Precision> inputPrecision = {
        InferenceEngine::Precision::I8,
        InferenceEngine::Precision::U8,
        InferenceEngine::Precision::I16,
        InferenceEngine::Precision::I32,
        InferenceEngine::Precision::FP32,
};

const auto testCase1D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{16}),                        // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{5}),                        // Shift
        ::testing::Values(std::vector<int64_t>{0}),                        // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase2D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{600, 450}),                  // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{300, 250}),                 // Shift
        ::testing::Values(std::vector<int64_t>{0, 1}),                     // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCase3D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 320, 320}),               // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{160, 160}),                 // Shift
        ::testing::Values(std::vector<int64_t>{1, 2}),                     // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseNegativeUnorderedAxes4D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{3, 11, 6, 4}),               // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{7, 3}),                     // Shift
        ::testing::Values(std::vector<int64_t>{-3, -2}),                   // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

// NOTE(review): named "5D" but the shape has 4 dims — presumably a naming
// slip in the original suite; verify whether a 5D shape was intended.
const auto testCaseRepeatingAxes5D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 16, 32, 32}),             // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{16, 15, 10, 1, 2, 8, 7, 1, 1}),   // Shift
        ::testing::Values(std::vector<int64_t>{-1, -2, -3, 0, 1, 2, 3, -2, -3}), // Axes (repeating)
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseNegativeShifts6D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{4, 16, 3, 6, 5, 2}),         // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{-2, -15, -2, -1, -4, -1}),  // Shift
        ::testing::Values(std::vector<int64_t>{0, 1, 2, 3, 4, 5}),         // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

const auto testCaseUnordNegAxesAndShifts10D = ::testing::Combine(
        ::testing::Values(std::vector<size_t>{2, 2, 4, 2, 3, 6, 3, 2, 3, 2}),    // Input shape
        ::testing::ValuesIn(inputPrecision),                               // Precision
        ::testing::Values(std::vector<int64_t>{-2, -1, 1, 1, 1, -2}),      // Shift
        ::testing::Values(std::vector<int64_t>{-6, -4, -3, 1, -10, -2}),   // Axes
        ::testing::Values(CommonTestUtils::DEVICE_CPU)
);

INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_1d, RollLayerTest, testCase1D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_2d, RollLayerTest, testCase2D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_3d, RollLayerTest, testCase3D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_4d, RollLayerTest, testCaseNegativeUnorderedAxes4D, RollLayerTest::getTestCaseName);
// FIX: this instantiation covers testCaseRepeatingAxes5D but was labelled
// "negative_unordered_axes_5d"; renamed to match the case it runs.
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_repeating_axes_5d, RollLayerTest, testCaseRepeatingAxes5D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_negative_shifts_6d, RollLayerTest, testCaseNegativeShifts6D, RollLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsRoll_unord_neg_shifts_and_axes_10d, RollLayerTest, testCaseUnordNegAxesAndShifts10D, RollLayerTest::getTestCaseName);

}  // namespace
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "shared_test_classes/single_layer/roll.hpp"

namespace LayerTestsDefinitions {

// Parameterized smoke test: builds the Roll subgraph from the test params
// and compares plugin output against the reference implementation.
// FIX: dropped the stray semicolon after the TEST_P body (-Wextra-semi).
TEST_P(RollLayerTest, CompareWithRefs) {
    Run();
}

}  // namespace LayerTestsDefinitions
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <tuple>
#include <string>

#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"

namespace LayerTestsDefinitions {

// Parameter tuple consumed by RollLayerTest.
// (typedef replaced with the equivalent C++11 alias declaration.)
using rollParams = std::tuple<
        InferenceEngine::SizeVector,   // Input shapes
        InferenceEngine::Precision,    // Input precision
        std::vector<int64_t>,          // Shift
        std::vector<int64_t>,          // Axes
        std::string>;                  // Device name

// Shared fixture for the Roll single-layer tests; SetUp builds the test
// subgraph from the parameter tuple.
class RollLayerTest : public testing::WithParamInterface<rollParams>, virtual public LayerTestsUtils::LayerTestsCommon {
public:
    // Produces a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<rollParams> obj);

protected:
    void SetUp() override;
};

}  // namespace LayerTestsDefinitions
Loading

0 comments on commit c0ab191

Please sign in to comment.