Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into develop/new-python-api
Browse files Browse the repository at this point in the history
  • Loading branch information
akuporos committed Nov 5, 2021
2 parents 7456077 + 9731d9a commit a6e21e4
Show file tree
Hide file tree
Showing 11 changed files with 208 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class LP_TRANSFORMATIONS_API FakeQuantizeDequantization {
const std::shared_ptr<ngraph::opset1::Multiply>& multiply,
const std::shared_ptr<ngraph::opset1::Constant>& multiplyConstant);

bool empty() const;
bool empty() const noexcept;
bool multiplyHasZeroOrDenormal() const;
bool isShared() const;
bool isLowPrecision() const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ class LP_TRANSFORMATIONS_API DataPrecision {
max(max),
hasZeroPoint(hasZeroPoint) {}

bool empty() const noexcept {
return (precision == element::undefined) && (min == 0.f) && (max == 0.f) && (!hasZeroPoint);
}

static bool isSupported(const element::Type& precision) {
static const std::set<element::Type_t> lowPrecision = {
element::i8, element::u8,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class LP_TRANSFORMATIONS_API QuantizationDetails {
float getOutputLowValue(const size_t channel) const;
float getOutputHighValue(const size_t channel) const;

bool empty() const noexcept;

static bool isSupportedLevel(const size_t level);

const size_t levels;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ FakeQuantizeDequantization::FakeQuantizeDequantization(
multiplyConstant(multiplyConstant) {
}

bool FakeQuantizeDequantization::empty() const {
bool FakeQuantizeDequantization::empty() const noexcept {
return (subtract == nullptr) && (multiply == nullptr);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c
// Reports whether the layer's dequantization chain carries a Subtract
// (zero-point shift), i.e. the quantization is asymmetric.
bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr<const Node>& layer) {
    // NetworkHelper::getDequantization expects a non-const node: strip the
    // constness and re-acquire ownership via shared_from_this.
    auto node = const_cast<ngraph::Node*>(layer.get())->shared_from_this();
    const auto dequantization = NetworkHelper::getDequantization(node);
    // An empty chain cannot contain a zero-point shift.
    return dequantization.empty() ? false : (dequantization.subtract != nullptr);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ void QuantizationDetails::getOutputIntervals(
}

QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<opset1::FakeQuantize> quantize) {
if (!QuantizationDetails::outputLayoutIsSupported(quantize)) {
return QuantizationDetails();
}

const std::vector<float> inputLowValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
const std::vector<float> inputHighValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();

Expand Down Expand Up @@ -153,6 +157,10 @@ std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> const
return ov::as_type_ptr<opset1::Constant>(constantLayer)->cast_vector<float>();
}

// True for a default-constructed QuantizationDetails: zero levels and no
// interval values on either input or output.
bool QuantizationDetails::empty() const noexcept {
    const bool noIntervals =
        inputLowValues.empty() && inputHighValues.empty() &&
        outputLowValues.empty() && outputHighValues.empty();
    return (levels == 0ul) && noIntervals;
}

bool QuantizationDetails::isSupportedLevel(const size_t level) {
static const std::unordered_set<size_t> supported_levels = { 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
return supported_levels.find(level) != supported_levels.end();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,9 @@ std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuan
DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr<Node>& node) {
const auto fq = getFakeQuantizeOnWeights(node);
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
if (quantizationDetails.empty()) {
return DataPrecision();
}

const auto precisionsAttribute = getAttributeFromOutput<PrecisionsAttributePtr>(fq);
const auto precisions = precisionsAttribute == nullptr ?
Expand All @@ -380,6 +383,10 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr<

if (dequantization.empty()) {
const auto dataPrecision = WeightableLayerTransformation::getDataPrecisionOnWeights(n);
if (dataPrecision.empty()) {
return false;
}

if (dataPrecision.hasZeroPoint) {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "layer_transformation.hpp"

#include <sstream>
#include <memory>
#include <utility>

#include <gtest/gtest.h>

#include <transformations/utils/utils.hpp>
#include <transformations/init_node_info.hpp>
#include <low_precision/weightable_layer_transformation.hpp>
#include "lpt_ngraph_functions/convolution_function.hpp"

using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;

// Parameters describing the network under test for isAsymmetricOnWeights.
class IsAsymmetricOnWeightsTestValues {
public:
// Topology pieces used to build the "actual" (input) function.
class Actual {
public:
// Precision of the input before the dequantization subgraph.
ngraph::element::Type precisionBeforeDequantization;
// Dequantization operations applied on activations (Convert/Subtract/Multiply).
ngraph::builder::subgraph::DequantizationOperations dequantizationOnActivations;
// Constant holding the convolution weights.
std::shared_ptr<ngraph::opset1::Constant> weights;
// FakeQuantize description applied on the weights.
builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights;
};

TestTransformationParams params;
Actual actual;
};

// Full test parameter set: network precision, input shape, test values, and a
// pair of (five transpose flags, expected isAsymmetricOnWeights result).
typedef std::tuple<
element::Type,
ngraph::PartialShape,
IsAsymmetricOnWeightsTestValues,
std::pair<std::vector<bool>, bool> > IsAsymmetricOnWeightsParams;

// Fixture: builds a Convolution function whose weights' FakeQuantize may have
// Transpose operations injected on any of its five inputs, as selected by the
// test parameters.
class IsAsymmetricOnWeightsTransformation : public LayerTransformation, public testing::WithParamInterface<IsAsymmetricOnWeightsParams> {
public:
void SetUp() override {
const auto netPrecision = std::get<0>(GetParam());
const auto inputShape = std::get<1>(GetParam());
auto testValues = std::get<2>(GetParam());
// first: five transpose flags (data, inputLow, inputHigh, outputLow,
// outputHigh); second: expected result, checked in the test body.
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());

actualFunction = ngraph::builder::subgraph::ConvolutionFunction::getOriginal(
netPrecision,
testValues.actual.precisionBeforeDequantization,
inputShape,
testValues.actual.dequantizationOnActivations,
testValues.actual.weights,
testValues.actual.fakeQuantizeOnWeights,
transposeAndIsAsymmetricOnWeights.first[0],
transposeAndIsAsymmetricOnWeights.first[1],
transposeAndIsAsymmetricOnWeights.first[2],
transposeAndIsAsymmetricOnWeights.first[3],
transposeAndIsAsymmetricOnWeights.first[4]);
}

// Builds the human-readable test name from all parameter values; the exact
// format is part of the gtest test identity, so keep it stable.
static std::string getTestCaseName(testing::TestParamInfo<IsAsymmetricOnWeightsParams> obj) {
const auto netPrecision = std::get<0>(obj.param);
auto inputShape = std::get<1>(obj.param);
IsAsymmetricOnWeightsTestValues testValues = std::get<2>(obj.param);
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(obj.param);

std::ostringstream result;
result << toString(testValues.params) << "_" <<
netPrecision << "_" <<
inputShape << "_" <<
testValues.actual.precisionBeforeDequantization << "_" <<
testValues.actual.dequantizationOnActivations << "_" << "_weights_" <<
testValues.actual.weights->get_element_type() << "_" << "{ " <<
testValues.actual.weights->cast_vector<float>()[0] << " }_" <<
testValues.actual.fakeQuantizeOnWeights << "_" <<
transposeAndIsAsymmetricOnWeights.first[0] << "_" <<
transposeAndIsAsymmetricOnWeights.first[1] << "_" <<
transposeAndIsAsymmetricOnWeights.first[2] << "_" <<
transposeAndIsAsymmetricOnWeights.first[3] << "_" <<
transposeAndIsAsymmetricOnWeights.first[4];
return result.str();
}
};

// Checks WeightableLayerTransformation::isAsymmetricOnWeights against the
// expectation encoded in the test parameters.
TEST_P(IsAsymmetricOnWeightsTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types();

// The generated function must contain exactly one convolution to query.
const auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
ASSERT_TRUE(convolutions.size() == 1ul) << "convolution was not found";

const auto isAsymmetricOnWeights = ngraph::pass::low_precision::WeightableLayerTransformation::isAsymmetricOnWeights(convolutions[0]);
std::pair<std::vector<bool>, bool> transposeAndIsAsymmetricOnWeights = std::get<3>(GetParam());
ASSERT_EQ(transposeAndIsAsymmetricOnWeights.second, isAsymmetricOnWeights);
}

// Network precisions the suite is instantiated with (FP32 only here).
const std::vector<element::Type> netPrecisions = {
element::f32
};

// Input shapes to cover, including partially dynamic batch and spatial dims.
const std::vector<ngraph::PartialShape> suitablePartialShapes = {
ngraph::PartialShape({ 1, 3, 72, 48 }),
ngraph::PartialShape({ 4, 3, 72, 48 }),
ngraph::PartialShape({ Dimension::dynamic(), 3, 72, 48 }),
ngraph::PartialShape({ 1, 3, Dimension::dynamic(), Dimension::dynamic() }),
};

const std::vector<IsAsymmetricOnWeightsTestValues> testValues = {
{
// U8/I8 params with asymmetric quantization support enabled.
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true),
{
ngraph::element::u8,
// Dequantization on activations: Convert -> Subtract(128) -> Multiply(0.02).
{{ngraph::element::f32}, { 128.f }, { 0.02f }},
// Scalar FP32 weights constant.
op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector<float>{ 2.f }),
// FakeQuantize on weights: 255 levels, output interval [-1, 1.27].
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.f }, { 1.27f } },
}
}
};

// Each entry pairs five flags — which FakeQuantize input receives an extra
// Transpose (data, inputLow, inputHigh, outputLow, outputHigh) — with the
// expected isAsymmetricOnWeights result.
const std::vector<std::pair<std::vector<bool>, bool> > transposeFlags = {
// asymmetric quantization is reported when the FakeQuantize layout is supported
{{false, false, false, false, false}, true},
{{true, false, false, false, false}, true},

// a Transpose on any interval input makes the FakeQuantize unsupported,
// so asymmetry is not reported
{{false, true, false, false, false}, false},
{{false, false, true, false, false}, false},
{{false, false, false, true, false}, false},
{{false, false, false, false, true}, false}
};

// Instantiate over the cartesian product of precisions, shapes, test values
// and transpose-flag combinations; names come from getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
smoke_LPT,
IsAsymmetricOnWeightsTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(suitablePartialShapes),
::testing::ValuesIn(testValues),
::testing::ValuesIn(transposeFlags)),
IsAsymmetricOnWeightsTransformation::getTestCaseName);
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ class ConvolutionFunction {
const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights);
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const bool fqOnWeightsTransposeOnData = false,
const bool fqOnWeightsTransposeOnInputLow = false,
const bool fqOnWeightsTransposeOnInputHigh = false,
const bool fqOnWeightsTransposeOnOutputLow = false,
const bool fqOnWeightsTransposeOnOutputHigh = false);

static std::shared_ptr<ngraph::Function> getOriginalWithIncorrectWeights(
const ngraph::Shape& inputShape,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
const ngraph::PartialShape& inputShape,
const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
std::shared_ptr<ngraph::opset1::Constant> weights,
const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights) {
const ngraph::builder::subgraph::FakeQuantizeOnWeights fqOnWeights,
const bool transposeOnData,
const bool transposeOnInputLow,
const bool transposeOnInputHigh,
const bool transposeOnOutputLow,
const bool transposeOnOutputHigh) {
const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
auto dequantizationStructure = dequantizationBefore;
dequantizationStructure.multiply.outPrecision = netPrecision;
Expand All @@ -53,15 +58,32 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
convertOnWeights->constant_fold(convertedOutput, convertOnWeights->input_values());
const auto convertedWeights = convertedOutput[0].get_node_shared_ptr();

const auto onWeights = fakeQuantizeOnWeights.empty() ? convertedWeights :
ngraph::builder::makeFakeQuantize(
convertedWeights, netPrecision,
fakeQuantizeOnWeights.quantizationLevel,
fakeQuantizeOnWeights.constantShape,
fakeQuantizeOnWeights.inputLowValues,
fakeQuantizeOnWeights.inputHighValues,
fakeQuantizeOnWeights.outputLowValues,
fakeQuantizeOnWeights.outputHighValues);
const std::shared_ptr<ngraph::Node> constant = ngraph::opset1::Constant::create(ngraph::element::u64, ngraph::Shape{4}, {0, 1, 2, 3});
const std::shared_ptr<Node> onWeights = fqOnWeights.empty() ?
convertedWeights :
std::make_shared<opset1::FakeQuantize>(
transposeOnData ? std::make_shared<opset1::Transpose>(convertedWeights, constant) : convertedWeights,
transposeOnInputLow ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputLowValues, fqOnWeights.inputLowValues.empty()),
transposeOnInputHigh ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.inputHighValues, fqOnWeights.inputHighValues.empty()),
transposeOnOutputLow ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputLowValues, fqOnWeights.outputLowValues.empty()),
transposeOnOutputHigh ?
std::make_shared<opset1::Transpose>(
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
constant->clone_with_new_inputs({})) :
makeConstant(netPrecision, fqOnWeights.constantShape, fqOnWeights.outputHighValues, fqOnWeights.outputHighValues.empty()),
fqOnWeights.quantizationLevel);

auto convolutionOriginal = ngraph::opset1::Convolution(
ngraph::op::TemporaryReplaceOutputType(dequantization, netPrecision).get(),
Expand Down
2 changes: 2 additions & 0 deletions runtime/bindings/python/src/pyopenvino/core/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@ void blob_from_numpy(const py::handle& arr, InferenceEngine::Blob::Ptr blob) {
Common::fill_blob<float>(arr, blob);
} else if (py::isinstance<py::array_t<double>>(arr)) {
Common::fill_blob<double>(arr, blob);
} else if (py::isinstance<py::array_t<bool>>(arr)) {
Common::fill_blob<bool>(arr, blob);
} else if (py::isinstance<py::array_t<int8_t>>(arr)) {
Common::fill_blob<int8_t>(arr, blob);
} else if (py::isinstance<py::array_t<int16_t>>(arr)) {
Expand Down

0 comments on commit a6e21e4

Please sign in to comment.