Resolve review comments #4

Merged
17 commits merged on Oct 13, 2021
6 changes: 6 additions & 0 deletions cmake/test_model_zoo.cmake
@@ -66,6 +66,11 @@ ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/ngraph/test"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ngraph"
onnx_out_files)

set(rel_path "inference-engine/tests/functional/plugin/shared/models")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/func_tests/models"
ft_out_files)

set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader"
@@ -116,6 +121,7 @@ if(ENABLE_TESTS)
endif()

add_custom_target(test_model_zoo DEPENDS ${onnx_out_files}
${ft_out_files}
${ie_onnx_out_files}
${ie_serialize_out_files}
${ie_onnx_import_out_files})
4 changes: 4 additions & 0 deletions docs/IE_DG/InferenceEngine_QueryAPI.md
@@ -71,6 +71,10 @@ Or the current temperature of `MYRIAD` device:

@snippet snippets/InferenceEngine_QueryAPI4.cpp part4

The code below demonstrates how to get memory statistics of the `GPU` device:

@snippet snippets/InferenceEngine_QueryAPI6.cpp part6
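
As a minimal follow-up sketch (assuming the `exeNetwork` object from the snippet above, `<iostream>` and `<map>` included, and a plugin that supports this metric), the returned map can be iterated to inspect the individual counters:

```cpp
// Hypothetical continuation of the snippet above; the set of keys and the
// units of the values are defined by the plugin.
std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
for (const auto& entry : statistics_map) {
    std::cout << entry.first << ": " << entry.second << std::endl;
}
```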

### GetConfig()

The method is used to get information about configuration values the executable network has been created with:
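
For example, a minimal sketch (assuming an `exeNetwork` loaded as in the earlier snippets; the set of supported config keys depends on the device) might query the `PERF_COUNT` value:

```cpp
// Illustrative only: PERF_COUNT is one config key commonly reported by plugins.
InferenceEngine::Parameter value = exeNetwork.GetConfig(CONFIG_KEY(PERF_COUNT));
std::string perfCountEnabled = value.as<std::string>();  // "YES" or "NO"
```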
12 changes: 12 additions & 0 deletions docs/snippets/InferenceEngine_QueryAPI6.cpp
@@ -0,0 +1,12 @@
#include <ie_core.hpp>

int main() {
using namespace InferenceEngine;
//! [part6]
InferenceEngine::Core core;
auto network = core.ReadNetwork("sample.xml");
auto exeNetwork = core.LoadNetwork(network, "GPU");
std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
//! [part6]
return 0;
}
2 changes: 2 additions & 0 deletions docs/template_extension/op.hpp
@@ -21,7 +21,9 @@ class Operation : public ngraph::op::Op {
int64_t getAddAttr() const {
return add;
}
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const ngraph::HostTensorVector& outputs, const ngraph::HostTensorVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_END
bool has_evaluate() const override;

private:
4 changes: 2 additions & 2 deletions docs/template_plugin/src/template_executable_network.cpp
@@ -13,10 +13,10 @@
#include "ie_icore.hpp"
#include "ie_ngraph_utils.hpp"
#include "openvino/core/except.hpp"
#include "openvino/pass/serialize.hpp"
#include "template/template_config.hpp"
#include "template_itt.hpp"
#include "template_plugin.hpp"
#include "transformations/serialize.hpp"
#include "transformations/utils/utils.hpp"

using namespace TemplatePlugin;
@@ -205,7 +205,7 @@ void TemplatePlugin::ExecutableNetwork::Export(std::ostream& modelStream) {
// Note: custom ngraph extensions are not supported
std::map<std::string, ngraph::OpSet> custom_opsets;
std::stringstream xmlFile, binFile;
ngraph::pass::Serialize serializer(xmlFile, binFile, custom_opsets);
ov::pass::Serialize serializer(xmlFile, binFile, custom_opsets);
serializer.run_on_function(_function);

auto m_constants = binFile.str();
@@ -2,7 +2,7 @@

This sample demonstrates how to execute a synchronous inference using the [nGraph function feature](../../../docs/nGraph_DG/build_function.md) to create a network. The network uses weights from the LeNet classification network, which is known to work well on digit classification tasks.

The sample supports only single-channel `ubyte` images as an input.
The sample supports only single-channel [MNIST database](http://yann.lecun.com/exdb/mnist) images as an input.

You do not need an XML file to create a network. The `ngraph::Function` API allows you to create a network on the fly from the source code.
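
For illustration only, the sketch below builds a trivial function (a single ReLU, not the LeNet topology this sample constructs) and loads it without any XML file; the device name and shapes are placeholders:

```cpp
#include <ie_core.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset7.hpp>

int main() {
    // Hypothetical minimal graph: Parameter -> ReLU -> Result.
    auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32,
                                                             ngraph::Shape{1, 1, 28, 28});
    auto relu = std::make_shared<ngraph::opset7::Relu>(input);
    auto result = std::make_shared<ngraph::opset7::Result>(relu);
    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
                                                       ngraph::ParameterVector{input},
                                                       "tiny_net");

    // Wrap the function into a CNNNetwork and load it; no XML file is involved.
    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network(function);
    auto exeNetwork = core.LoadNetwork(network, "CPU");
    return 0;
}
```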

@@ -21,7 +21,7 @@ Basic Inference Engine API is covered by [Hello Classification C++ sample](../he
|:--- |:---
| Validated Models | LeNet
| Model Format | Network weights file (\*.bin)
| Validated images | single-channel `ubyte` images
| Validated images | single-channel `MNIST ubyte` images
| Supported devices | [All](../../../docs/IE_DG/supported_plugins/Supported_Devices.md) |
| Other language realization | [Python](../../ie_bridges/python/sample/ngraph_function_creation_sample/README.md) |

@@ -37,7 +37,7 @@ To build the sample, please use instructions available at [Build the Sample Appl

## Running

To run the sample, you need to specify the model weights and a ubyte image:
To run the sample, you need to specify the model weights and an MNIST ubyte image:

- you can use the LeNet model weights in the sample folder: `lenet.bin`, a file with FP32 weights
- you can use images from the media files collection available at https://storage.openvinotoolkit.org/data/test_data.
@@ -0,0 +1,10 @@
0
1
2
3
4
5
6
7
8
9
@@ -338,6 +338,12 @@ int main(int argc, char* argv[]) {
slog::warn << "Image " + i + " cannot be read!" << slog::endl;
continue;
}

if (reader->size() != inputInfoItem.second->getTensorDesc().getDims()[2] *
inputInfoItem.second->getTensorDesc().getDims()[3]) {
throw std::logic_error("Unsupported format. Only MNIST ubyte images are supported.");
}

/** Store image data **/
std::shared_ptr<unsigned char> data(reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3],
inputInfoItem.second->getTensorDesc().getDims()[2]));
11 changes: 11 additions & 0 deletions inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
@@ -128,6 +128,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys;
@@ -139,6 +140,16 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
nr *= 2;
IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
} else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
std::map<std::string, uint64_t> statistics;
if (m_context != nullptr) {
auto impl = getContextImpl(m_context);
impl->acquire_lock();
std::shared_ptr<cldnn::engine> eng = impl->GetEngine();
eng->get_memory_statistics(&statistics);
impl->release_lock();
}
IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics);
} else {
IE_THROW() << "Unsupported ExecutableNetwork metric: " << name;
}
6 changes: 1 addition & 5 deletions inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -752,11 +752,7 @@ inline float getScaleFactor(InferenceEngine::CNNLayerPtr layer, QuantizedDataTyp
}
}

auto isZero = [](float p1) {
return std::abs(p1) <= 0.00001f;
};

if (scale_factor < 0.0 || isZero(scale_factor) || std::isinf(scale_factor)) {
if (scale_factor <= 0.0 || std::isinf(scale_factor)) {
THROW_GNA_LAYER_EXCEPTION(layer) << "Invalid scale factor: " << scale_factor;
}

71 changes: 53 additions & 18 deletions inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -238,6 +238,57 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
const double pow_domain = 16;

protected :
/**
* @brief Adjust output scale factor to get the most precise PWL slope.
* NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
* For all other layers, it does not improve accuracy.
* @param sf Scale factor to be adjusted
* @param cnnLayer CNN layer whose activation scale factor is being adjusted
* @param layer Layer information
* @param quantizedParams Quantization parameters
* @return the adjusted scale factor
*/
float adjustScaleFactor(float sf, InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
QuantizedLayerParams* quantizedParams) {
auto get_rank = [](uint32_t value) {
uint8_t rank = 0;
while (value >= 1) {
++rank;
value /= 10;
}
return rank;
};
auto pow_10 = [](uint8_t degree) {
uint32_t value = 1;
for (uint8_t i = 0; i < degree; ++i) {
value *= 10;
}
return value;
};

auto slopes = getPWLSlopes(layer);
if (!slopes.empty()) {
auto div = 10;
auto startRange = sf > 1.0f ? static_cast<uint32_t>(sf) : sf;
auto endRange = startRange - startRange / div;
endRange = endRange > 1.0f ? static_cast<uint32_t>(endRange) : endRange;
uint32_t steps = 10000;
uint32_t rangeSize = static_cast<uint32_t>(startRange - endRange);
if (rangeSize >= 1) {
steps *= rangeSize / pow_10(get_rank(rangeSize) - 1);
}

auto scaleFactors = generateScaleFactors(startRange, endRange, steps);
auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
if (!fp32eq(sf, newScaleFactor) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
<< " from: " << sf << " to: " << newScaleFactor << "\n";
sf = newScaleFactor;
}
}
return sf;
}

float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
int inputsSize,
@@ -418,24 +469,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
}
}

// Adjust output scale factor to get the most precise PWL slope.
// NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
// For all other layers, it does not improve accuracy.
auto slopes = getPWLSlopes(layer);
if (!slopes.empty() && !usePrevScaleFactor) {
auto div = 10;
auto mul = 10;
auto startRange = result > 1.0f ? static_cast<int32_t>(result) : result;
auto endRange = startRange - startRange / div;
endRange = endRange > 1.0f ? static_cast<int32_t>(endRange) : endRange;
auto scaleFactors = generateScaleFactors(startRange, endRange, static_cast<int32_t>(startRange - endRange) * mul);
auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
if (!fp32eq(result, newScaleFactor) &&
!fp32eq(newScaleFactor, 1.0f) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
<< " from: " << result << " to: " << newScaleFactor << "\n";
result = newScaleFactor;
}
if (!usePrevScaleFactor) {
result = adjustScaleFactor(result, cnnLayer, layer, quantizedParams);
}
}

30 changes: 24 additions & 6 deletions inference-engine/src/gna_plugin/gna_graph_patterns.hpp
@@ -42,9 +42,24 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
next = input_to.begin()->second;
}

// Check if the found layer is NCHW to NHWC permute, if it's not just skip this convolution
if (!LayerInfo(next).isPermute() || next->input()->getLayout() != InferenceEngine::Layout::NCHW ||
next->GetParamAsInts("order") != GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) {
// Check if the found layer is NCHW to NHWC permute or has 1D data, if it's not just skip this convolution
if (LayerInfo(next).isPermute()) {
if (next->outData[0]->getLayout() != InferenceEngine::Layout::NCHW ||
next->GetParamAsInts("order") != GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) {
return std::make_pair(nullptr, nullptr);
}
} else if (LayerInfo(next).isReshape()) {
if (next->outData.size() != 1) {
return std::make_pair(nullptr, nullptr);
}
// Check if reshape is expected for this pattern:
// the next layer has both height and width dimensions > 1
if (next->outData[0]->getDims().size() != 4 ||
GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::H) != 1 ||
GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::W) != 1) {
return std::make_pair(nullptr, nullptr);
}
} else {
return std::make_pair(nullptr, nullptr);
}

@@ -55,23 +70,26 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
InferenceEngine::CNNNetHasPrevLayer(prev.get())) {
prev = InferenceEngine::CNNNetPrevLayer(prev);
}
// Check if the found layer is NHWC to NCHW permute or have 1D data, if it's not just skip this convolution
// Check if the found layer is NHWC to NCHW permute or has 1D data, if it's not just skip this convolution
if (LayerInfo(prev).isPermute()) {
if (prev->outData[0]->getLayout() != InferenceEngine::Layout::NCHW ||
prev->GetParamAsInts("order") != GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW)) {
return std::make_pair(nullptr, nullptr);
}
} else {
} else if (LayerInfo(prev).isReshape()) {
if (parent->outData.size() != 1 || InferenceEngine::getInputTo(parent->outData[0]).size() != 1) {
return std::make_pair(nullptr, nullptr);
}
// Check if reshape is expected for this pattern:
// the previous layer has number of channels > 1 and one of height/width dimensions is also > 1
if (GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::C) != 1 &&
if (parent->insData[0].lock()->getDims().size() != 4 ||
GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::C) != 1 &&
(GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::H) != 1 ||
GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::W) != 1)) {
return std::make_pair(nullptr, nullptr);
}
} else {
return std::make_pair(nullptr, nullptr);
}
return std::make_pair(prev, next);
}
26 changes: 18 additions & 8 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -1076,22 +1076,32 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {

auto nextLayers = CNNNetGetAllNextLayersSkipCertain(inputLayer, -1, doesntHaveGnaMapping);

std::vector<intel_dnn_orientation_t> orientations;
for (auto &nextLayer : nextLayers) {
auto dnnLayer = graphCompiler.dnnComponents.findComponent(nextLayer);
// non functional layer - skipped by gna
if (nullptr == dnnLayer) {
THROW_GNA_LAYER_EXCEPTION(inputLayer) << " gna mapped layer search connection failed";
}
// input orientation might be already initialized, thus verify that it matches
if (!inputsDesc->orientation_in.count(inputLayer->name)) {
inputsDesc->orientation_in[inputLayer->name] = dnnLayer->orientation_in;
} else {
if (inputsDesc->orientation_in[inputLayer->name] != dnnLayer->orientation_in &&
dnnLayer->num_rows_in > 1 && dnnLayer->num_columns_in > 1) {
THROW_GNA_EXCEPTION << "orientation for input layer: " << inputLayer->name << "cannot be calculated";
}
// Orientation of an input doesn't make sense for components transposing the data and
// components with identity dimensions, so skip them
if (dnnLayer->operation != kDnnInterleaveOp && dnnLayer->operation != kDnnDeinterleaveOp &&
dnnLayer->num_rows_in > 1 && dnnLayer->num_columns_in > 1) {
orientations.push_back(dnnLayer->orientation_in);
}
}

if (orientations.empty()) {
// in this case orientation doesn't make sense
inputsDesc->orientation_in[inputLayer->name] = kDnnNonInterleavedOrientation;
} else if (std::adjacent_find(orientations.begin(), orientations.end(),
std::not_equal_to<intel_dnn_orientation_t>()) == orientations.end()) {
// all orientations are equal
inputsDesc->orientation_in[inputLayer->name] = orientations.front();
} else {
// unsupported case: orientations are different and they are important for these components
THROW_GNA_EXCEPTION << "orientation for input layer: " << inputLayer->name << " cannot be calculated";
}
}
} else {
for (auto &inputLayer : inputLayers) {
3 changes: 3 additions & 0 deletions inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
@@ -265,6 +265,9 @@ class LayerInfo {
bool isNonFunctional() const noexcept {
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze") || isTrivialPermute();
}
bool isReshape() const noexcept {
return isOfType("reshape");
}
bool isPermute() const noexcept {
return isOfType("permute");
}
@@ -1265,7 +1265,7 @@ void InsertSplitAligningFilterPass::run() {
size_t currentOffset = 0;
int splitOutIndex = 0;
for (auto &&splitOutput : l->outData) {
auto outputSize = product(++begin(splitOutput->getDims()), end(splitOutput->getDims()));
auto outputSize = product(begin(splitOutput->getDims()), end(splitOutput->getDims()));

if ((currentOffset != ALIGN64(currentOffset)) || (padding != 0)) {
// check that this split output actually connected to further layers
@@ -72,6 +72,9 @@ bool InsertTransposeAfterConvOrPool::run_on_function(std::shared_ptr<ngraph::Fun
transpose_ids.push_back(ix);
}
}
if (transpose_ids.size() == 1) {
continue;
}
if (transpose_ids.size() != 2) {
THROW_GNA_EXCEPTION << "Unable to insert transpose after: " << node->get_friendly_name()
<< " number of dimensions to transpose: " << transpose_ids.size();
@@ -29,7 +29,7 @@
#include <array>
#include <cstdint>

#include "transformations/serialize.hpp"
#include "openvino/pass/serialize.hpp"
#include "ie_ngraph_utils.hpp"
#include "ie_plugin_config.hpp"
#include "ie_algorithm.hpp"
@@ -714,8 +714,8 @@ void HeteroExecutableNetwork::Export(std::ostream& heteroModel) {

// Note: custom ngraph extensions are not supported
std::stringstream xmlFile, binFile;
ngraph::pass::Serialize serializer(xmlFile, binFile,
ngraph::pass::Serialize::Version::IR_V10);
ov::pass::Serialize serializer(xmlFile, binFile,
ov::pass::Serialize::Version::IR_V10);
serializer.run_on_function(subnet.getFunction());

auto m_constants = binFile.str();