Skip to content

Commit

Permalink
Merge 2020.4.0.1 (#1764)
Browse files Browse the repository at this point in the history
* [GNA] Update GNA lib + propagate QoS timeout to the calling app (#1188)

* [GNA] Remove empty PWL (#1459)

* [GNA] Support timeout value set in Wait (#1499)

* [GNA] Bump GNA2 version to 1010 (#1510)

* [GNA] stored request id for completed sync infer request in order to get status later using wait() (#1458)

* stored request id for completed async infer request in order to get its status later

* preserved the "not started" status across multiple sequential calls to wait()

Co-authored-by: Denis Orlov <[email protected]>

* [GNA] Fix callbacks (#1607)

* [GNA] Bump GNA2 version to 1047 (#1614)

* merge documentation updates from 2020/4 branch (#1671)

* update system requirements (#1321)

* update release version in readme

* Doc Migration from Gitlab (#1289)

* Update FakeQuantize_1.md

* Update performance_benchmarks.md

* Updates graphs for FPGA

* Update performance_benchmarks.md

* Change DL Workbench structure (#1)

* Changed DL Workbench structure

* Update performance_benchmarks_faq.md

* Fixes in DL Workbench layout

* Fixes for CVS-31290

* [DL Workbench] Minor correction

* Fix for CVS-30955

* Added nGraph deprecation notice as requested by Zoe

* fix broken links in api doxy layouts

* Fixed POT TOC

* Update PAC_Configure.md

PAC DCP 1.2.1 install guide.

* Update inference_engine_intro.md

* Update opset.md

* Update VisionAcceleratorFPGA_Configure.md (#1378)

Updated from 2020.3 to 2020.4

Co-authored-by: domi2000 <[email protected]>

* Updated documentation for 2020.4 (#1434)

* Updated documentation for 2020.4

* Updated Core::ReadNetwork documentation (#1178)

Co-authored-by: Ilya Churaev <[email protected]>
Co-authored-by: Nikolay Tyukaev <[email protected]>
Co-authored-by: domi2000 <[email protected]>
Co-authored-by: Ilya Lavrenov <[email protected]>
Co-authored-by: Ilya Churaev <[email protected]>

* Documentation updates for 2020.4 (#1672) (#1729)

* Doc updates

* 2020.4 doc updates

* Removed </br> tag

* Minor fix

* Minor fixes

* Updated documentation for 2020.4 (#1434)

* Updated documentation for 2020.4

* Updated Core::ReadNetwork documentation (#1178)

* Fixed docs

Co-authored-by: Ilya Churaev <[email protected]>

Co-authored-by: Ilya Lavrenov <[email protected]>
Co-authored-by: Ilya Churaev <[email protected]>

Co-authored-by: Ilya Lavrenov <[email protected]>
Co-authored-by: Ilya Churaev <[email protected]>

Co-authored-by: Pavel Rodionov <[email protected]>
Co-authored-by: Eugene Smirnov <[email protected]>
Co-authored-by: Alexey Suhov <[email protected]>
Co-authored-by: Ilya Churaev <[email protected]>
Co-authored-by: Nikolay Tyukaev <[email protected]>
Co-authored-by: domi2000 <[email protected]>
Co-authored-by: Ilya Lavrenov <[email protected]>
Co-authored-by: Andrey Zaytsev <[email protected]>
  • Loading branch information
9 people authored Aug 14, 2020
1 parent 4fa0e8a commit c880ecb
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 29 deletions.
2 changes: 1 addition & 1 deletion inference-engine/cmake/dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ if (ENABLE_GNA)
set(GNA_VERSION "01.00.00.1401")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.0925")
set(GNA_VERSION "02.00.00.1047")
endif()
RESOLVE_DEPENDENCY(GNA
ARCHIVE_UNIFIED "GNA/GNA_${GNA_VERSION}.zip"
Expand Down
11 changes: 4 additions & 7 deletions inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1402,9 +1402,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
comp.num_columns_out,
comp.op.affine.num_bytes_per_bias,
comp.op.affine.ptr_biases),
createGna2TensorPwl(
0,
nullptr), // Temporal PWL as not null required by Gna2OperationInitRecurrent
nullptr,
create_uint32_parameter(1)); // TODO: GNA2: Handle other delays
AdvanceOperationIfAllApplied(component, i, gnaOperation);
#else
Expand Down Expand Up @@ -1464,9 +1462,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
comp.op.conv1D.num_filters,
comp.op.conv1D.num_bytes_per_bias,
comp.op.conv1D.ptr_biases),
createGna2TensorPwl(
0,
nullptr), // Temporal PWL as not null required by Gna2OperationInitConvolution
nullptr,
create_shape1D_parameter(
comp.op.conv1D.num_feature_maps * comp.op.conv1D.num_feature_map_columns),
nullptr);
Expand Down Expand Up @@ -1520,7 +1516,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
THROW_GNA_EXCEPTION << "Pooling component with no preceeding component";
#if GNA_LIB_VER == 2
} else if (gnaOperation->Type == Gna2OperationTypeConvolution) {
if (gnaOperation->Operands[PwlOpIdx]->Shape.Dimensions[0] != 0) {
auto pwlOperand = gnaOperation->Operands[PwlOpIdx];
if (pwlOperand != nullptr && pwlOperand->Shape.Dimensions[0] != 0) {
THROW_GNA_EXCEPTION << "Encountered activation component before pooling component at." << i;
} else {
const auto poolMode = reinterpret_cast<Gna2PoolingMode*>(gnaUserAllocator(sizeof(Gna2PoolingMode)));
Expand Down
12 changes: 8 additions & 4 deletions inference-engine/src/gna_plugin/gna_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,19 +266,23 @@ const std::map <const std::pair<Gna2OperationType, int32_t>, const std::string>
};
#endif

void GNADeviceHelper::wait(uint32_t reqId) {
bool GNADeviceHelper::wait(uint32_t reqId, int64_t millisTimeout) {
#if GNA_LIB_VER == 2
const auto status = Gna2RequestWait(reqId, GNA_TIMEOUT);
const auto status = Gna2RequestWait(reqId, millisTimeout);
if (status == Gna2StatusDriverQoSTimeoutExceeded) {
return false;
}
checkGna2Status(status);
#else
if (isPerformanceMeasuring) {
nGNAStatus = GNAWaitPerfRes(nGNAHandle, GNA_TIMEOUT, reqId, &nGNAPerfResults);
nGNAStatus = GNAWaitPerfRes(nGNAHandle, millisTimeout, reqId, &nGNAPerfResults);
} else {
nGNAStatus = GNAWait(nGNAHandle, GNA_TIMEOUT, reqId);
nGNAStatus = GNAWait(nGNAHandle, millisTimeout, reqId);
}
checkStatus();
#endif
updateGnaPerfCounters();
return true;
}

#if GNA_LIB_VER == 1
Expand Down
3 changes: 1 addition & 2 deletions inference-engine/src/gna_plugin/gna_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ class GNADeviceHelper {

#define MAX_TIMEOUT 500000
#endif
const uint32_t GNA_TIMEOUT = MAX_TIMEOUT;
bool isPerformanceMeasuring = false;
bool deviceOpened = false;
public:
Expand Down Expand Up @@ -115,7 +114,7 @@ class GNADeviceHelper {
static void checkGna2Status(Gna2Status status);
static void checkGna2Status(Gna2Status status, const Gna2Model& gnaModel);
#endif
void wait(uint32_t id);
bool wait(uint32_t id, int64_t millisTimeout = MAX_TIMEOUT);

struct DumpResult {
#if GNA_LIB_VER == 2
Expand Down
30 changes: 26 additions & 4 deletions inference-engine/src/gna_plugin/gna_infer_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,16 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal {
void InferImpl() override {
// execute input pre-processing.
execDataPreprocessing(_inputs);
plg->Infer(_inputs, _outputs);
// result returned from sync infer wait method
auto result = plg->Infer(_inputs, _outputs);

// if result is false we are dealing with QoS feature
// if result is ok, next call to wait() will return Ok, if request not in gna_queue
if (!result) {
inferRequestIdx = -1;
} else {
inferRequestIdx = -2;
}
}

/**
Expand All @@ -73,20 +82,33 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal {
if (_callback) {
auto infer_request = _publicInterface.lock();
IE_ASSERT(infer_request != nullptr);
auto res = Wait(0);
auto res = Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
_callback(infer_request, res);
}
}


InferenceEngine::StatusCode Wait(int64_t millis_timeout) override {
if (inferRequestIdx == -1) {
return InferenceEngine::INFER_NOT_STARTED;
} else if (millis_timeout < -1) {
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str;
}

plg->Wait(inferRequestIdx);
return InferenceEngine::OK;
bool qosOK;
if (millis_timeout == InferenceEngine::IInferRequest::WaitMode::RESULT_READY) {
qosOK = plg->Wait(inferRequestIdx);
} else {
qosOK = plg->WaitFor(inferRequestIdx, millis_timeout);
}

if (qosOK) {
return InferenceEngine::OK;
} else {
// need to preserve invalid state here to avoid next Wait() from clearing it
inferRequestIdx = -1;
return InferenceEngine::INFER_NOT_STARTED;
}
}
};
} // namespace GNAPluginNS
24 changes: 16 additions & 8 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,16 +956,23 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
return idx;
}

void GNAPlugin::Wait(uint32_t request_idx) {
bool GNAPlugin::Wait(uint32_t request_idx) {
return WaitFor(request_idx, MAX_TIMEOUT);
}

bool GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
#if GNA_LIB_VER == 2
auto& nnets = gnaRequestConfigToRequestIdMap;
#endif
if (nnets.size() <= request_idx) return; // TODO: GNA2: check whether necessary
if (nnets.size() <= request_idx) return true; // TODO: GNA2: check whether necessary
// already synced TODO: might be copy required ???
if (std::get<1>(nnets[request_idx]) == -1) return;
if (std::get<1>(nnets[request_idx]) == -1) return true;

if (gnadevice) {
gnadevice->wait(std::get<1>(nnets[request_idx]));
if (!gnadevice->wait(std::get<1>(nnets[request_idx]), millisTimeout)) {
std::get<1>(nnets[request_idx]) = -1;
return false;
}
}

std::get<1>(nnets[request_idx]) = -1;
Expand Down Expand Up @@ -1055,13 +1062,14 @@ void GNAPlugin::Wait(uint32_t request_idx) {
}
output_idx++;
}
return true;
}

void GNAPlugin::Reset() {
graphCompiler.Reset();
}

void GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &output) {
bool GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &output) {
BlobMap bmInput;
BlobMap bmOutput;
if (inputsDataMap.size() != 1) {
Expand All @@ -1072,11 +1080,11 @@ void GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob
bmInput[inputsDataMap.begin()->first] = std::shared_ptr<Blob>(const_cast<Blob*>(&input), [](Blob*){});
IE_ASSERT(!outputsDataMap.empty());
bmOutput[outputsDataMap.begin()->first] = std::shared_ptr<Blob>(&output, [](Blob*){});
Infer(bmInput, bmOutput);
return Infer(bmInput, bmOutput);
}

void GNAPlugin::Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result) {
Wait(QueueInference(input, result));
bool GNAPlugin::Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result) {
return Wait(QueueInference(input, result));
}

Blob::Ptr GNAPlugin::GetOutputBlob(const std::string& name, InferenceEngine::Precision precision) {
Expand Down
7 changes: 4 additions & 3 deletions inference-engine/src/gna_plugin/gna_plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::

void LoadNetwork(InferenceEngine::ICNNNetwork &network);

void Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result);
bool Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result);
void GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap);
void AddExtension(InferenceEngine::IExtensionPtr extension) override;

Expand All @@ -107,15 +107,16 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
InferenceEngine::ExecutableNetwork LoadNetwork(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config_map,
InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; }
void Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result);
bool Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result);
void SetCore(InferenceEngine::ICore*) noexcept override {}
InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;}
void Reset();
void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string>& config,
InferenceEngine::QueryNetworkResult &res) const override;
uint32_t QueueInference(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result);
void Wait(uint32_t idx = 0);
bool Wait(uint32_t idx);
bool WaitFor(uint32_t idx, int64_t millisTimeout);

InferenceEngine::Parameter GetConfig(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter> & options) const override;
Expand Down

0 comments on commit c880ecb

Please sign in to comment.