Openvino ep ort 5.0 (#15626)
### Description
- Adds VPU support to the OpenVINO Execution Provider (see the usage sketch below).
- Bug fixes for the GPU and CPU device paths.
- Changes the OpenVINO backend to use the serialized model API for faster first-inference latency.
- Deprecates the HDDL (VAD-M) and MYRIAD devices and removes the related code.
- Adds support for OpenVINO 2023.0.
- Adds dynamic shape support for iGPU.
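
As a usage sketch for reviewers, this is how an application could opt in to the devices this PR touches via the public C++ API. The `"VPUX_FP16"` device string is an assumption inferred from the new `OPENVINO_CONFIG_VPUX_FP16` build flag and is not confirmed by this diff; `"GPU_FP16"` is one of the values documented in the headers changed below.

```cpp
// Hedged sketch (not part of this PR): selecting an OpenVINO EP device at
// session-creation time through the C++ API. "VPUX_FP16" is a hypothetical
// device_type inferred from the new OPENVINO_CONFIG_VPUX_FP16 build flag;
// "CPU_FP32", "GPU_FP32" and "GPU_FP16" are the documented values.
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "openvino-ep-example");
  Ort::SessionOptions session_options;

  OrtOpenVINOProviderOptions ov_options{};  // zero-initialize all fields
  ov_options.device_type = "VPUX_FP16";     // assumption: new VPU target; use "GPU_FP16" for Intel GPUs
  ov_options.enable_vpu_fast_compile = 0;   // field shown in onnxruntime_c_api.h below

  session_options.AppendExecutionProvider_OpenVINO(ov_options);

  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
  // ... build input tensors and call session.Run(...) as usual.
  return 0;
}
```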

### Motivation and Context
- VPU is upcoming hardware that provides AI acceleration for client systems through OpenVINO.

---------

Signed-off-by: MaajidKhan <[email protected]>
Co-authored-by: Suryaprakash Shanmugam <[email protected]>
Co-authored-by: MaajidKhan <[email protected]>
Co-authored-by: Preetha Veeramalai <[email protected]>
4 people authored Apr 26, 2023
1 parent b1b6e55 commit ebaafac
Showing 36 changed files with 704 additions and 1,301 deletions.
32 changes: 13 additions & 19 deletions cmake/CMakeLists.txt
@@ -1139,17 +1139,16 @@ if (onnxruntime_USE_OPENVINO)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2022.3")
set(OPENVINO_VERSION "2022.3")
add_definitions(-DOPENVINO_2022_3=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2022.3")
add_definitions(-DOPENVINO_2022_3=1)
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()

if (onnxruntime_USE_OPENVINO_MYRIAD)
add_definitions(-DOPENVINO_CONFIG_MYRIAD=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
endif()
@@ -1166,17 +1165,12 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VAD_M)
add_definitions(-DOPENVINO_CONFIG_VAD_M=1)
if (onnxruntime_USE_OPENVINO_VPUX_FP16)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VAD_F)
add_definitions(-DOPENVINO_CONFIG_VAD_F=1)
endif()

if (onnxruntime_USE_OPENVINO_MYRIAD_NP)
add_definitions(-DOPENVINO_CONFIG_MYRIAD=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
if (onnxruntime_USE_OPENVINO_VPUX_U8)
add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
@@ -1199,13 +1193,13 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VAD_M_NP)
add_definitions(-DOPENVINO_CONFIG_VAD_M=1)
if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VAD_F_NP)
add_definitions(-DOPENVINO_CONFIG_VAD_F=1)
if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

8 changes: 2 additions & 6 deletions docs/python/ReadMeOV.rst
@@ -6,8 +6,7 @@ OpenVINO™ Execution Provider for ONNX Runtime
OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across many `AI models <https://github.com/onnx/models>`_ on a variety of Intel® hardware such as:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® Movidius™ Vision Processing Units - referred to as VPU.

- Intel® discrete GPUs

Installation
------------
@@ -21,9 +20,6 @@ Requirements
This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® Movidius™ Vision Processing Units (VPUs).

Please Note for VAD-M use Docker installation / Build from Source for Linux.

``pip3 install onnxruntime-openvino==1.13.1``

@@ -40,7 +36,7 @@ For more details on build and installation please refer to `Build <https://onnxr
Usage
^^^^^

By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated GPU or Intel® VPU for AI inferencing.
By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU.
Invoke `the provider config device type argument <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options>`_ to change the hardware on which inferencing is done.

For more API calls and environment variables, see `Usage <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#configuration-options>`_.
@@ -13,7 +13,7 @@ extern "C" {

/**
* \param device_type openvino device type and precision. Could be any of
* CPU_FP32, CPU_FP16, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32.
* CPU_FP32, CPU_FP16, GPU_FP32, GPU_FP16
*/
ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_OpenVINO,
_In_ OrtSessionOptions* options, _In_ const char* device_type);
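
For completeness, a small sketch of calling the factory function declared above with one of the device strings its comment still documents. The forward declaration below is a simplified copy (without the `ORT_API_STATUS` macro and SAL annotations) so the snippet stands alone; real code should check and release the returned status through the `OrtApi`.

```cpp
// Sketch of the C-style factory call declared above.
#include <onnxruntime_c_api.h>

extern "C" OrtStatus* OrtSessionOptionsAppendExecutionProvider_OpenVINO(
    OrtSessionOptions* options, const char* device_type);

void EnableOpenVINO(OrtSessionOptions* session_options) {
  // "GPU_FP16" is one of the values the updated comment lists;
  // the MYRIAD/VAD-M strings are removed by this PR.
  OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_OpenVINO(
      session_options, "GPU_FP16");
  if (status != nullptr) {
    // In real code, inspect the error with OrtApi::GetErrorMessage and
    // free it with OrtApi::ReleaseStatus.
  }
}
```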
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -596,7 +596,7 @@ typedef struct OrtOpenVINOProviderOptions {
#endif
/** \brief Device type string
*
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "MYRIAD_FP16", "VAD-M_FP16" or "VAD-F_FP32"
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_vpu_fast_compile; ///< 0 = disabled, nonzero = enabled
53 changes: 9 additions & 44 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -39,6 +39,8 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
subgraph_context_.precision = InferenceEngine::Precision::FP32;
} else if (prec_str == "FP16") {
subgraph_context_.precision = InferenceEngine::Precision::FP16;
} else if (prec_str == "U8") {
subgraph_context_.precision = InferenceEngine::Precision::U8;
} else {
throw std::string("Invalid OpenVINO Precision type: " + prec_str);
}
@@ -54,14 +56,6 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,

auto graph_inputs = subgraph.GetInputs();
for (auto input : graph_inputs) {
if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) {
auto shape = input->Shape();
if (shape != nullptr) {
if (shape->dim_size() != 4) {
subgraph_context_.set_vpu_config = true;
}
}
}
auto it = subgraph_context_.input_names.find(input->Name());
if (it == subgraph_context_.input_names.end()) {
throw std::string("Input not found in the input defs list");
@@ -79,30 +73,11 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
subgraph_context_.subgraph_name = fused_node.Name();
model_proto_ = GetModelProtoFromFusedNode(fused_node, subgraph, logger);

if (ModelHasBatchedInputs(*model_proto_) &&
GetGlobalContext().is_wholly_supported_graph &&
GetGlobalContext().device_type.find("HDDL") != std::string::npos) {
subgraph_context_.enable_batching = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model can be Batch inferenced \n";
auto model_copy = ReWriteBatchDimWithOne(*model_proto_);
try {
concrete_backend_ = BackendFactory::MakeBackend(*model_copy,
GetGlobalContext(),
subgraph_context_);
} catch (std::string const& msg) {
throw msg;
}
subgraph_context_.has_dynamic_input_shape = false;

} else if (ModelHasSymbolicInputDims(subgraph)) {
if (ModelHasSymbolicInputDims(subgraph)) {
subgraph_context_.has_dynamic_input_shape = true;
if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims."
" Defering backend initialization and device_type is MYRIAD.";
}
if (GetGlobalContext().device_type.find("CPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims and "
<< "device_type is CPU.";
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if (GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos) {
if (GetGlobalContext().enable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
@@ -117,12 +92,8 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
<< "Backend created for graph " << subgraph_context_.subgraph_name;
}
}
} else if (ModelHasSymbolicInputDims(subgraph) &&
GetGlobalContext().device_type.find("GPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims. Defering backend initialization";
subgraph_context_.has_dynamic_input_shape = true;
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concreate input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;

subgraph_context_.has_dynamic_input_shape = false;
try {
@@ -287,20 +258,14 @@ void BackendManager::Compute(OrtKernelContext* context) {
#endif
bool use_dynamic_backend = true;
if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape &&
GetGlobalContext().device_type.find("CPU") != std::string::npos) {
(GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
concrete_backend_->Infer(context);
use_dynamic_backend = false;
} else if (use_dynamic_backend && subgraph_context_.has_dynamic_input_shape) {
std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(ctx);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);

if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) {
for (size_t i = 0; i < subgraph_context_.input_indexes.size(); i++) {
if (tensor_shapes[i].size() != 4)
subgraph_context_.set_vpu_config = true;
}
}

std::shared_ptr<IBackend> dynamic_backend;
auto search = backend_map_.find(key);
if (search == backend_map_.end()) {
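
The hunk above is truncated by the diff view, but it shows the shape-keyed backend cache used for dynamic input shapes: each distinct combination of concrete input shapes gets its own compiled backend, which is reused on later calls. Below is a minimal standalone sketch of that pattern with hypothetical names (`Backend`, `MakeKey`); it illustrates the caching idea only and is not the EP's actual implementation.

```cpp
// Standalone sketch of a shape-keyed backend cache for dynamic-shape inputs.
#include <cstdint>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

struct Backend {  // stand-in for the EP's IBackend / BasicBackend
  explicit Backend(std::string key) : key_(std::move(key)) {}
  void Infer() { /* run inference on the backend compiled for key_ */ }
  std::string key_;
};

// Build a cache key from the concrete shapes seen at this call,
// loosely mirroring MakeMapKeyString(tensor_shapes, device_type).
std::string MakeKey(const std::vector<std::vector<int64_t>>& shapes,
                    const std::string& device_type) {
  std::ostringstream key;
  key << device_type;
  for (const auto& shape : shapes) {
    key << '|';
    for (int64_t d : shape) key << d << ',';
  }
  return key.str();
}

int main() {
  std::map<std::string, std::shared_ptr<Backend>> backend_map;
  std::vector<std::vector<int64_t>> shapes = {{1, 3, 224, 224}};

  std::string key = MakeKey(shapes, "GPU");
  auto it = backend_map.find(key);
  if (it == backend_map.end()) {
    // First time this shape combination is seen: compile and cache a backend.
    it = backend_map.emplace(key, std::make_shared<Backend>(key)).first;
  }
  it->second->Infer();  // reuse the cached backend on subsequent calls
  return 0;
}
```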
12 changes: 4 additions & 8 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -58,7 +58,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
try {
auto cnn_network = global_context.ie_core.ReadModel(model);
if ((subgraph_context.precision == InferenceEngine::Precision::FP16) &&
(global_context.device_type.find("MYRIAD") == std::string::npos)) {
(global_context.device_type.find("VPUX") == std::string::npos)) {
// FP16 transformations
ov::pass::ConvertFP32ToFP16 pass_obj;
pass_obj.run_on_model(cnn_network);
@@ -96,7 +96,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
}
#ifndef NDEBUG
#if defined OPENVINO_2022_3
#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0)
if (IsDebugEnabled()) {
std::string name = cnn_network->get_friendly_name();
ov::pass::Serialize serializer(name + ".xml", name + ".bin");
@@ -111,7 +111,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
}

InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type, std::string device) {
InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) {
ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type);
if (*type_string == "float" || *type_string == "tensor(float)") {
return InferenceEngine::Precision::FP32;
@@ -128,11 +128,7 @@ InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::
} else if (*type_string == "uint8" || *type_string == "tensor(uint8)") {
return InferenceEngine::Precision::U8;
} else if (*type_string == "bool" || *type_string == "tensor(bool)") {
if (device == "MYRIAD") {
return InferenceEngine::Precision::I32;
} else {
return InferenceEngine::Precision::U8;
}
return InferenceEngine::Precision::U8;
} else if (*type_string == "int64" || *type_string == "tensor(int64)") {
return InferenceEngine::Precision::I32;
} else {
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/backend_utils.h
@@ -44,7 +44,7 @@ GetOutputTensor(Ort::KernelContext& context,
std::shared_ptr<ngraph::Node> node);

InferenceEngine::Precision
ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type, std::string device);
ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type);

Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
11 changes: 5 additions & 6 deletions onnxruntime/core/providers/openvino/backends/backend_factory.cc
@@ -6,7 +6,6 @@
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/ibackend.h"
#include "basic_backend.h"
#include "vadm_backend.h"

namespace onnxruntime {
namespace openvino_ep {
@@ -16,11 +15,11 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context) {
std::string type = global_context.device_type;
if (type.find("HDDL") != std::string::npos) {
return std::make_shared<VADMBackend>(model_proto, global_context, subgraph_context);
} else if (type == "CPU" || type.find("GPU") != std::string::npos || type == "MYRIAD" ||
type.find("HETERO") != std::string::npos || type.find("MULTI") != std::string::npos ||
type.find("AUTO") != std::string::npos) {
if (type == "CPU" || type.find("GPU") != std::string::npos ||
type.find("VPUX") != std::string::npos ||
type.find("HETERO") != std::string::npos ||
type.find("MULTI") != std::string::npos ||
type.find("AUTO") != std::string::npos) {
std::shared_ptr<IBackend> concrete_backend_;
try {
concrete_backend_ = std::make_shared<BasicBackend>(model_proto, global_context, subgraph_context);