Merged
51 commits
e85411a
Updated Internal CI (#581)
jatinwadhwa921 Dec 18, 2024
0d42af9
Updated Internal CI OV version (#594)
jatinwadhwa921 Feb 25, 2025
3dc24ef
Updated ov version in pipeline (#595)
jatinwadhwa921 Feb 27, 2025
9c2fee5
[OVEP] Fix for deprecated OV element type (#597)
jatinwadhwa921 Feb 28, 2025
17f4bc7
Merge branch 'master' into sync_msft_29_2_25
n1harika Feb 28, 2025
4bb577a
Merge pull request #600 from intel/sync_msft_29_2_25
ankitm3k Feb 28, 2025
60ee27a
Sahar/session option develop (#601)
sfatimar Feb 28, 2025
ec62bf3
Use absolute paths for libraries loaded with LOAD_WITH_ALTERED_SEARCH…
jnagi-intel Mar 4, 2025
bd32f51
Remove unintended model copies during compilation (#584)
ericcraw Mar 4, 2025
a6cdf62
Rebasing with msft commits (#607)
jatinwadhwa921 Mar 10, 2025
cdc209c
Revert "Rebasing with msft commits (#607)"
ankitm3k Mar 10, 2025
920ed58
Merge pull request #607
ankitm3k Mar 10, 2025
788fc78
Revert "Merge pull request #607"
jatinwadhwa921 Mar 10, 2025
a046532
Merge pull request #609 from intel/jatin_revert_msft_changes
ankitm3k Mar 10, 2025
ec98cce
Merge branch 'master' into syncing_msft_commits_3_10_25
jatinwadhwa921 Mar 10, 2025
73e0fea
Merge pull request #611 from intel/syncing_msft_commits_3_10_25
jatinwadhwa921 Mar 10, 2025
61b36ef
[OVEP] Fix for precision accuracy
jatinwadhwa921 Mar 5, 2025
ea13a05
Merge pull request #603 from intel/jatin_fix_precison_acc_issue
sfatimar Mar 11, 2025
7683e37
Refactor OVRTAllocator to return base pointer of remote tensor (#613)
ericcraw Mar 17, 2025
1788576
Commit Lint Errors fix (#606)
sfatimar Mar 18, 2025
23e17e2
fix quantizedLinear layer feeds into graph output (#615)
saurabhkale17 Mar 18, 2025
91e64fe
Merge branch 'master' into sync_msft_18_3_25
jatinwadhwa921 Mar 18, 2025
6083601
Merge pull request #621 from intel/sync_msft_18_3_25
jatinwadhwa921 Mar 18, 2025
8b4a6d2
[OVEP] Fix for dumping the model in correct format (#616)
jatinwadhwa921 Mar 19, 2025
7269615
[OVEP] Added Cast and Resize to operators that handle zero-valued dim…
jatinwadhwa921 Mar 19, 2025
9ee95d1
Merge branch 'master' into sync_msft_20_3_25
jatinwadhwa921 Mar 20, 2025
2a24806
Merge pull request #624 from intel/sync_msft_20_3_25
jatinwadhwa921 Mar 21, 2025
c7ac5c8
Merge branch 'master' into sync_msft_25_3_25
jatinwadhwa921 Mar 25, 2025
2c61a3a
Merge pull request #627 from intel/sync_msft_25_3_25
jatinwadhwa921 Mar 26, 2025
e240695
[OVEP] Fix for Dynamic backend creation for NPU. (#622)
gupta-pallavi Mar 27, 2025
322a7e1
Remove unnecessary device queries (#620)
nproshun Mar 28, 2025
25912f7
Add support for parsing AUTO, HETERO and MULTI from json config (#605)
preetha-intel Apr 2, 2025
fbf43a9
Revert "[OVEP] Fix for Dynamic backend creation for NPU. (#622)" (#635)
gupta-pallavi Apr 3, 2025
2313d11
[OVEP] Fix for building OVEP without vcpkg flag (#637)
jatinwadhwa921 Apr 3, 2025
a9a3d10
Merge branch 'master' into sync_msft_4_4_25
jatinwadhwa921 Apr 4, 2025
0636815
Merge pull request #638 from intel/sync_msft_4_4_25
jatinwadhwa921 Apr 4, 2025
1e85f1d
[OVEP] Updated Documentation for python wheels (#640)
jatinwadhwa921 Apr 4, 2025
80dfee9
Device type refactoring (#630)
preetha-intel Apr 4, 2025
2e4d541
Enable adaptive stripping and eliminate dependency of weight sharing …
saurabhkale17 Apr 7, 2025
c0c347c
Add Config for Release build
saurabhkale17 Apr 8, 2025
8517c64
Merge branch 'master' into syncing_msft_8_4_25
saurabhkale17 Apr 8, 2025
b999a1b
Merge pull request #643 from intel/syncing_msft_8_4_25
jatinwadhwa921 Apr 8, 2025
a8527b9
Bug fix in provider key verification (#644)
preetha-intel Apr 8, 2025
4e63ef6
Fix the LUID check (#647)
preetha-intel Apr 10, 2025
f0216fd
Merge branch 'master' into sync_msft_10_4_25
jatinwadhwa921 Apr 10, 2025
a5e6e05
Merge pull request #649 from intel/sync_msft_10_4_25
jatinwadhwa921 Apr 10, 2025
6fc0ed0
Update OV version for Intel Internal CI
jatinwadhwa921 Apr 11, 2025
9294c4c
Merge pull request #652 from intel/jatin_update_int_ci_2025_1
ankitm3k Apr 11, 2025
c2558f3
[OVEP] Update ov version in ort (#653)
jatinwadhwa921 Apr 11, 2025
2f6f45f
Revert OVEP internal ci file
preetha-intel Apr 11, 2025
23b7380
Merge branch 'master' into ovep-release-1.22
preetha-intel Apr 15, 2025
6 changes: 6 additions & 0 deletions cmake/onnxruntime_providers_openvino.cmake
@@ -37,12 +37,18 @@

source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")

onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json)
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20)
set_target_properties(onnxruntime_providers_openvino PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers_openvino PROPERTIES FOLDER "ONNXRuntime")

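# NOT_RELEASE is defined for every non-Release configuration; it gates debug-only code paths such as the OVEP model dump in backend_manager.cc.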
target_compile_options(onnxruntime_providers_openvino PRIVATE
$<$<NOT:$<CONFIG:Release>>:-DNOT_RELEASE>
)

if(NOT MSVC)
target_compile_options(onnxruntime_providers_openvino PRIVATE "-Wno-parentheses")
endif()
12 changes: 6 additions & 6 deletions docs/python/ReadMeOV.rst
@@ -7,36 +7,36 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated NPUs (Windows only)
- Intel® integrated NPUs

Installation
------------

Requirements
^^^^^^^^^^^^

- Ubuntu 18.04, 20.04, RHEL(CPU only) or Windows 10 - 64 bit
- Python 3.9 or 3.10 or 3.11 for Linux and Python 3.10, 3.11 for Windows
- Ubuntu 18.04, 20.04 or Windows 10 - 64 bit
- Python 3.10, 3.11, 3.12 and 3.13 for Windows and Linux

This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated NPUs (Windows only)
- Intel® integrated NPUs

``pip3 install onnxruntime-openvino``

Please install OpenVINO™ PyPi Package separately for Windows.
For installation instructions on Windows please refer to `OpenVINO™ Execution Provider for ONNX Runtime for Windows <https://github.com/intel/onnxruntime/releases/>`_.

**OpenVINO™ Execution Provider for ONNX Runtime** Linux Wheels comes with pre-built libraries of OpenVINO™ version 2024.1.0 eliminating the need to install OpenVINO™ separately.
**OpenVINO™ Execution Provider for ONNX Runtime** Linux wheels come with pre-built libraries of OpenVINO™ version 2025.1.0, eliminating the need to install OpenVINO™ separately.

For more details on build and installation please refer to `Build <https://onnxruntime.ai/docs/build/eps.html#openvino>`_.

Usage
^^^^^

By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated GPU, discrete GPU, integrated NPU (Windows only).
By default, Intel® CPU is used to run inference. However, you can change the default option to Intel® integrated GPU, discrete GPU, or integrated NPU.
Invoke `the provider config device type argument <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options>`_ to change the hardware on which inferencing is done.

For more API calls and environment variables, see `Usage <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#configuration-options>`_.
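For illustration, a minimal sketch of selecting the target device through provider options, assuming the standard onnxruntime Python API; ``model.onnx`` and ``GPU`` are placeholders:

import onnxruntime as ort

# Run with the OpenVINO Execution Provider on an Intel GPU instead of the default CPU.
session = ort.InferenceSession(
    "model.onnx",
    providers=["OpenVINOExecutionProvider"],
    provider_options=[{"device_type": "GPU"}],
)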
35 changes: 27 additions & 8 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -19,6 +19,7 @@
#include "core/providers/openvino/ibackend.h"
#include "core/providers/openvino/backend_utils.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/ov_interface.h"

namespace onnxruntime {
namespace openvino_ep {
@@ -320,9 +321,10 @@ static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
return false;
}

static void DumpOpenVINOEPModel(const std::filesystem::path& onnx_model_path_name,
ONNX_NAMESPACE::ModelProto* model_proto,
const onnxruntime::Node& fused_node) {
static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& onnx_model_path_name,
[[maybe_unused]] ONNX_NAMESPACE::ModelProto* model_proto,
[[maybe_unused]] const onnxruntime::Node& fused_node) {
#ifdef NOT_RELEASE
if (openvino_ep::backend_utils::IsDebugEnabled()) {
auto model_name = onnx_model_path_name.empty() ? "unknown.onnx" : onnx_model_path_name.filename();

@@ -331,11 +333,13 @@ static void DumpOpenVINOEPModel(const std::filesystem::path& onnx_model_path_nam
if (dash != std::string::npos) {
auto new_name = model_name.stem().string() + subgraph_name.substr(dash, std::string::npos);
model_name.replace_filename(new_name);
model_name.replace_extension(".onnx");
}

std::fstream dump(model_name, std::ios::out | std::ios::trunc | std::ios::binary);
model_proto->SerializeToOstream(dump);
}
#endif
}

std::unique_ptr<ONNX_NAMESPACE::ModelProto>
@@ -358,22 +362,37 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
}
};

[[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
[[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: Compiler QDQ optimization pass is enabled";
OVCore::Get()->core.set_property("NPU", {ov::intel_npu::qdq_optimization(true)});
// Disable OVEP-side QDQ stripping: at this stage the provider option "enable_qdq_optimizer"
// is still true, but OVEP stripping is turned off because the compiler performs it instead.
enable_ovep_qdq_optimizer = false;
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: OVEP QDQ optimization pass is enabled";
}
}
#endif

const auto& onnx_model_path_name = subgraph.ModelPath();
// QDQ stripping enabled only for the NPU
if (session_context_.device_type.find("NPU") != std::string::npos &&
session_context_.enable_qdq_optimizer &&
IsQDQGraph(subgraph)) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1";
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
std::unique_ptr<onnxruntime::Model> model;
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights);
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights, enable_ovep_qdq_optimizer);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
print_model_proto_duration();
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
return model_proto;
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 0";
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
auto model = subgraph.CreateModel(logger);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -137,14 +137,14 @@ bool IsCILogEnabled() {
}

std::shared_ptr<const OVNetwork>
CreateOVModel(const std::string model,
CreateOVModel(std::string&& model,
const SessionContext& session_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
if (IsCILogEnabled()) {
std::cout << "CreateNgraphFunc" << std::endl;
}
try {
auto ov_model = OVCore::Get()->ReadModel(model, session_context.onnx_model_path_name.string());
auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string());

// Check for Constant Folding
if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/backend_utils.h
@@ -62,7 +62,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
size_t batch_slice_idx);

std::shared_ptr<const OVNetwork>
CreateOVModel(const std::string model,
CreateOVModel(std::string&& model,
const SessionContext& session_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

43 changes: 26 additions & 17 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -2,6 +2,8 @@
// Licensed under the MIT License

#include <map>
#include <unordered_set>

#include <string>
#include <memory>
#include <sstream>
@@ -69,14 +71,11 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
subgraph_context_.subgraph_name);
model_stream.reset(); // Delete stream after it is no longer needed
} else {
std::shared_ptr<const OVNetwork> ov_model;
{
const std::string model = model_proto->SerializeAsString();
if (!subgraph_context.has_dynamic_input_shape) {
delete model_proto.release();
}
ov_model = CreateOVModel(model, session_context_, const_outputs_map_);
std::string model = model_proto->SerializeAsString();
if (!subgraph_context.has_dynamic_input_shape) {
model_proto.reset();
}
auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
exe_network_ = OVCore::Get()->CompileModel(
ov_model, remote_context_, subgraph_context_.subgraph_name);
@@ -108,14 +107,11 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
subgraph_context_.subgraph_name);
} else { // For all other types use ov::ov_core read_model() to generate OV IR
// followed by ov::ov_core compile_model()
std::shared_ptr<const OVNetwork> ov_model;
{
const std::string model = model_proto->SerializeAsString();
if (!subgraph_context.has_dynamic_input_shape) {
delete model_proto.release();
}
ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
std::string model = model_proto->SerializeAsString();
if (!subgraph_context.has_dynamic_input_shape) {
model_proto.reset();
}
auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
exe_network_ = OVCore::Get()->CompileModel(
ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
}
@@ -164,10 +160,8 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
if (session_context_.precision.find("FP32") != std::string::npos) {
device_config.emplace(ov::hint::inference_precision("f32"));
}
if (session_context_.precision.find("ACCURACY") != std::string::npos &&
session_context_.device_type.find("GPU") != std::string::npos) {
if (session_context_.precision.find("ACCURACY") != std::string::npos) {
if (session_context_.OpenVINO_Version.at(0) >= 2024) {
device_config.emplace(ov::hint::inference_precision(ov::element::undefined));
device_config.emplace(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY));
} else {
if (!subgraph_context_.model_precision.empty())
@@ -230,6 +224,15 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
}
}
}
auto find_device_type_mode = [&](const std::string& device_type) -> std::string {
std::string device_mode = "";
auto delimiter_pos = device_type.find(':');
if (delimiter_pos != std::string::npos) {
std::stringstream str_stream(device_type.substr(0, delimiter_pos));
std::getline(str_stream, device_mode, ',');
}
return device_mode;
};

// Parse device types like "AUTO:CPU,GPU" and extract individual devices
auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
@@ -278,8 +281,14 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
if (session_context_.device_type.find("AUTO") == 0 ||
session_context_.device_type.find("HETERO") == 0 ||
session_context_.device_type.find("MULTI") == 0) {
// Parse to get the device mode (e.g., "AUTO:CPU,GPU" -> "AUTO")
std::unordered_set<std::string> supported_mode = {"AUTO", "HETERO", "MULTI"};
auto device_mode = find_device_type_mode(session_context_.device_type);
ORT_ENFORCE(supported_mode.find(device_mode) != supported_mode.end(), " Invalid device mode is passed : ", session_context_.device_type);
// Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"])
auto individual_devices = parse_individual_devices(session_context_.device_type);
if (!device_mode.empty()) individual_devices.emplace_back(device_mode);

// Set properties only for individual devices (e.g., "CPU", "GPU")
for (const std::string& device : individual_devices) {
if (target_config.count(device)) {
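For context, a hedged sketch of how a composite device string reaches this code path, assuming the standard onnxruntime Python API; the prefix before ':' must be AUTO, HETERO, or MULTI to pass the ORT_ENFORCE check above, and "model.onnx" is a placeholder:

import onnxruntime as ort

# "AUTO:GPU,CPU" -> device mode "AUTO", individual devices ["GPU", "CPU"];
# per-device properties are applied only to the individual devices.
session = ort.InferenceSession(
    "model.onnx",
    providers=["OpenVINOExecutionProvider"],
    provider_options=[{"device_type": "AUTO:GPU,CPU"}],
)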
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -6,6 +6,7 @@
#include <vector>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <filesystem>
#include <memory>
@@ -102,6 +103,9 @@ struct ProviderInfo {
bool so_share_ep_contexts{false}; // ORT session option
fs::path so_context_file_path{}; // ORT session option
const ConfigOptions* config_options{NULL};
const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
"load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
"disable_dynamic_shapes"};
};
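A hypothetical example of the "load_config" option listed in valid_provider_keys above; the JSON layout (device name mapped to OpenVINO properties) is an assumption, and the property values are illustrative only:

import json
import onnxruntime as ort

# Write an assumed-format config: device name -> OpenVINO properties.
with open("ov_config.json", "w") as f:
    json.dump({"AUTO": {"PERFORMANCE_HINT": "THROUGHPUT"}}, f)

session = ort.InferenceSession(
    "model.onnx",
    providers=["OpenVINOExecutionProvider"],
    provider_options=[{"device_type": "AUTO:GPU,CPU", "load_config": "ov_config.json"}],
)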

// Holds context applicable to the entire EP instance.
37 changes: 0 additions & 37 deletions onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -58,43 +58,6 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const ProviderInfo& info, s
shared_context_{shared_context},
ep_ctx_handle_{session_context_.openvino_sdk_version, *GetLogger()} {
InitProviderOrtApi();

// to check if target device is available
// using OVCore capability GetAvailableDevices to fetch list of devices plugged in
if (info.cache_dir.empty()) {
bool device_found = false;
std::vector<std::string> available_devices = OVCore::Get()->GetAvailableDevices();
// Checking for device_type configuration
if (info.device_type != "") {
if (info.device_type.find("HETERO") != std::string::npos ||
info.device_type.find("MULTI") != std::string::npos ||
info.device_type.find("AUTO") != std::string::npos) {
device_found = true;
} else {
for (const std::string& device : available_devices) {
if (device.rfind(info.device_type, 0) == 0) {
if (info.device_type.find("GPU") != std::string::npos && (info.precision == "FP32" ||
info.precision == "FP16" ||
info.precision == "ACCURACY")) {
device_found = true;
break;
}
if (info.device_type == "CPU" && (info.precision == "FP32")) {
device_found = true;
break;
}
if (info.device_type.find("NPU") != std::string::npos) {
device_found = true;
break;
}
}
}
}
}
if (!device_found) {
ORT_THROW("[ERROR] [OpenVINO] Specified device - " + info.device_type + " is not available");
}
}
}

OpenVINOExecutionProvider::~OpenVINOExecutionProvider() {