6 changes: 3 additions & 3 deletions cmake/onnxruntime_providers_openvino.cmake
@@ -13,8 +13,8 @@
 
   # Header paths
   find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
-  if(OpenVINO_VERSION VERSION_LESS 2024.5)
-    message(FATAL_ERROR "OpenVINO 2024.5 and newer are supported. Please, use latest OpenVINO release")
+  if(OpenVINO_VERSION VERSION_LESS 2025.0)
+    message(FATAL_ERROR "OpenVINO 2025.0 and newer are supported. Please use the latest OpenVINO release")
   endif()
 
   if(OpenVINO_VERSION VERSION_GREATER_EQUAL 2024.4)
@@ -49,7 +49,7 @@
   endif()
   add_dependencies(onnxruntime_providers_openvino onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
   target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${OpenVINO_INCLUDE_DIR} ${OPENVINO_INCLUDE_DIR_LIST} ${PYTHON_INCLUDE_DIRS} $ENV{OPENCL_INCS} $ENV{OPENCL_INCS}/../../cl_headers/)
-  target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS} Eigen3::Eigen)
+  target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS} Eigen3::Eigen onnx_proto)
 
   target_compile_definitions(onnxruntime_providers_openvino PRIVATE FILE_NAME=\"onnxruntime_providers_openvino.dll\")
 
1 change: 1 addition & 0 deletions onnxruntime/core/optimizer/double_qdq_pairs_remover.cc
@@ -52,6 +52,7 @@ static void ApplyNewInputValue(Graph& graph, Node& node, QDQ::InputIndex index,
   input_init.ToProto(new_input_tensor);
   auto new_name = graph.GenerateNodeArgName("DoubleQDQRemoved_" + node.InputDefs()[index]->Name());
   new_input_tensor.set_name(new_name);
+  new_input_tensor.add_dims(1);
   NodeArg& new_input = graph_utils::AddInitializerWithExternalData(graph, new_input_tensor);
   graph_utils::ReplaceNodeInput(node, index, new_input);
 }
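
Context for the one-line addition above: without add_dims(1) the replacement initializer is a rank-0 (scalar) TensorProto, while the added call gives it shape [1], a 1-D tensor holding a single element. A minimal standalone sketch of the resulting proto, assuming the stock ONNX protobuf API (the helper name is illustrative, not part of the PR):

#include <onnx/onnx_pb.h>
#include <string>

// Builds a float initializer shaped the way the patched code leaves it:
// dims = [1] with exactly one element, rather than a rank-0 scalar.
ONNX_NAMESPACE::TensorProto make_1d_float_initializer(const std::string& name, float value) {
  ONNX_NAMESPACE::TensorProto t;
  t.set_name(name);
  t.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
  t.add_dims(1);            // shape becomes [1]; omitting this leaves the proto rank-0
  t.add_float_data(value);  // one element, matching the [1] shape
  return t;
}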
19 changes: 16 additions & 3 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -20,6 +20,7 @@
 #include "core/providers/openvino/ov_interface.h"
 #include "core/providers/openvino/ov_versions/capability.h"
 #include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
+#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -117,7 +118,9 @@ BackendManager::BackendManager(SessionContext& session_context,
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
     if ((!session_context_.disable_dynamic_shapes &&
          (session_context_.device_type.find("CPU") != std::string::npos ||
-          session_context_.device_type.find("GPU") != std::string::npos)) ||
+          session_context_.device_type.find("GPU") != std::string::npos ||
+          (session_context_.device_type.find("NPU") != std::string::npos &&
+           session_context_.enable_causallm))) ||
         (subgraph_context_.is_ep_ctx_graph)) {
       LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
                          << "Creating backend Dynamic Shapes";
@@ -429,8 +432,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
 
   const auto& onnx_model_path_name = subgraph.ModelPath();
   // QDQ stripping enabled only for the NPU and experimentally on the GPU
-  if ((session_context_.device_type.find("NPU") != std::string::npos ||
-       session_context_.device_type.find("GPU") != std::string::npos) &&
+  if ((session_context_.device_type.find("NPU") != std::string::npos) &&
       (enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
     std::unique_ptr<onnxruntime::Model> model;
     Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_.shared_weights);
@@ -440,6 +442,17 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
+  } else if ((session_context_.device_type.find("GPU") != std::string::npos) &&
+             enable_ovep_qdq_optimizer) {
+    // Create a copy of the model
+    std::unique_ptr<onnxruntime::Model> model;
+    Status status = qdq_scales_fix::Transform(subgraph, logger, model);
+    auto model_proto = model->ToProto();
+    model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
+    print_model_proto_duration();
+    DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
+    ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
+    return model_proto;
   } else {
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
     auto model = subgraph.CreateModel(logger);
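
Taken together, GetModelProtoFromFusedNode now routes QDQ handling three ways: NPU keeps the full QDQ-stripping pass, GPU gets only the new qdq_scales_fix transform, and everything else is copied through untouched. A hedged summary of that dispatch as a hypothetical standalone helper (the enum and function are not in the PR; names follow the diff):

#include <string>

enum class QdqPath { NpuStripping, GpuScalesFix, PlainCopy };

// Mirrors the if / else-if / else chain above.
inline QdqPath SelectQdqPath(const std::string& device_type,
                             bool enable_ovep_qdq_optimizer,
                             bool so_share_ep_contexts) {
  const bool is_npu = device_type.find("NPU") != std::string::npos;
  const bool is_gpu = device_type.find("GPU") != std::string::npos;
  if (is_npu && (enable_ovep_qdq_optimizer || so_share_ep_contexts))
    return QdqPath::NpuStripping;  // strip QDQ nodes for the NPU plugin
  if (is_gpu && enable_ovep_qdq_optimizer)
    return QdqPath::GpuScalesFix;  // new in this PR: only adjust QDQ scales
  return QdqPath::PlainCopy;       // no OVEP-side QDQ rewriting
}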
48 changes: 23 additions & 25 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -36,10 +36,6 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
   if (ValidateSubgraph(const_outputs_map_))
     return;
 
-  // Pre-requisite is provider_option "context" must be set
-  auto auto_unified_compile = ((hw_target.find("AUTO") == std::string::npos) ||
-                               (session_context_.OpenVINO_Version.at(0) >= 2024 &&
-                                session_context_.OpenVINO_Version.at(1) > 2));
   ov::AnyMap device_config;
   SetOVDeviceConfiguration(device_config);
   if (subgraph_context_.is_ep_ctx_graph) {
@@ -81,42 +77,46 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
       ORT_THROW(msg);
     }  // Delete stream after it is no longer needed
   } else {
+    std::shared_ptr<const onnxruntime::openvino_ep::OVNetwork> ov_model;
     std::string model = model_proto->SerializeAsString();
     if (!subgraph_context.has_dynamic_input_shape) {
      model_proto.reset();
     }
+    bool eligible_for_cpu_fallback = session_context_.device_type.find("NPU") != std::string::npos &&
+                                     !session_context_.so_disable_cpu_ep_fallback &&
+                                     !subgraph_context_.is_ep_ctx_graph;
+#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
+    eligible_for_cpu_fallback = false;
+#endif
+    auto auto_unified_compile = (hw_target.find("AUTO") == std::string::npos);
+
+    // Unified compile is efficient with cache_dir cached-model loading, which bypasses read_model.
+    // It does not support models with external weights, dynamic input shapes, EPCtx ONNX cached models,
+    // reshape, enable_causallm, or the NPU CPU fallback.
+
+    auto is_unified_compile = (!session_context_.has_external_weights &&
+                               !subgraph_context_.has_dynamic_input_shape &&
+                               !session_context_.so_context_enable &&
+                               session_context_.reshape.empty() &&
+                               !enable_causallm &&
+                               !eligible_for_cpu_fallback &&
+                               auto_unified_compile);
     try {
-      // SetOVDeviceConfiguration(device_config);
-      if (!session_context_.has_external_weights &&
-          !subgraph_context_.has_dynamic_input_shape &&
-          !session_context_.so_context_enable &&
-          session_context_.reshape.empty() &&
-          !enable_causallm &&
-          auto_unified_compile) {
-        // Unified OV compile_model is efficient when ov model caching is enabled
-        // Unified OV compile_model API is supported with AUTO from version 2024.3 and above
-        // Inputs with static dimensions
-        // Not enabled for models with external weights and when ep context is set.
-
+      if (is_unified_compile) {
         exe_network_ = OVCore::Get()->CompileModel(model,
                                                    hw_target,
                                                    device_config,
                                                    subgraph_context_.subgraph_name);
       } else {  // For all other types use ov::ov_core read_model() to generate OV IR
                 // followed by ov::ov_core compile_model()
-        auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
+        ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
         exe_network_ = OVCore::Get()->CompileModel(
             ov_model, hw_target, device_config, enable_causallm, subgraph_context_.subgraph_name);
       }
       LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
     } catch (const OnnxRuntimeException& ex) {
       std::string exception_str = ex.what();
-      bool eligible_for_cpu_fallback = session_context_.device_type.find("NPU") != std::string::npos &&
-                                       !session_context_.so_disable_cpu_ep_fallback &&
-                                       !subgraph_context_.is_ep_ctx_graph;
-#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
-      eligible_for_cpu_fallback = false;
-#endif
+
       if (eligible_for_cpu_fallback) {
         LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
                               << "Falling back to OV CPU for execution";
@@ -125,8 +125,6 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
       device_config.clear();
       SetOVDeviceConfiguration(device_config);
       try {
-        // Recreate the model with CPU device type
-        auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
         exe_network_ = OVCore::Get()->CompileModel(
             ov_model, hw_target, device_config, enable_causallm, subgraph_context_.subgraph_name);
       } catch (std::string const& msg) {
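
Two things happen in this file: ov_model and eligible_for_cpu_fallback are hoisted out of the try block so the CPU-fallback path can reuse the already-built model (the old code re-ran CreateOVModel on a moved-from string), and the unified-compile decision is folded into a single is_unified_compile flag. The two compile paths map onto distinct OpenVINO 2.0 C++ entry points; a minimal sketch, assuming the public ov::Core buffer overloads (illustrative, not OVEP code):

#include <openvino/openvino.hpp>
#include <string>

// Single-step ("unified") path: hand the serialized ONNX buffer straight to
// compile_model, so a cache_dir hit can skip model parsing entirely.
ov::CompiledModel CompileUnified(ov::Core& core, const std::string& model_buffer,
                                 const std::string& device, const ov::AnyMap& config) {
  return core.compile_model(model_buffer, ov::Tensor{}, device, config);
}

// Two-step path: materialize an ov::Model first. This is what the EP needs when
// it must reshape inputs, rewrite the graph, or keep the model for CPU fallback.
ov::CompiledModel CompileTwoStep(ov::Core& core, const std::string& model_buffer,
                                 const std::string& device, const ov::AnyMap& config) {
  std::shared_ptr<ov::Model> model = core.read_model(model_buffer, ov::Tensor{});
  return core.compile_model(model, device, config);
}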
16 changes: 16 additions & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
@@ -133,6 +133,22 @@ class OVInferRequest {
       auto tensor_ptr = std::make_shared<ov::Tensor>(type, shape, const_cast<void*>(ort_ptr));
       SetTensor(name, tensor_ptr);
       cached_binding = {tensor_ptr, ort_ptr};
+    } else if (ort_ptr == nullptr) {
+      // a null ort_ptr is expected for a tensor that has 0 elements.
+      // for example, a tensor of shape=[1, 8, 0, 64], which is valid.
+      // So, we check to see if at least one shape entry is 0.
+      auto contains_zero = [](const ov::Shape& shape) {
+        for (auto& s : shape)
+          if (s == 0) return true;
+        return false;
+      };
+      if (contains_zero(shape)) {
+        // if there are zero elements (i.e. at least one shape entry is 0),
+        // then create and set the tensor anyway.
+        auto tensor_ptr = std::make_shared<ov::Tensor>(type, shape);
+        SetTensor(name, tensor_ptr);
+        cached_binding = {tensor_ptr, ort_ptr};
+      }
     }
   }

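
A short standalone sketch of the zero-element case the new branch covers, assuming the standard ov::Tensor API: a shape containing a 0 is legal, such a tensor holds no data, and that is why a null ort_ptr is acceptable for it.

#include <openvino/openvino.hpp>
#include <cassert>

int main() {
  // Zero-sized dimensions are valid in OpenVINO; the element count is the
  // product of the dims, so this tensor holds no elements at all.
  ov::Tensor empty(ov::element::f32, ov::Shape{1, 8, 0, 64});
  assert(empty.get_size() == 0);
  assert(empty.get_shape()[2] == 0);
  return 0;
}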
24 changes: 24 additions & 0 deletions onnxruntime/core/providers/openvino/ov_protobuf_utils.cpp
@@ -0,0 +1,24 @@
+// Copyright (C) Intel Corporation
+// Licensed under the MIT License
+
+#include "ov_protobuf_utils.h"
+
+#include "core/graph/onnx_protobuf.h"
+#include "core/common/common.h"
+
+namespace onnxruntime {
+namespace openvino_ep {
+float get_float_initializer_data(const void* initializer) {
+  const auto* tp = reinterpret_cast<const ONNX_NAMESPACE::TensorProto*>(initializer);
+  ORT_ENFORCE((tp->has_data_type() && (tp->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT)));
+  // ORT_ENFORCE(initializer.dims_size() == 1);
+  return tp->float_data(0);
+}
+void set_float_initializer_data(const void* initializer, float data) {
+  auto* tp = (ONNX_NAMESPACE::TensorProto*)(initializer);
+  ORT_ENFORCE((tp->has_data_type() && (tp->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT)));
+  // ORT_ENFORCE(initializer.dims_size() == 1);
+  tp->set_float_data(0, data);
+}
+}  // namespace openvino_ep
+}  // namespace onnxruntime
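
A hedged usage sketch for these helpers; the halve_scale caller below is hypothetical. The void* signatures let callers touch a TensorProto without pulling the protobuf headers into their own translation units, which is presumably why qdq_scales_fix goes through this indirection.

#include "core/graph/onnx_protobuf.h"
#include "core/providers/openvino/ov_protobuf_utils.h"

// Reads a FLOAT initializer's first value and writes an adjusted one back,
// the kind of in-place fix-up a QDQ scale-repair pass performs.
void halve_scale(ONNX_NAMESPACE::TensorProto& scale_initializer) {
  using namespace onnxruntime::openvino_ep;
  const float old_scale = get_float_initializer_data(&scale_initializer);
  set_float_initializer_data(&scale_initializer, old_scale * 0.5f);
}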
10 changes: 10 additions & 0 deletions onnxruntime/core/providers/openvino/ov_protobuf_utils.h
@@ -0,0 +1,10 @@
+// Copyright (C) Intel Corporation
+// Licensed under the MIT License
+
+#pragma once
+namespace onnxruntime {
+namespace openvino_ep {
+float get_float_initializer_data(const void* initializer);
+void set_float_initializer_data(const void* initializer, float data);
+}  // namespace openvino_ep
+}  // namespace onnxruntime
10 changes: 4 additions & 6 deletions onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -41,16 +41,14 @@ GetCapability::GetCapability(const EPCtxHandler& ep_ctx_handler,
     npu_qdq_optimizer_enabled = true;  // see data_ops.cc ~615 where we check for int16 types for gpu, this may change to a better approach later
   }
 
-#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5
-  data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 6
-  data_ops_ = new DataOps(graph_viewer_, V_2024_6, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 0
+#if OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 0
   data_ops_ = new DataOps(graph_viewer_, V_2025_0, device_type_, npu_qdq_optimizer_enabled);
 #elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 1
   data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled);
+#elif OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR == 2
+  data_ops_ = new DataOps(graph_viewer_, V_2025_2, device_type_, npu_qdq_optimizer_enabled);
 #else
-  data_ops_ = new DataOps(graph_viewer_, V_2025_1, device_type_, npu_qdq_optimizer_enabled);
+  data_ops_ = new DataOps(graph_viewer_, V_2025_2, device_type_, npu_qdq_optimizer_enabled);
 #endif
 }
 
13 changes: 8 additions & 5 deletions onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -229,6 +229,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Sigmoid", V_2020_4, {"CPU", "GPU"}},
     {"Sign", V_2020_4, {"CPU"}},
     {"Sign", V_2022_1, {"GPU"}},
+    {"SimplifiedLayerNormalization", V_2025_2, {"CPU", "GPU"}},
     {"Sin", V_2022_1, {"CPU", "GPU"}},
     {"Sinh", V_2020_4, {"CPU"}},
     {"Size", V_2022_1, {"CPU", "GPU"}},
@@ -402,7 +403,7 @@ void DataOps::populate_op_mode_supported() {
 
   // populate unsupportedmode_t
   {
-    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1},
+    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1, V_2025_2},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
                                for (size_t i = 0; i < node->InputDefs().size(); i++) {
@@ -418,7 +419,8 @@ void DataOps::populate_op_mode_supported() {
   }
   {
     UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
-                              V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1},
+                              V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1,
+                              V_2025_2},
                              [this](const Node* node, const InitializedTensorSet&) {
                                const auto& input_args = node->InputDefs();
                                const auto& input_arg = (input_args.size() > 1) ? input_args[1] : input_args[0];
@@ -437,7 +439,8 @@ void DataOps::populate_op_mode_supported() {
   }
   {
     UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2,
-                              V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1},
+                              V_2024_3, V_2024_4, V_2024_5, V_2024_6, V_2025_0, V_2025_1,
+                              V_2025_2},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the operator is unsqueeze
                                // If axes is an input, then we cannot produce a static graph.
@@ -452,8 +455,8 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Unsqueeze", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5, V_2024_6,
-                              V_2025_0, V_2025_1},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5,
+                              V_2024_6, V_2025_0, V_2025_1, V_2025_2},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // check for attributes
                                auto& upsample_attr = node->GetAttributes();
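
The data_ops.cc hunks all follow one pattern: each rule carries the set of OpenVINO versions it applies to, so every new release enum (V_2025_2 here) has to be appended, or the rule presumably stops applying to builds against that release. A simplified stand-in for the real types, just to show the shape of the check (illustrative, not the actual struct):

#include <functional>
#include <set>

enum VersionNumSketch { V_2025_1_s, V_2025_2_s };

struct UnsupportedOpModeSketch {
  std::set<VersionNumSketch> ver;  // versions the rejection rule applies to
  std::function<bool()> rejects;   // true means the node is rejected for OVEP

  // Omitting V_2025_2_s from `ver` would make 2025.2 builds skip this rule.
  bool applies(VersionNumSketch active) const { return ver.count(active) != 0; }
};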
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@@ -35,7 +35,8 @@ enum versionNum {
   V_2024_5,
   V_2024_6,
   V_2025_0,
-  V_2025_1
+  V_2025_1,
+  V_2025_2
 };
 
 using VersionNum = enum versionNum;