2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_openvino.cmake
@@ -49,7 +49,7 @@
endif()
add_dependencies(onnxruntime_providers_openvino onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
target_include_directories(onnxruntime_providers_openvino SYSTEM PUBLIC ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${OpenVINO_INCLUDE_DIR} ${OPENVINO_INCLUDE_DIR_LIST} ${PYTHON_INCLUDE_DIRS} $ENV{OPENCL_INCS} $ENV{OPENCL_INCS}/../../cl_headers/)
- target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS} Eigen3::Eigen)
+ target_link_libraries(onnxruntime_providers_openvino ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${OPENVINO_LIB_LIST} ${ABSEIL_LIBS} Eigen3::Eigen onnx_proto)

target_compile_definitions(onnxruntime_providers_openvino PRIVATE FILE_NAME=\"onnxruntime_providers_openvino.dll\")

1 change: 1 addition & 0 deletions onnxruntime/core/optimizer/double_qdq_pairs_remover.cc
@@ -52,6 +52,7 @@ static void ApplyNewInputValue(Graph& graph, Node& node, QDQ::InputIndex index,
input_init.ToProto(new_input_tensor);
auto new_name = graph.GenerateNodeArgName("DoubleQDQRemoved_" + node.InputDefs()[index]->Name());
new_input_tensor.set_name(new_name);
+ new_input_tensor.add_dims(1);
NodeArg& new_input = graph_utils::AddInitializerWithExternalData(graph, new_input_tensor);
graph_utils::ReplaceNodeInput(node, index, new_input);
}
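For context, a minimal standalone sketch of what the added add_dims(1) call does to the replacement initializer: it turns a rank-0 scalar TensorProto into a 1-D tensor with a single element. This snippet is an illustration written for this review, not code from the PR, and assumes an onnxruntime build where core/graph/onnx_protobuf.h provides the generated ONNX protos.

#include <iostream>
#include "core/graph/onnx_protobuf.h"  // assumed available in an onnxruntime build

int main() {
  ONNX_NAMESPACE::TensorProto t;
  t.set_name("DoubleQDQRemoved_example");  // hypothetical name
  t.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
  t.add_float_data(0.5f);
  std::cout << "rank before add_dims(1): " << t.dims_size() << "\n";  // 0 -> scalar
  t.add_dims(1);
  std::cout << "rank after add_dims(1):  " << t.dims_size() << "\n";  // 1 -> shape [1]
  return 0;
}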
15 changes: 13 additions & 2 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -20,6 +20,7 @@
#include "core/providers/openvino/ov_interface.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"

namespace onnxruntime {
namespace openvino_ep {
@@ -429,8 +430,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,

const auto& onnx_model_path_name = subgraph.ModelPath();
// QDQ stripping enabled only for the NPU and experimentally on the GPU
- if ((session_context_.device_type.find("NPU") != std::string::npos ||
-      session_context_.device_type.find("GPU") != std::string::npos) &&
+ if ((session_context_.device_type.find("NPU") != std::string::npos) &&
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
std::unique_ptr<onnxruntime::Model> model;
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_.shared_weights);
@@ -440,6 +440,17 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
return model_proto;
+ } else if ((session_context_.device_type.find("GPU") != std::string::npos) &&
+            enable_ovep_qdq_optimizer) {
+   // Create a copy of the model
+   std::unique_ptr<onnxruntime::Model> model;
+   Status status = qdq_scales_fix::Transform(subgraph, logger, model);

Review comment: Is this pass happening even for non quantized models?

@mklimenk (Jul 3, 2025): @preetha-intel, this pass happens only when the enable_qdq_optimizer flag is set. Inside the pass it specifically looks for quantized blocks with (u)int16 precision and ignores everything else, so regular models are not affected by it, even if the flag was passed by accident.

+   auto model_proto = model->ToProto();
+   model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
+   print_model_proto_duration();
+   DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
+   ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
+   return model_proto;
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
auto model = subgraph.CreateModel(logger);
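To illustrate the reviewer's reply above, here is a hedged sketch of the kind of precision gate such a pass can apply: only tensors whose element type is (u)int16 are considered, so non-quantized or int8-quantized models pass through untouched even when enable_qdq_optimizer is set. The helper names below are hypothetical and this is not the actual qdq_scales_fix::Transform code.

// Hypothetical illustration only; the real pass behind qdq_scales_fix::Transform may differ.
#include "core/graph/onnx_protobuf.h"

namespace {

bool IsInt16OrUInt16(int32_t elem_type) {
  return elem_type == ONNX_NAMESPACE::TensorProto_DataType_INT16 ||
         elem_type == ONNX_NAMESPACE::TensorProto_DataType_UINT16;
}

// Gate applied per quantized block: anything that is not (u)int16 is ignored,
// which is why running the pass on a regular model is effectively a no-op.
bool ShouldFixScales(const ONNX_NAMESPACE::TensorProto& zero_point) {
  return zero_point.has_data_type() && IsInt16OrUInt16(zero_point.data_type());
}

}  // namespace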
24 changes: 24 additions & 0 deletions onnxruntime/core/providers/openvino/ov_protobuf_utils.cpp
@@ -0,0 +1,24 @@
// Copyright (C) Intel Corporation
// Licensed under the MIT License

#include "ov_protobuf_utils.h"

Check notice (GitHub Actions / cpplint) on line 4 in onnxruntime/core/providers/openvino/ov_protobuf_utils.cpp:
Include the directory when naming header files [build/include_subdir] [4]

#include "core/graph/onnx_protobuf.h"
#include "core/common/common.h"

namespace onnxruntime {
namespace openvino_ep {
float get_float_initializer_data(const void* initializer) {
const auto* tp = reinterpret_cast<const ONNX_NAMESPACE::TensorProto*>(initializer);
ORT_ENFORCE((tp->has_data_type() && (tp->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT)));
// ORT_ENFORCE(initializer.dims_size() == 1);
return tp->float_data(0);
}
void set_float_initializer_data(const void* initializer, float data) {
auto* tp = (ONNX_NAMESPACE::TensorProto*)(initializer);
ORT_ENFORCE((tp->has_data_type() && (tp->data_type() == ONNX_NAMESPACE::TensorProto_DataType_FLOAT)));
// ORT_ENFORCE(initializer.dims_size() == 1);
tp->set_float_data(0, data);
}
} // namespace openvino_ep
} // namespace onnxruntime
10 changes: 10 additions & 0 deletions onnxruntime/core/providers/openvino/ov_protobuf_utils.h
@@ -0,0 +1,10 @@
// Copyright (C) Intel Corporation
// Licensed under the MIT License

#pragma once
namespace onnxruntime {
namespace openvino_ep {
float get_float_initializer_data(const void* initializer);
void set_float_initializer_data(const void* initializer, float data);
}
} // namespace onnxruntime
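A hedged usage sketch of the two helpers declared above; the caller and the HalveScale name are hypothetical, not taken from the PR. The initializer is passed as an opaque const void* so this header needs no protobuf include, and the .cpp does the cast plus the FLOAT type check.

#include "core/providers/openvino/ov_protobuf_utils.h"

// Hypothetical caller: "tensor_proto" is assumed to point at a float
// TensorProto, e.g. a QDQ scale initializer obtained from the graph.
void HalveScale(const void* tensor_proto) {
  using namespace onnxruntime::openvino_ep;
  const float scale = get_float_initializer_data(tensor_proto);
  set_float_initializer_data(tensor_proto, scale * 0.5f);  // rewrites element 0 in place
}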