Merged
51 commits
e85411a
Updated Internal CI (#581)
jatinwadhwa921 Dec 18, 2024
0d42af9
Updated Internal CI OV version (#594)
jatinwadhwa921 Feb 25, 2025
3dc24ef
Updated ov version in pipeline (#595)
jatinwadhwa921 Feb 27, 2025
9c2fee5
[OVEP] Fix for deprecated OV element type (#597)
jatinwadhwa921 Feb 28, 2025
17f4bc7
Merge branch 'master' into sync_msft_29_2_25
n1harika Feb 28, 2025
4bb577a
Merge pull request #600 from intel/sync_msft_29_2_25
ankitm3k Feb 28, 2025
60ee27a
Sahar/session option develop (#601)
sfatimar Feb 28, 2025
ec62bf3
Use absolute paths for libraries loaded with LOAD_WITH_ALTERED_SEARCH…
jnagi-intel Mar 4, 2025
bd32f51
Remove unintended model copies during compilation (#584)
ericcraw Mar 4, 2025
a6cdf62
Rebasing with msft commits (#607)
jatinwadhwa921 Mar 10, 2025
cdc209c
Revert "Rebasing with msft commits (#607)"
ankitm3k Mar 10, 2025
920ed58
Merge pull request #607
ankitm3k Mar 10, 2025
788fc78
Revert "Merge pull request #607"
jatinwadhwa921 Mar 10, 2025
a046532
Merge pull request #609 from intel/jatin_revert_msft_changes
ankitm3k Mar 10, 2025
ec98cce
Merge branch 'master' into syncing_msft_commits_3_10_25
jatinwadhwa921 Mar 10, 2025
73e0fea
Merge pull request #611 from intel/syncing_msft_commits_3_10_25
jatinwadhwa921 Mar 10, 2025
61b36ef
[OVEP] Fix for precision accuracy
jatinwadhwa921 Mar 5, 2025
ea13a05
Merge pull request #603 from intel/jatin_fix_precison_acc_issue
sfatimar Mar 11, 2025
7683e37
Refactor OVRTAllocator to return base pointer of remote tensor (#613)
ericcraw Mar 17, 2025
1788576
Commit Lint Errors fix (#606)
sfatimar Mar 18, 2025
23e17e2
fix quantizedLinear layer feeds into graph output (#615)
saurabhkale17 Mar 18, 2025
91e64fe
Merge branch 'master' into sync_msft_18_3_25
jatinwadhwa921 Mar 18, 2025
6083601
Merge pull request #621 from intel/sync_msft_18_3_25
jatinwadhwa921 Mar 18, 2025
8b4a6d2
[OVEP] Fix for dumping the model in correct format (#616)
jatinwadhwa921 Mar 19, 2025
7269615
[OVEP] Added Cast and Resize to operators that handle zero-valued dim…
jatinwadhwa921 Mar 19, 2025
9ee95d1
Merge branch 'master' into sync_msft_20_3_25
jatinwadhwa921 Mar 20, 2025
2a24806
Merge pull request #624 from intel/sync_msft_20_3_25
jatinwadhwa921 Mar 21, 2025
c7ac5c8
Merge branch 'master' into sync_msft_25_3_25
jatinwadhwa921 Mar 25, 2025
2c61a3a
Merge pull request #627 from intel/sync_msft_25_3_25
jatinwadhwa921 Mar 26, 2025
e240695
[OVEP] Fix for Dynamic backend creation for NPU. (#622)
gupta-pallavi Mar 27, 2025
322a7e1
Remove unnecessary device queries (#620)
nproshun Mar 28, 2025
25912f7
Add support for parsing AUTO, HETERO and MULTI from json config (#605)
preetha-intel Apr 2, 2025
fbf43a9
Revert "[OVEP] Fix for Dynamic backend creation for NPU. (#622)" (#635)
gupta-pallavi Apr 3, 2025
2313d11
[OVEP] Fix for building OVEP without vcpkg flag (#637)
jatinwadhwa921 Apr 3, 2025
a9a3d10
Merge branch 'master' into sync_msft_4_4_25
jatinwadhwa921 Apr 4, 2025
0636815
Merge pull request #638 from intel/sync_msft_4_4_25
jatinwadhwa921 Apr 4, 2025
1e85f1d
[OVEP] Updated Documentation for python wheels (#640)
jatinwadhwa921 Apr 4, 2025
80dfee9
Device type refactoring (#630)
preetha-intel Apr 4, 2025
2e4d541
Enable adaptive stripping and eliminate dependency of weight sharing …
saurabhkale17 Apr 7, 2025
c0c347c
Add Config for Release build
saurabhkale17 Apr 8, 2025
8517c64
Merge branch 'master' into syncing_msft_8_4_25
saurabhkale17 Apr 8, 2025
b999a1b
Merge pull request #643 from intel/syncing_msft_8_4_25
jatinwadhwa921 Apr 8, 2025
a8527b9
Bug fix in provider key verification (#644)
preetha-intel Apr 8, 2025
4e63ef6
Fix the LUID check (#647)
preetha-intel Apr 10, 2025
f0216fd
Merge branch 'master' into sync_msft_10_4_25
jatinwadhwa921 Apr 10, 2025
a5e6e05
Merge pull request #649 from intel/sync_msft_10_4_25
jatinwadhwa921 Apr 10, 2025
6fc0ed0
Update OV version for Intel Internal CI
jatinwadhwa921 Apr 11, 2025
9294c4c
Merge pull request #652 from intel/jatin_update_int_ci_2025_1
ankitm3k Apr 11, 2025
c2558f3
[OVEP] Update ov version in ort (#653)
jatinwadhwa921 Apr 11, 2025
2f6f45f
Revert OVEP internal ci file
preetha-intel Apr 11, 2025
23b7380
Merge branch 'master' into ovep-release-1.22
preetha-intel Apr 15, 2025
6 changes: 6 additions & 0 deletions cmake/onnxruntime_providers_openvino.cmake
@@ -37,12 +37,18 @@

source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")

onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json)
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20)
set_target_properties(onnxruntime_providers_openvino PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers_openvino PROPERTIES FOLDER "ONNXRuntime")

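# NOT_RELEASE is defined for every non-Release configuration; it gates debug-only code paths such as the OVEP model dump in backend_manager.cc.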
target_compile_options(onnxruntime_providers_openvino PRIVATE
$<$<NOT:$<CONFIG:Release>>:-DNOT_RELEASE>
)

if(NOT MSVC)
target_compile_options(onnxruntime_providers_openvino PRIVATE "-Wno-parentheses")
endif()
12 changes: 6 additions & 6 deletions docs/python/ReadMeOV.rst
@@ -7,36 +7,36 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated NPUs (Windows only)
- Intel® integrated NPUs

Installation
------------

Requirements
^^^^^^^^^^^^

- Ubuntu 18.04, 20.04, RHEL(CPU only) or Windows 10 - 64 bit
- Python 3.9 or 3.10 or 3.11 for Linux and Python 3.10, 3.11 for Windows
- Ubuntu 18.04, 20.04 or Windows 10 - 64 bit
- Python 3.10, 3.11, 3.12 and 3.13 for Windows and Linux

This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated NPUs (Windows only)
- Intel® integrated NPUs

``pip3 install onnxruntime-openvino``

Please install OpenVINO™ PyPi Package separately for Windows.
For installation instructions on Windows please refer to `OpenVINO™ Execution Provider for ONNX Runtime for Windows <https://github.com/intel/onnxruntime/releases/>`_.

**OpenVINO™ Execution Provider for ONNX Runtime** Linux Wheels comes with pre-built libraries of OpenVINO™ version 2024.1.0 eliminating the need to install OpenVINO™ separately.
**OpenVINO™ Execution Provider for ONNX Runtime** Linux wheels come with pre-built libraries of OpenVINO™ version 2025.1.0, eliminating the need to install OpenVINO™ separately.

For more details on build and installation please refer to `Build <https://onnxruntime.ai/docs/build/eps.html#openvino>`_.

Usage
^^^^^

By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated GPU, discrete GPU, integrated NPU (Windows only).
By default, Intel® CPU is used to run inference. However, you can change the default option to Intel® integrated GPU, discrete GPU, or integrated NPU.
Invoke `the provider config device type argument <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options>`_ to change the hardware on which inferencing is done.

For more API calls and environment variables, see `Usage <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#configuration-options>`_.
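For illustration, a minimal sketch of selecting the target device through provider options, assuming the standard onnxruntime Python API; ``model.onnx`` and ``GPU`` are placeholders:

import onnxruntime as ort

# Run with the OpenVINO Execution Provider on an Intel GPU instead of the default CPU.
session = ort.InferenceSession(
    "model.onnx",
    providers=["OpenVINOExecutionProvider"],
    provider_options=[{"device_type": "GPU"}],
)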
35 changes: 27 additions & 8 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -19,6 +19,7 @@
#include "core/providers/openvino/ibackend.h"
#include "core/providers/openvino/backend_utils.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/ov_interface.h"

namespace onnxruntime {
namespace openvino_ep {
@@ -320,9 +321,10 @@ static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
return false;
}

static void DumpOpenVINOEPModel(const std::filesystem::path& onnx_model_path_name,
ONNX_NAMESPACE::ModelProto* model_proto,
const onnxruntime::Node& fused_node) {
static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& onnx_model_path_name,
[[maybe_unused]] ONNX_NAMESPACE::ModelProto* model_proto,
[[maybe_unused]] const onnxruntime::Node& fused_node) {
#ifdef NOT_RELEASE
if (openvino_ep::backend_utils::IsDebugEnabled()) {
auto model_name = onnx_model_path_name.empty() ? "unknown.onnx" : onnx_model_path_name.filename();

@@ -331,11 +333,13 @@ static void DumpOpenVINOEPModel(const std::filesystem::path& onnx_model_path_nam
if (dash != std::string::npos) {
auto new_name = model_name.stem().string() + subgraph_name.substr(dash, std::string::npos);
model_name.replace_filename(new_name);
model_name.replace_extension(".onnx");
}

std::fstream dump(model_name, std::ios::out | std::ios::trunc | std::ios::binary);
model_proto->SerializeToOstream(dump);
}
#endif
}

std::unique_ptr<ONNX_NAMESPACE::ModelProto>
@@ -358,22 +362,37 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
}
};

[[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
[[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: Compiler QDQ optimization pass is enabled";
OVCore::Get()->core.set_property("NPU", {ov::intel_npu::qdq_optimization(true)});
// Disable OVEP-side QDQ stripping: at this stage the provider option "enable_qdq_optimizer"
// is still true, but OVEP stripping is turned off because the compiler performs it instead.
enable_ovep_qdq_optimizer = false;
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: OVEP QDQ optimization pass is enabled";
}
}
#endif

const auto& onnx_model_path_name = subgraph.ModelPath();
// QDQ stripping enabled only for the NPU
if (session_context_.device_type.find("NPU") != std::string::npos &&
session_context_.enable_qdq_optimizer &&
IsQDQGraph(subgraph)) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1";
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
std::unique_ptr<onnxruntime::Model> model;
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights);
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights, enable_ovep_qdq_optimizer);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
print_model_proto_duration();
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
return model_proto;
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 0";
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
auto model = subgraph.CreateModel(logger);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -137,14 +137,14 @@ bool IsCILogEnabled() {
}

std::shared_ptr<const OVNetwork>
CreateOVModel(const std::string model,
CreateOVModel(std::string&& model,
const SessionContext& session_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
if (IsCILogEnabled()) {
std::cout << "CreateNgraphFunc" << std::endl;
}
try {
auto ov_model = OVCore::Get()->ReadModel(model, session_context.onnx_model_path_name.string());
auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string());

// Check for Constant Folding
if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/backend_utils.h
@@ -62,7 +62,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
size_t batch_slice_idx);

std::shared_ptr<const OVNetwork>
CreateOVModel(const std::string model,
CreateOVModel(std::string&& model,
const SessionContext& session_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

43 changes: 26 additions & 17 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -2,6 +2,8 @@
// Licensed under the MIT License

#include <map>
#include <unordered_set>

#include <string>
#include <memory>
#include <sstream>
@@ -69,14 +71,11 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
subgraph_context_.subgraph_name);
model_stream.reset(); // Delete stream after it is no longer needed
} else {
std::shared_ptr<const OVNetwork> ov_model;
{
const std::string model = model_proto->SerializeAsString();
if (!subgraph_context.has_dynamic_input_shape) {
delete model_proto.release();
}
ov_model = CreateOVModel(model, session_context_, const_outputs_map_);
std::string model = model_proto->SerializeAsString();
if (!subgraph_context.has_dynamic_input_shape) {
model_proto.reset();
}
auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
exe_network_ = OVCore::Get()->CompileModel(
ov_model, remote_context_, subgraph_context_.subgraph_name);
@@ -108,14 +107,11 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
subgraph_context_.subgraph_name);
} else { // For all other types use ov::ov_core read_model() to generate OV IR
// followed by ov::ov_core compile_model()
std::shared_ptr<const OVNetwork> ov_model;
{
const std::string model = model_proto->SerializeAsString();
if (!subgraph_context.has_dynamic_input_shape) {
delete model_proto.release();
}
ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
std::string model = model_proto->SerializeAsString();
if (!subgraph_context.has_dynamic_input_shape) {
model_proto.reset();
}
auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
exe_network_ = OVCore::Get()->CompileModel(
ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
}
@@ -164,10 +160,8 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
if (session_context_.precision.find("FP32") != std::string::npos) {
device_config.emplace(ov::hint::inference_precision("f32"));
}
if (session_context_.precision.find("ACCURACY") != std::string::npos &&
session_context_.device_type.find("GPU") != std::string::npos) {
if (session_context_.precision.find("ACCURACY") != std::string::npos) {
if (session_context_.OpenVINO_Version.at(0) >= 2024) {
device_config.emplace(ov::hint::inference_precision(ov::element::undefined));
device_config.emplace(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY));
} else {
if (!subgraph_context_.model_precision.empty())
@@ -230,6 +224,15 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
}
}
}
auto find_device_type_mode = [&](const std::string& device_type) -> std::string {
std::string device_mode = "";
auto delimiter_pos = device_type.find(':');
if (delimiter_pos != std::string::npos) {
std::stringstream str_stream(device_type.substr(0, delimiter_pos));
std::getline(str_stream, device_mode, ',');
}
return device_mode;
};

// Parse device types like "AUTO:CPU,GPU" and extract individual devices
auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
@@ -278,8 +281,14 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
if (session_context_.device_type.find("AUTO") == 0 ||
session_context_.device_type.find("HETERO") == 0 ||
session_context_.device_type.find("MULTI") == 0) {
// Parse to get the device mode (e.g., "AUTO:CPU,GPU" -> "AUTO")
std::unordered_set<std::string> supported_mode = {"AUTO", "HETERO", "MULTI"};
auto device_mode = find_device_type_mode(session_context_.device_type);
ORT_ENFORCE(supported_mode.find(device_mode) != supported_mode.end(), " Invalid device mode is passed : ", session_context_.device_type);
// Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"])
auto individual_devices = parse_individual_devices(session_context_.device_type);
if (!device_mode.empty()) individual_devices.emplace_back(device_mode);

// Set properties only for individual devices (e.g., "CPU", "GPU")
for (const std::string& device : individual_devices) {
if (target_config.count(device)) {
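For context, a hedged sketch of how a composite device string reaches this code path, assuming the standard onnxruntime Python API; the prefix before ':' must be AUTO, HETERO, or MULTI to pass the ORT_ENFORCE check above, and "model.onnx" is a placeholder:

import onnxruntime as ort

# "AUTO:GPU,CPU" -> device mode "AUTO", individual devices ["GPU", "CPU"];
# per-device properties are applied only to the individual devices.
session = ort.InferenceSession(
    "model.onnx",
    providers=["OpenVINOExecutionProvider"],
    provider_options=[{"device_type": "AUTO:GPU,CPU"}],
)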
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -6,6 +6,7 @@
#include <vector>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <filesystem>
#include <memory>
@@ -102,6 +103,9 @@ struct ProviderInfo {
bool so_share_ep_contexts{false}; // ORT session option
fs::path so_context_file_path{}; // ORT session option
const ConfigOptions* config_options{NULL};
const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
"load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
"disable_dynamic_shapes"};
};
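A hypothetical example of the "load_config" option listed in valid_provider_keys above; the JSON layout (device name mapped to OpenVINO properties) is an assumption, and the property values are illustrative only:

import json
import onnxruntime as ort

# Write an assumed-format config: device name -> OpenVINO properties.
with open("ov_config.json", "w") as f:
    json.dump({"AUTO": {"PERFORMANCE_HINT": "THROUGHPUT"}}, f)

session = ort.InferenceSession(
    "model.onnx",
    providers=["OpenVINOExecutionProvider"],
    provider_options=[{"device_type": "AUTO:GPU,CPU", "load_config": "ov_config.json"}],
)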

// Holds context applicable to the entire EP instance.
37 changes: 0 additions & 37 deletions onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -58,43 +58,6 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const ProviderInfo& info, s
shared_context_{shared_context},
ep_ctx_handle_{session_context_.openvino_sdk_version, *GetLogger()} {
InitProviderOrtApi();

// to check if target device is available
// using OVCore capability GetAvailableDevices to fetch list of devices plugged in
if (info.cache_dir.empty()) {
bool device_found = false;
std::vector<std::string> available_devices = OVCore::Get()->GetAvailableDevices();
// Checking for device_type configuration
if (info.device_type != "") {
if (info.device_type.find("HETERO") != std::string::npos ||
info.device_type.find("MULTI") != std::string::npos ||
info.device_type.find("AUTO") != std::string::npos) {
device_found = true;
} else {
for (const std::string& device : available_devices) {
if (device.rfind(info.device_type, 0) == 0) {
if (info.device_type.find("GPU") != std::string::npos && (info.precision == "FP32" ||
info.precision == "FP16" ||
info.precision == "ACCURACY")) {
device_found = true;
break;
}
if (info.device_type == "CPU" && (info.precision == "FP32")) {
device_found = true;
break;
}
if (info.device_type.find("NPU") != std::string::npos) {
device_found = true;
break;
}
}
}
}
}
if (!device_found) {
ORT_THROW("[ERROR] [OpenVINO] Specified device - " + info.device_type + " is not available");
}
}
}

OpenVINOExecutionProvider::~OpenVINOExecutionProvider() {