diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake
index 143d002c6173e..f149030c15702 100644
--- a/cmake/onnxruntime_providers_openvino.cmake
+++ b/cmake/onnxruntime_providers_openvino.cmake
@@ -37,12 +37,18 @@
   source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
   onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
+  onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json)
   install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
   set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20)
   set_target_properties(onnxruntime_providers_openvino PROPERTIES LINKER_LANGUAGE CXX)
   set_target_properties(onnxruntime_providers_openvino PROPERTIES FOLDER "ONNXRuntime")
+
+  target_compile_options(onnxruntime_providers_openvino PRIVATE
+    $<$<NOT:$<CONFIG:Release>>:-DNOT_RELEASE>
+  )
+
   if(NOT MSVC)
     target_compile_options(onnxruntime_providers_openvino PRIVATE "-Wno-parentheses")
   endif()
diff --git a/docs/python/ReadMeOV.rst b/docs/python/ReadMeOV.rst
index 86914699bbf6d..fefef421158f8 100644
--- a/docs/python/ReadMeOV.rst
+++ b/docs/python/ReadMeOV.rst
@@ -7,7 +7,7 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
  - Intel® CPUs
  - Intel® integrated GPUs
  - Intel® discrete GPUs
- - Intel® integrated NPUs (Windows only)
+ - Intel® integrated NPUs
 
 Installation
 ------------
@@ -15,28 +15,28 @@ Installation
 Requirements
 ^^^^^^^^^^^^
 
-- Ubuntu 18.04, 20.04, RHEL(CPU only) or Windows 10 - 64 bit
-- Python 3.9 or 3.10 or 3.11 for Linux and Python 3.10, 3.11 for Windows
+- Ubuntu 18.04, 20.04 or Windows 10 - 64 bit
+- Python 3.10, 3.11, 3.12 and 3.13 for Windows and Linux
 
 This package supports:
  - Intel® CPUs
 - Intel® integrated GPUs
 - Intel® discrete GPUs
- - Intel® integrated NPUs (Windows only)
+ - Intel® integrated NPUs
 
 ``pip3 install onnxruntime-openvino``
 
 Please install OpenVINO™ PyPi Package separately for Windows.
 For installation instructions on Windows please refer to `OpenVINO™ Execution Provider for ONNX Runtime for Windows `_.
 
-**OpenVINO™ Execution Provider for ONNX Runtime** Linux Wheels comes with pre-built libraries of OpenVINO™ version 2024.1.0 eliminating the need to install OpenVINO™ separately.
+**OpenVINO™ Execution Provider for ONNX Runtime** Linux wheels come with pre-built libraries of OpenVINO™ version 2025.1.0, eliminating the need to install OpenVINO™ separately.
 
 For more details on build and installation please refer to `Build `_.
 
 Usage
 ^^^^^
 
-By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated GPU, discrete GPU, integrated NPU (Windows only).
+By default, Intel® CPU is used to run inference. However, you can change the default option to Intel® integrated GPU, discrete GPU, or integrated NPU.
 Invoke `the provider config device type argument `_ to change the hardware on which inferencing is done.
 
 For more API calls and environment variables, see `Usage `_.
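Note on the NOT_RELEASE flag introduced above: the generator expression (reconstructed here as $<$<NOT:$<CONFIG:Release>>:-DNOT_RELEASE>) defines NOT_RELEASE in every configuration except Release, and backend_manager.cc below compiles the model-dump helper only under that flag. A minimal sketch of the pattern, assuming that reading of the expression; the function name is illustrative and not part of the patch:

    // Sketch: this code path exists only when CMake defines NOT_RELEASE,
    // i.e. in any configuration other than Release.
    #include <fstream>
    #include <string>

    void MaybeDumpModel(const std::string& path, const std::string& bytes) {
    #ifdef NOT_RELEASE
      // Debug/RelWithDebInfo builds: write the serialized model to disk.
      std::ofstream dump(path, std::ios::binary | std::ios::trunc);
      dump.write(bytes.data(), static_cast<std::streamsize>(bytes.size()));
    #else
      // Release builds: the helper compiles to a no-op.
      (void)path;
      (void)bytes;
    #endif
    }

Release binaries therefore carry no dumping code at all, rather than checking a runtime flag.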
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 2a842b8a1eca8..d758430f39108 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -19,6 +19,7 @@
 #include "core/providers/openvino/ibackend.h"
 #include "core/providers/openvino/backend_utils.h"
 #include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
+#include "core/providers/openvino/ov_interface.h"
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -320,9 +321,10 @@ static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
   return false;
 }
 
-static void DumpOpenVINOEPModel(const std::filesystem::path& onnx_model_path_name,
-                                ONNX_NAMESPACE::ModelProto* model_proto,
-                                const onnxruntime::Node& fused_node) {
+static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& onnx_model_path_name,
+                                [[maybe_unused]] ONNX_NAMESPACE::ModelProto* model_proto,
+                                [[maybe_unused]] const onnxruntime::Node& fused_node) {
+#ifdef NOT_RELEASE
   if (openvino_ep::backend_utils::IsDebugEnabled()) {
     auto model_name = onnx_model_path_name.empty() ? "unknown.onnx" : onnx_model_path_name.filename();
 
@@ -331,11 +333,13 @@ static void DumpOpenVINOEPModel(const std::filesystem::path& onnx_model_path_nam
     if (dash != std::string::npos) {
       auto new_name = model_name.stem().string() + subgraph_name.substr(dash, std::string::npos);
       model_name.replace_filename(new_name);
+      model_name.replace_extension(".onnx");
     }
 
     std::fstream dump(model_name, std::ios::out | std::ios::trunc | std::ios::binary);
     model_proto->SerializeToOstream(dump);
   }
+#endif
 }
 
 std::unique_ptr<ONNX_NAMESPACE::ModelProto>
@@ -358,14 +362,29 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     }
   };
 
+  [[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
+  [[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
+#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
+  if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
+    if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
+      LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: Compiler QDQ optimization pass is enabled";
+      OVCore::Get()->core.set_property("NPU", {ov::intel_npu::qdq_optimization(true)});
+      // Disable OVEP QDQ stripping: at this stage the provider option "enable_qdq_optimizer" is
+      // still true, but OVEP stripping is turned off because the compiler performs the stripping.
+      enable_ovep_qdq_optimizer = false;
+    } else {
+      LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: OVEP QDQ optimization pass is enabled";
+    }
+  }
+#endif
+
   const auto& onnx_model_path_name = subgraph.ModelPath();
   // QDQ stripping enabled only for the NPU
   if (session_context_.device_type.find("NPU") != std::string::npos &&
-      session_context_.enable_qdq_optimizer &&
-      IsQDQGraph(subgraph)) {
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1";
+      (enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
     std::unique_ptr<onnxruntime::Model> model;
-    Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights);
+    Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model,
+                                                    shared_context_.shared_weights, enable_ovep_qdq_optimizer);
     auto model_proto = model->ToProto();
     model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
     print_model_proto_duration();
@@ -373,7 +392,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
   } else {
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 0";
+    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
     auto model = subgraph.CreateModel(logger);
     auto model_proto = model->ToProto();
     model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index 255154b8788ad..2ee5e9ec3e3a9 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -137,14 +137,14 @@ bool IsCILogEnabled() {
 }
 
 std::shared_ptr<const OVNetwork>
-CreateOVModel(const std::string model,
+CreateOVModel(std::string&& model,
               const SessionContext& session_context,
               std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
   if (IsCILogEnabled()) {
     std::cout << "CreateNgraphFunc" << std::endl;
   }
   try {
-    auto ov_model = OVCore::Get()->ReadModel(model, session_context.onnx_model_path_name.string());
+    auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string());
 
     // Check for Constant Folding
     if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index a4e6fc0828f79..f13b1b05ced67 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -62,7 +62,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
                     size_t batch_slice_idx);
 
 std::shared_ptr<const OVNetwork>
-CreateOVModel(const std::string model,
+CreateOVModel(std::string&& model,
               const SessionContext& session_context,
               std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 44b811e6af2c0..c814df618e3b3 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -2,6 +2,8 @@
 // Licensed under the MIT License
 
 #include <map>
+#include <unordered_set>
+
 #include <string>
 #include <memory>
 #include <sstream>
@@ -69,14 +71,11 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
                                                    subgraph_context_.subgraph_name);
     model_stream.reset();  // Delete stream after it is no longer needed
   } else {
-    std::shared_ptr<const OVNetwork> ov_model;
-    {
-      const std::string model = model_proto->SerializeAsString();
-      if (!subgraph_context.has_dynamic_input_shape) {
-        delete model_proto.release();
-      }
-      ov_model = CreateOVModel(model, session_context_, const_outputs_map_);
+    std::string model = model_proto->SerializeAsString();
+    if (!subgraph_context.has_dynamic_input_shape) {
+      model_proto.reset();
     }
+    auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
     LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
     exe_network_ = OVCore::Get()->CompileModel(
         ov_model, remote_context_, subgraph_context_.subgraph_name);
@@ -108,14 +107,11 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
                                                    subgraph_context_.subgraph_name);
     } else {
       // For all other types use ov::ov_core read_model() to generate OV IR
      // followed by ov::ov_core compile_model()
-      std::shared_ptr<const OVNetwork> ov_model;
-      {
-        const std::string model = model_proto->SerializeAsString();
-        if (!subgraph_context.has_dynamic_input_shape) {
-          delete model_proto.release();
-        }
-        ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
+      std::string model = model_proto->SerializeAsString();
+      if (!subgraph_context.has_dynamic_input_shape) {
+        model_proto.reset();
       }
+      auto ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
       exe_network_ = OVCore::Get()->CompileModel(
           ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
     }
@@ -164,10 +160,8 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
   if (session_context_.precision.find("FP32") != std::string::npos) {
     device_config.emplace(ov::hint::inference_precision("f32"));
   }
-  if (session_context_.precision.find("ACCURACY") != std::string::npos &&
-      session_context_.device_type.find("GPU") != std::string::npos) {
+  if (session_context_.precision.find("ACCURACY") != std::string::npos) {
     if (session_context_.OpenVINO_Version.at(0) >= 2024) {
-      device_config.emplace(ov::hint::inference_precision(ov::element::undefined));
       device_config.emplace(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY));
     } else {
       if (!subgraph_context_.model_precision.empty())
@@ -230,6 +224,15 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
       }
     }
   }
+  auto find_device_type_mode = [&](const std::string& device_type) -> std::string {
+    std::string device_mode = "";
+    auto delimiter_pos = device_type.find(':');
+    if (delimiter_pos != std::string::npos) {
+      std::stringstream str_stream(device_type.substr(0, delimiter_pos));
+      std::getline(str_stream, device_mode, ',');
+    }
+    return device_mode;
+  };
   // Parse device types like "AUTO:CPU,GPU" and extract individual devices
   auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
@@ -278,8 +281,14 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
   if (session_context_.device_type.find("AUTO") == 0 ||
       session_context_.device_type.find("HETERO") == 0 ||
       session_context_.device_type.find("MULTI") == 0) {
+    // Parse to get the device mode (e.g., "AUTO:CPU,GPU" -> "AUTO")
+    std::unordered_set<std::string> supported_mode = {"AUTO", "HETERO", "MULTI"};
+    auto device_mode = find_device_type_mode(session_context_.device_type);
+    ORT_ENFORCE(supported_mode.find(device_mode) != supported_mode.end(), " Invalid device mode is passed : ", session_context_.device_type);
     // Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"])
     auto individual_devices = parse_individual_devices(session_context_.device_type);
+    if (!device_mode.empty()) individual_devices.emplace_back(device_mode);
+
     // Set properties only for individual devices (e.g., "CPU", "GPU")
     for (const std::string& device : individual_devices) {
       if (target_config.count(device)) {
diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index a1a756a9baef7..1314edd54e937 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -6,6 +6,7 @@
 #include <vector>
 #include <map>
 #include <unordered_map>
+#include <unordered_set>
 #include <string>
 #include <filesystem>
 #include <memory>
@@ -102,6 +103,9 @@ struct ProviderInfo {
   bool so_share_ep_contexts{false};   // ORT session option
   fs::path so_context_file_path{};    // ORT session option
   const ConfigOptions* config_options{NULL};
+  const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id",
"device_luid", "cache_dir", "precision", + "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", + "disable_dynamic_shapes"}; }; // Holds context applicable to the entire EP instance. diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 6482a07ee92bc..f9d4ab13cf2ce 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -58,43 +58,6 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const ProviderInfo& info, s shared_context_{shared_context}, ep_ctx_handle_{session_context_.openvino_sdk_version, *GetLogger()} { InitProviderOrtApi(); - - // to check if target device is available - // using OVCore capability GetAvailableDevices to fetch list of devices plugged in - if (info.cache_dir.empty()) { - bool device_found = false; - std::vector available_devices = OVCore::Get()->GetAvailableDevices(); - // Checking for device_type configuration - if (info.device_type != "") { - if (info.device_type.find("HETERO") != std::string::npos || - info.device_type.find("MULTI") != std::string::npos || - info.device_type.find("AUTO") != std::string::npos) { - device_found = true; - } else { - for (const std::string& device : available_devices) { - if (device.rfind(info.device_type, 0) == 0) { - if (info.device_type.find("GPU") != std::string::npos && (info.precision == "FP32" || - info.precision == "FP16" || - info.precision == "ACCURACY")) { - device_found = true; - break; - } - if (info.device_type == "CPU" && (info.precision == "FP32")) { - device_found = true; - break; - } - if (info.device_type.find("NPU") != std::string::npos) { - device_found = true; - break; - } - } - } - } - } - if (!device_found) { - ORT_THROW("[ERROR] [OpenVINO] Specified device - " + info.device_type + " is not available"); - } - } } OpenVINOExecutionProvider::~OpenVINOExecutionProvider() { diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc new file mode 100644 index 0000000000000..067076b1f84f2 --- /dev/null +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -0,0 +1,120 @@ +#include +#include "core/providers/openvino/openvino_parser_utils.h" +#include "core/providers/shared_library/provider_api.h" + +namespace onnxruntime { +namespace openvino_ep { + +std::string OpenVINOParserUtils::ParsePrecision(const ProviderOptions& provider_options, + std::string& device_type, + const std::string& option_name) { + using DeviceName = std::string; + using DefaultValue = std::string; + using ValidValues = std::vector; + using DefaultAndValidPair = std::pair; + using ParserHelper = std::unordered_map; + // {Device prefix, {Default precision, {Supported precisions}}} + ParserHelper helper = { + {"GPU", {"FP16", {"FP16", "FP32", "ACCURACY"}}}, + {"NPU", {"FP16", {"FP16", "ACCURACY"}}}, + {"CPU", {"FP32", {"FP32", "ACCURACY"}}}, + }; + + // If we have multiple device configuration, request precision from user and check it + if ((device_type.find("HETERO:") == 0) || + (device_type.find("MULTI:") == 0) || + (device_type.find("BATCH:") == 0) || + (device_type.find("AUTO:") == 0)) { + if (!provider_options.contains(option_name)) { + LOGS_DEFAULT(INFO) << "[OpenVINO] Precision is not set. Using default OpenVINO precision for " + device_type + ". 
\n"; + return ""; + } else { + std::unordered_set supported_precisions = {"FP16", "FP32", "ACCURACY"}; + std::string precision = provider_options.at(option_name); + if (supported_precisions.contains(precision)) { + return precision; + } else { + ORT_THROW("[ERROR] [OpenVINO] Unsupported precision for the ", device_type, " device. Device supports only FP16, FP32, ACCURACY.\n"); + } + } + } + + // Deprecated device specification (CPU_FP32, GPU.0_FP32, etc.) + if (auto delimit = device_type.find("_"); delimit != std::string::npos) { + if (provider_options.contains(option_name)) { + ORT_THROW("[ERROR] [OpenVINO] Precision is specified twice, please remove the _precision suffix from device name and only set the precision separately.\n"); + } + LOGS_DEFAULT(WARNING) << "[OpenVINO] Selected 'device_type' " + device_type + " is deprecated. \n" + << "Update the 'device_type' to specified types 'CPU', 'GPU', 'GPU.0', " + << "'GPU.1', 'NPU' or from" + << " HETERO/MULTI/AUTO/BATCH options and set 'precision' separately. \n"; + std::string precision = device_type.substr(delimit + 1); + // Device type is updated in-place + device_type = device_type.substr(0, delimit); + // We have to remove the index (.0, .1, etc.) to use device as key for helper + std::string device_prefix = device_type; + if (auto dot_delimit = device_prefix.find("."); dot_delimit != std::string::npos) { + device_prefix = device_prefix.substr(0, dot_delimit); + } + + if (!helper.contains(device_prefix)) { + ORT_THROW("[ERROR] [OpenVINO] Selected 'device_type' " + device_type + " is not supported with precision suffix. \n"); + } + const auto& valid_values = helper[device_prefix].second; + if (std::find(std::begin(valid_values), std::end(valid_values), precision) != std::end(valid_values)) { + return precision; + } else { + auto value_iter = valid_values.begin(); + std::string valid_values_joined = *value_iter; + // Append 2nd and up, if only one then ++value_iter is same as end() + for (++value_iter; value_iter != valid_values.end(); ++value_iter) { + valid_values_joined += ", " + *value_iter; + } + + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. ", device_type, " only supports ", valid_values_joined, ".\n"); + } + } + + // Deprecated devices are already handled above + // We have to remove the index (.0, .1, etc.) to use device as key for helper + auto device_prefix = device_type; + if (auto dot_delimit = device_prefix.find("."); dot_delimit != std::string::npos) { + device_prefix = device_prefix.substr(0, dot_delimit); + } + + if (provider_options.contains(option_name)) { + std::string precision = provider_options.at(option_name); + + if (helper.contains(device_prefix)) { + auto const& valid_values = helper[device_prefix].second; + if (std::find(std::begin(valid_values), std::end(valid_values), precision) != std::end(valid_values)) { + return precision; // Return precision selected if valid + } else { + auto value_iter = valid_values.begin(); + std::string valid_values_joined = *value_iter; + // Append 2nd and up, if only one then ++value_iter is same as end() + for (++value_iter; value_iter != valid_values.end(); ++value_iter) { + valid_values_joined += ", " + *value_iter; + } + + ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. 
", device_type, " only supports ", valid_values_joined, ".\n"); + } + } else { + // Not found in helper - custom device, return as is + return precision; + } + } else { + // Precision not set + if (helper.contains(device_prefix)) { + // If found in helper - set the default + return helper[device_prefix].first; + } else { + // Not found in helper - custom device - default precision + LOGS_DEFAULT(INFO) << "[OpenVINO] Precision is not set. Using default OpenVINO precision for " + device_type + ". \n"; + return ""; + } + } +} + +} // namespace openvino_ep +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.h b/onnxruntime/core/providers/openvino/openvino_parser_utils.h new file mode 100644 index 0000000000000..3e23c9e788463 --- /dev/null +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "core/framework/provider_options.h" + +namespace onnxruntime { +namespace openvino_ep { + +class OpenVINOParserUtils { + public: + static std::string ParsePrecision(const ProviderOptions& provider_options, + std::string& device_type, + const std::string& option_name); +}; + +} // namespace openvino_ep +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 5f402162432ba..e36ff48d0351d 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -2,6 +2,8 @@ // Licensed under the MIT License #include +#include + #include #include "core/providers/shared_library/provider_api.h" #include "core/providers/openvino/openvino_provider_factory.h" @@ -11,6 +13,7 @@ #include "core/providers/openvino/backend_utils.h" #include "core/session/onnxruntime_session_options_config_keys.h" #include "nlohmann/json.hpp" +#include "core/providers/openvino/openvino_parser_utils.h" namespace onnxruntime { namespace openvino_ep { @@ -54,116 +57,137 @@ bool ParseBooleanOption(const ProviderOptions& provider_options, std::string opt return false; } -std::string ParseDeviceType(std::shared_ptr ov_core, const ProviderOptions& provider_options, std::string option_name) { - const std::vector ov_available_devices = ov_core->GetAvailableDevices(); - - std::set ov_supported_device_types = {"CPU", "GPU", - "GPU.0", "GPU.1", "NPU"}; - std::set deprecated_device_types = {"CPU_FP32", "GPU_FP32", - "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", - "GPU.0_FP16", "GPU.1_FP16"}; - - // Expand set of supported device with OV devices - ov_supported_device_types.insert(ov_available_devices.begin(), ov_available_devices.end()); - - if (provider_options.contains(option_name)) { - const auto& selected_device = provider_options.at("device_type"); - - if (deprecated_device_types.contains(selected_device)) { - // Deprecated device and precision is handled together at ParsePrecision - return selected_device; - } - - if (!((ov_supported_device_types.contains(selected_device)) || - (selected_device.find("HETERO:") == 0) || - (selected_device.find("MULTI:") == 0) || - (selected_device.find("AUTO:") == 0))) { - ORT_THROW( - "[ERROR] [OpenVINO] You have selected wrong configuration value for the key 'device_type'. " - "Select from 'CPU', 'GPU', 'NPU', 'GPU.x' where x = 0,1,2 and so on or from" - " HETERO/MULTI/AUTO options available. 
\n"); +std::string ParseDeviceType(std::shared_ptr ov_core, const ProviderOptions& provider_options) { + std::set supported_device_types = {"CPU", "GPU", "NPU"}; + std::set supported_device_modes = {"AUTO", "HETERO", "MULTI"}; + std::vector devices_to_check; + std::string selected_device; + std::vector luid_list; + std::string device_mode = ""; + std::map ov_luid_map; + + if (provider_options.contains("device_type")) { + selected_device = provider_options.at("device_type"); + std::erase(selected_device, ' '); + if (selected_device == "AUTO") return selected_device; + + if (auto delimit = selected_device.find(":"); delimit != std::string::npos) { + device_mode = selected_device.substr(0, delimit); + if (supported_device_modes.contains(device_mode)) { + const auto& devices = selected_device.substr(delimit + 1); + devices_to_check = split(devices, ','); + ORT_ENFORCE(devices_to_check.size() > 0, "Mode AUTO/HETERO/MULTI should have devices listed based on priority"); + } else { + ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes are AUTO/HETERO/MULTI"); + } + } else { + devices_to_check.push_back(selected_device); } - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Choosing Device: " << selected_device; - return selected_device; } else { - std::string default_device; - // Take default behavior from project configuration #if defined OPENVINO_CONFIG_CPU - default_device = "CPU"; + selected_device = "CPU"; + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Choosing Device: " << selected_device; + return selected_device; #elif defined OPENVINO_CONFIG_GPU - default_device = "GPU"; + selected_device = "GPU"; + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Choosing Device: " << selected_device; + return selected_device; #elif defined OPENVINO_CONFIG_NPU - default_device = "NPU"; + selected_device = "NPU"; + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Choosing Device: " << selected_device; + return selected_device; #elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO - default_device = DEVICE_NAME; - - // Validate that devices passed are valid - int delimit = device_type.find(":"); - const auto& devices = device_type.substr(delimit + 1); - auto device_list = split(devices, ','); - for (const auto& device : devices) { - if (!ov_supported_device_types.contains(device)) { - ORT_THROW("[ERROR] [OpenVINO] Invalid device selected: ", device); - } - } + selected_device = DEVICE_NAME; + + // Add sub-devices to check-list + int delimit = selected_device.find(":"); + const auto& devices = selected_device.substr(delimit + 1); + devices_to_check = split(devices, ','); #endif + } - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Choosing Device: " << default_device; - return default_device; + // Get the LUID passed from the provider option in a comma separated string list + // Compare each of the LUID's against the LUID obtained using ov property and map with the right device + if (provider_options.contains("device_luid")) { + std::string luid_str = provider_options.at("device_luid"); + std::erase(luid_str, ' '); + luid_list = split(luid_str, ','); } -} -// Depends on ProviderOptions. 
-std::string ParsePrecision(const ProviderOptions& provider_options, std::string& device_type, const std::string& option_name) {
-  using DeviceName = std::string;
-  using DefaultValue = std::string;
-  using ValidValues = std::list<std::string>;
-  using foo = std::pair<DefaultValue, ValidValues>;
-  using ParserHelper = std::map<DeviceName, foo>;
-  ParserHelper helper = {
-      {"GPU", {"FP16", {"FP16", "FP32"}}},
-      {"NPU", {"FP16", {"FP16"}}},
-      {"CPU", {"FP32", {"FP32"}}},
-  };
-
-  std::set<std::string> deprecated_device_types = {"CPU_FP32", "GPU_FP32",
-                                                   "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
-                                                   "GPU.0_FP16", "GPU.1_FP16"};
+  bool all_devices_found = true;
 
-  if (provider_options.contains(option_name)) {
-    // Start by checking if the device_type is a normal valid one
-    if (helper.contains(device_type)) {
-      auto const& valid_values = helper[device_type].second;
-      const auto& precision = provider_options.at(option_name);
-      if (precision == "ACCURACY") {
-        return valid_values.back();  // Return highest supported precision
-      } else {
-        if (std::find(valid_values.begin(), valid_values.end(), precision) != valid_values.end()) {
-          return precision;  // Return precision selected if valid
-        } else {
-          auto value_iter = valid_values.begin();
-          std::string valid_values_joined = *value_iter;
-          // Append 2nd and up, if only one then ++value_iter is same as end()
-          for (++value_iter; value_iter != valid_values.end(); ++value_iter) {
-            valid_values_joined += ", " + *value_iter;
+  for (auto device : devices_to_check) {
+    bool device_found = false;
+    // Check deprecated device format (CPU_FP32, GPU.0_FP16, etc.) and remove the suffix in place
+    // Suffix will be parsed in ParsePrecision
+    if (auto delimit = device.find("_"); delimit != std::string::npos) {
+      device = device.substr(0, delimit);
+    }
+    // Just the device name without the .0, .1, etc. suffix
+    auto device_prefix = device;
+    // Check if a device index is appended (.0, .1, etc.); if so, remove it
+    if (auto delimit = device_prefix.find("."); delimit != std::string::npos)
+      device_prefix = device_prefix.substr(0, delimit);
+    if (supported_device_types.contains(device_prefix)) {
+      try {
+        std::vector<std::string> available_devices = ov_core->GetAvailableDevices(device_prefix);
+        // Here we need to find the full device name (with .idx, but without _precision)
+        if (std::find(std::begin(available_devices), std::end(available_devices), device) != std::end(available_devices))
+          device_found = true;
+        if (device_prefix != "CPU" && luid_list.size() > 0) {
+          for (auto dev : available_devices) {
+            ov::device::LUID ov_luid = OVCore::Get()->core.get_property(dev, ov::device::luid);
+            std::stringstream ov_luid_str;
+            ov_luid_str << ov_luid;
+            ov_luid_map.emplace(ov_luid_str.str(), dev);
           }
-
-          ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. ", device_type, " only supports", valid_values_joined, ".\n");
         }
+      } catch (const char* msg) {
+        ORT_THROW(msg);
       }
-    } else if (deprecated_device_types.contains(device_type)) {
-      LOGS_DEFAULT(WARNING) << "[OpenVINO] Selected 'device_type' " + device_type + " is deprecated. \n"
-                            << "Update the 'device_type' to specified types 'CPU', 'GPU', 'GPU.0', "
-                            << "'GPU.1', 'NPU' or from"
-                            << " HETERO/MULTI/AUTO options and set 'precision' separately. \n";
\n"; - auto delimit = device_type.find("_"); - device_type = device_type.substr(0, delimit); - return device_type.substr(delimit + 1); } + all_devices_found = all_devices_found && device_found; + } + if (luid_list.size() > 0) { + std::string ov_luid_devices; + for (auto luid_str : luid_list) { + if (ov_luid_map.contains(luid_str)) { + std::string ov_dev = ov_luid_map.at(luid_str); + std::string ov_dev_strip = split(ov_dev, '.')[0]; + if (std::find(std::begin(devices_to_check), std::end(devices_to_check), ov_dev) != std::end(devices_to_check) || + std::find(std::begin(devices_to_check), std::end(devices_to_check), ov_dev_strip) != std::end(devices_to_check)) { + if (!ov_luid_devices.empty()) ov_luid_devices = ov_luid_devices + ","; + ov_luid_devices = ov_luid_devices + ov_dev; + } else { + ORT_THROW(" LUID : ", ov_dev, " does not match with device_type : ", selected_device); + } + } else { + ORT_THROW(provider_options.at("device_luid"), " does not exist for the selected device_type : ", selected_device); + } + } + if (!device_mode.empty()) { + selected_device = device_mode + ":" + ov_luid_devices; + for (auto dev_str : devices_to_check) { + auto default_dev = split(dev_str, '.')[0]; + + if (ov_luid_devices.find(default_dev) == std::string::npos) + selected_device = selected_device + "," + dev_str; + } + } else { + selected_device = ov_luid_devices; + } + } + // If invalid device is chosen error is thrown + if (!all_devices_found) { + ORT_THROW( + "[ERROR] [OpenVINO] You have selected wrong configuration value for the key 'device_type'. " + "Select from 'CPU', 'GPU', 'NPU', 'GPU.x' where x = 0,1,2 and so on or from" + " HETERO/MULTI/AUTO/BATCH options available. \n"); + } else { + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Choosing Device: " << selected_device; + return selected_device; } - // Return default - return helper[device_type].first; } void ParseProviderOptions([[maybe_unused]] ProviderInfo& result, [[maybe_unused]] const ProviderOptions& config_options) {} @@ -211,12 +235,22 @@ struct OpenVINO_Provider : Provider { ProviderInfo pi; pi.config_options = config_options; + // Lambda function to check for invalid keys and throw an error + auto validateKeys = [&]() { + for (const auto& pair : provider_options) { + if (pi.valid_provider_keys.find(pair.first) == pi.valid_provider_keys.end()) { + ORT_THROW("Invalid provider_option key: " + pair.first); + } + } + }; + validateKeys(); + std::string bool_flag = ""; // Minor optimization: we'll hold an OVCore reference to ensure we don't create a new core between ParseDeviceType and // (potential) SharedContext creation. 
       auto ov_core = OVCore::Get();
-      pi.device_type = ParseDeviceType(ov_core, provider_options, "device_type");
+      pi.device_type = ParseDeviceType(ov_core, provider_options);
 
       if (provider_options.contains("device_id")) {
         std::string dev_id = provider_options.at("device_id").data();
@@ -232,7 +266,7 @@
         pi.cache_dir = provider_options.at("cache_dir");
       }
 
-      pi.precision = ParsePrecision(provider_options, pi.device_type, "precision");
+      pi.precision = OpenVINOParserUtils::ParsePrecision(provider_options, pi.device_type, "precision");
 
       if (provider_options.contains("load_config")) {
         auto parse_config = [&](const std::string& config_str) -> std::map<std::string, ov::AnyMap> {
@@ -254,9 +288,9 @@
           for (auto& [key, value] : json_config.items()) {
             ov::AnyMap inner_map;
-
+            std::set<std::string> valid_ov_devices = {"CPU", "GPU", "NPU", "AUTO", "HETERO", "MULTI"};
             // Ensure the key is one of "CPU", "GPU", or "NPU"
-            if (key != "CPU" && key != "GPU" && key != "NPU") {
+            if (valid_ov_devices.find(key) == valid_ov_devices.end()) {
              LOGS_DEFAULT(WARNING) << "Unsupported device key: " << key << ". Skipping entry.\n";
               continue;
             }
@@ -339,12 +373,15 @@
                              << "Executing with num_streams=1";
         }
       }
-      pi.enable_opencl_throttling = ParseBooleanOption(provider_options, "enable_opencl_throttling");
-
-      pi.enable_qdq_optimizer = ParseBooleanOption(provider_options, "enable_qdq_optimizer");
+      try {
+        pi.enable_opencl_throttling = ParseBooleanOption(provider_options, "enable_opencl_throttling");
 
-      pi.disable_dynamic_shapes = ParseBooleanOption(provider_options, "disable_dynamic_shapes");
+        pi.enable_qdq_optimizer = ParseBooleanOption(provider_options, "enable_qdq_optimizer");
 
+        pi.disable_dynamic_shapes = ParseBooleanOption(provider_options, "disable_dynamic_shapes");
+      } catch (std::string msg) {
+        ORT_THROW(msg);
+      }
       // Always true for NPU plugin or when passed.
       if (pi.device_type.find("NPU") != std::string::npos) {
         pi.disable_dynamic_shapes = true;
diff --git a/onnxruntime/core/providers/openvino/ov_allocator.cc b/onnxruntime/core/providers/openvino/ov_allocator.cc
index 0e5ff8ff98efb..431f5730c0342 100644
--- a/onnxruntime/core/providers/openvino/ov_allocator.cc
+++ b/onnxruntime/core/providers/openvino/ov_allocator.cc
@@ -10,12 +10,6 @@ namespace onnxruntime {
 
 using namespace openvino_ep;
 
-constexpr size_t default_alignment = 4096;
-
-static inline size_t align_up(size_t size, size_t pow2_alignment) {
-  return (size + pow2_alignment - 1) & ~(pow2_alignment - 1);
-}
-
 OVRTAllocator::OVRTAllocator(ov::Core& core, OrtDevice::DeviceType device_type, OrtDevice::DeviceId device_id, const char* name) : IAllocator(OrtMemoryInfo(name, OrtAllocatorType::OrtDeviceAllocator, OrtDevice(device_type, OrtDevice::MemType::DEFAULT, device_id), device_id, OrtMemTypeCPUInput)), core_(core) {
   if (device_type == OrtDevice::NPU) {
     remote_ctx_ = core_.get_default_context("NPU").as<ov::RemoteContext>();
@@ -26,16 +20,11 @@ OVRTAllocator::OVRTAllocator(ov::Core& core, OrtDevice::DeviceType device_type,
 
 void* OVRTAllocator::Alloc(size_t size) {
   try {
-    size_t alloc_size = align_up(size + sizeof(ov::Tensor*) + default_alignment, default_alignment);
     ov::Tensor* tensor = new ov::Tensor(remote_ctx_.create_host_tensor(ov::element::Type_t::u8,
-                                                                       {alloc_size}));
-    uintptr_t data_ptr = reinterpret_cast<uintptr_t>(tensor->data());
-
-    ov::Tensor** ptr = reinterpret_cast<ov::Tensor**>(align_up(data_ptr + sizeof(ov::Tensor*), default_alignment));
-    ptr[-1] = tensor;
-
-    return reinterpret_cast<void*>(ptr);
-
+                                                                       {size}));
+    std::unique_lock<std::mutex> lock(mutex_);
+    allocated_.insert({tensor->data(), tensor});
+    return reinterpret_cast<void*>(tensor->data());
   } catch (const ov::Exception& e) {
     ORT_THROW(std::string("Alloc failed: ") + e.what());
   }
@@ -43,8 +32,14 @@ void* OVRTAllocator::Alloc(size_t size) {
 
 void OVRTAllocator::Free(void* p) {
   try {
-    ov::Tensor** ptr = reinterpret_cast<ov::Tensor**>(p);
-    delete ptr[-1];
+    std::unique_lock<std::mutex> lock(mutex_);
+    auto it = allocated_.find(p);
+    if (it != allocated_.end()) {
+      ov::Tensor* tensor = it->second;
+      allocated_.erase(it);
+      lock.unlock();
+      delete tensor;
+    }
   } catch (const ov::Exception& e) {
     ORT_THROW(std::string("Free failed: ") + e.what());
   }
diff --git a/onnxruntime/core/providers/openvino/ov_allocator.h b/onnxruntime/core/providers/openvino/ov_allocator.h
index 083cfc4d5aed3..f6e87111f47ff 100644
--- a/onnxruntime/core/providers/openvino/ov_allocator.h
+++ b/onnxruntime/core/providers/openvino/ov_allocator.h
@@ -3,9 +3,11 @@
 #ifdef USE_OVEP_NPU_MEMORY
 #pragma once
 
+#include <mutex>
+
 #include "core/common/inlined_containers.h"
 #include "core/framework/allocator.h"
 #include "openvino/runtime/remote_context.hpp"
 
 namespace onnxruntime {
@@ -18,6 +21,8 @@ class OVRTAllocator : public IAllocator {
  private:
   ov::Core& core_;
   ov::RemoteContext remote_ctx_;
+  InlinedHashMap<void*, ov::Tensor*> allocated_;
+  std::mutex mutex_;
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index 37f9e1c4e9201..a175ca863d1d1 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -46,9 +46,20 @@ void printDebugInfo(const ov::CompiledModel& obj) {
 }
 #endif
 
-std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model, const std::string& model_path) {
+// Function to check if a given OV property is enabled
+std::optional<bool> queryOVProperty(const std::string& property, const std::string& device_type) {
   try {
-    std::istringstream modelStringStream(model);
+    // Get the property value
+    auto supported_properties = OVCore::Get()->core.get_property(device_type, ov::supported_properties);
+    return std::find(supported_properties.begin(), supported_properties.end(), property) != supported_properties.end();
+  } catch (const std::exception&) {
+    return std::nullopt;  // Property not found or invalid
+  }
+}
+
+std::shared_ptr<OVNetwork> OVCore::ReadModel(std::string&& model, const std::string& model_path) {
+  try {
+    std::istringstream modelStringStream(std::move(model));
     std::istream& modelStream = modelStringStream;
     // Try to load with FrontEndManager
     ov::frontend::FrontEndManager manager;
@@ -164,8 +175,48 @@ OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_strea
 }
 #endif
 
-std::vector<std::string> OVCore::GetAvailableDevices() {
-  auto available_devices = core.get_available_devices();
+std::vector<std::string> OVCore::GetAvailableDevices() const {
+  std::vector<std::string> available_devices = core.get_available_devices();
   return available_devices;
 }
 
+std::vector<std::string> OVCore::GetAvailableDevices(const std::string& device_type) const {
+  std::vector<std::string> available_devices;
+  std::vector<std::string> devicesIDs;
+  // Uses logic from OpenVINO to only return available devices of the specified type (e.g. CPU, NPU or GPU)
+  try {
+    devicesIDs = core.get_property(device_type, ov::available_devices);
+  } catch (const ov::Exception&) {
+    // Plugin is not created, e.g. due to an invalid environment;
+    // an empty device list will be returned
+  } catch (const std::runtime_error& ex) {
+    // Plugin creation failed with a runtime error; surface it
+    ORT_THROW("[ERROR] [OpenVINO] An exception occurred while trying to create the ",
+              device_type,
+              " device: ",
+              ex.what());
+  } catch (const std::exception& ex) {
+    ORT_THROW("[ERROR] [OpenVINO] An exception occurred while trying to create the ",
+              device_type,
+              " device: ",
+              ex.what());
+  } catch (...) {
+    ORT_THROW("[ERROR] [OpenVINO] Unknown exception occurred while trying to create the ",
+              device_type,
+              " device");
+  }
+
+  if (devicesIDs.size() > 1 ||
+      (devicesIDs.size() == 1 && devicesIDs[0] == "0")) {
+    for (const auto& deviceID : devicesIDs) {
+      available_devices.push_back(device_type + '.' + deviceID);
+    }
+  }
+  if (!devicesIDs.empty()) {
+    available_devices.push_back(device_type);
+  }
+
+  return available_devices;
 }
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
index 0ed51394a6ffa..bebe73bd702dd 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -8,6 +8,7 @@
 #include <string>
 #include <fstream>
 #include <vector>
+#include <optional>
 
 #include "openvino/openvino.hpp"
 #include "openvino/runtime/intel_npu/properties.hpp"
@@ -37,6 +38,8 @@ typedef ov::intel_gpu::ocl::ClContext* OVRemoteContextPtr;
 typedef ov::RemoteContext OVRemoteContext;
 #endif
 
+std::optional<bool> queryOVProperty(const std::string& property, const std::string& device_type);
+
 template <typename T>
 class WeakSingleton {
  public:
@@ -67,7 +70,7 @@ struct OVCore : WeakSingleton<OVCore> {
   ov::Core core;
 
   // OV Interface For Reading Model
-  std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream, const std::string& model_path);
+  std::shared_ptr<OVNetwork> ReadModel(std::string&& model_stream, const std::string& model_path);
 
   // OV Interface for Compiling OV Model Type
   OVExeNetwork CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_network,
@@ -92,7 +95,8 @@ struct OVCore : WeakSingleton<OVCore> {
                             OVRemoteContextPtr context, std::string name);
 #endif
-  std::vector<std::string> GetAvailableDevices();
+  std::vector<std::string> GetAvailableDevices() const;
+  std::vector<std::string> GetAvailableDevices(const std::string& device_type) const;
   void SetCache(const std::string& cache_dir_path);
   void SetStreams(const std::string& device_type, int num_streams);
 };
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index f7326642a5544..4e1387d2ef4a9 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -792,7 +792,8 @@ bool DataOps::node_is_supported(const NodeIndex node_idx, bool& has_external_wei
         if (((device_id_.find("CPU") != std::string::npos) ||
              (device_id_.find("GPU") != std::string::npos)) &&
             ((optype == "Expand") || (optype == "Equal") ||
             (optype == "Slice") || (optype == "Concat") ||
-             (optype == "Shape"))) {
+             (optype == "Shape") || (optype == "Cast") ||
+             (optype == "Resize"))) {
           return;
         }
         has_unsupported_dimension = true;
diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc
index a9f6420d6ac3b..c071db9c3a4fb 100644
--- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc
+++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc
@@ -341,12 +341,13 @@ static bool CheckDQRuleSet(const NodeUnit& node_unit,
     }
   }
 }
 
+// This checks whether a Q node feeds the output of the source graph, which expects a quantized output
 static bool CheckQFeedsIntoQuantizedOutput(const NodeUnit& node_unit,
                                            const std::unordered_map<std::string, std::string> graph_op_data_type) {
   auto op_of_quantized_layer = node_unit.Outputs();
   for (auto& itr : op_of_quantized_layer) {
     auto it = graph_op_data_type.find(itr.node_arg.Name());
-    if (it != graph_op_data_type.end() && it->second == "tensor(uint8)") {
+    if (it != graph_op_data_type.end() && (it->second == "tensor(uint8)" || it->second == "tensor(uint16)")) {
       return true;
     }
   }
@@ -369,6 +370,11 @@ static bool CheckQRuleSet(const NodeUnit& node_unit,
     graph_op_data_type[src_graph.GetNodeArg(ops->Name())->Name()] = ops->Type()->data();
   }
 
+  // Check if any quantized node feeds into the source graph's output
+  if (CheckQFeedsIntoQuantizedOutput(node_unit, std::move(graph_op_data_type))) {
+    return true;
+  }
+
   // If UInt16 Q, don't keep it
   if (GetQDQDataType(q_node) == DT_UINT16 || GetQDQDataType(q_node) == DT_INT16) {
     reason = SkipReason::Int16QDQ;
@@ -381,8 +387,6 @@ static bool CheckQRuleSet(const NodeUnit& node_unit,
   } else if (op_type == "Add") {
     // Add keeps all Qs
     return true;
-  } else if (CheckQFeedsIntoQuantizedOutput(node_unit, std::move(graph_op_data_type))) {
-    return true;
   } else {
     // Keep Q of an unsupported Op only if the target that succeeds it is a supported Op in this list
     return IsNextTargetNodeOfQValid(q_node, &target_node, src_graph, {"Conv", "Add", "MatMul"}, false);
@@ -444,9 +448,17 @@ static bool HandleDoubleQDQ(onnxruntime::Graph& dst_graph, const onnxruntime::Gr
 static void AddStandaloneNodeUnit(onnxruntime::Graph& dst_graph, const onnxruntime::GraphViewer& src_graph,
                                   const NodeUnit& node_unit,
                                   std::set<std::string>& initializers_to_keep,
-                                  const logging::Logger& /* logger */) {
+                                  const logging::Logger& /* logger */,
+                                  bool IsWeightSharingWithoutOVEPQDQStripping) {
   assert(node_unit.UnitType() == NodeUnit::Type::SingleNode);
 
+  // This is the scenario where WAI is enabled and OVEP stripping is disabled:
+  // do not strip off any Q or DQ node
+  if (IsWeightSharingWithoutOVEPQDQStripping) {
+    AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
+    return;
+  }
+
   if (HandleDoubleQDQ(dst_graph, src_graph, node_unit, initializers_to_keep)) return;
 
   auto add_identity_op = [&](bool duplicate_dq) {
@@ -508,7 +520,8 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
                            const onnxruntime::GraphViewer& src_graph,
                            const NodeUnit& node_unit,
                            std::set<std::string>& initializers_to_keep,
-                           const logging::Logger& /* logger */) {
+                           const logging::Logger& /* logger */,
+                           bool IsWeightSharingWithoutOVEPQDQStripping) {
   assert(node_unit.UnitType() == NodeUnit::Type::QDQGroup);
 
   // Collect inputs coming into the node unit.
@@ -526,7 +539,7 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
       SkipReason reason = SkipReason::Other;
       bool keep_dq = CheckDQRuleSet(node_unit, dq_node, src_graph, reason);
 
-      if (keep_dq) {
+      if (IsWeightSharingWithoutOVEPQDQStripping || keep_dq) {
         AddNode(initializers_to_keep, src_graph, dst_graph, *dq_node);
         dq_node_args_to_keep.insert({input_defs.at(0)->Name(),
                                      &dst_graph.GetOrCreateNodeArg(dq_node->OutputDefs().at(0)->Name(),
@@ -594,7 +607,7 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
 
       bool keep_q = CheckQRuleSet(node_unit, q_node, src_graph, reason);
 
-      if (keep_q) {
+      if (IsWeightSharingWithoutOVEPQDQStripping || keep_q) {
         AddNode(initializers_to_keep, src_graph, dst_graph, *q_node);
         // if keep_q, then output defs of the target node doesn't change
         output_args.push_back(&dst_graph.GetOrCreateNodeArg(target_node.OutputDefs().at(i)->Name(),
@@ -672,7 +685,8 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
                                        const logging::Logger& logger,
                                        bool enable_ovep_weight_sharing,
                                        /*out*/ std::unique_ptr<onnxruntime::Model>& model,
-                                       /*out*/ sw& shared_weights) {
+                                       /*out*/ sw& shared_weights,
+                                       bool enable_ovep_qdq_optimizer) {
   // NOTE: This function is a re-implementation of GraphViewerToProto() in core/graph/graph_proto_serializer.cc
   // with the following differences:
   //   - Uses onnxruntime::Graph APIs instead of onnx::GraphProto APIs.
@@ -763,10 +777,12 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
       continue;  // Already handled this node unit
     }
 
+    bool IsWeightSharingWithoutOVEPQDQStripping = enable_ovep_weight_sharing && !enable_ovep_qdq_optimizer;
+
     if (node_unit->UnitType() == NodeUnit::Type::SingleNode) {
-      AddStandaloneNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger);
+      AddStandaloneNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger, IsWeightSharingWithoutOVEPQDQStripping);
     } else {
-      AddQDQNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger);
+      AddQDQNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger, IsWeightSharingWithoutOVEPQDQStripping);
     }
 
     seen_node_units.insert(node_unit);
diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.h b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.h
index 02831525cba32..4b5696f4411bd 100644
--- a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.h
+++ b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.h
@@ -17,7 +17,8 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
                                        const logging::Logger& logger,
                                        bool enable_ovep_weight_sharing,
                                        /*out*/ std::unique_ptr<onnxruntime::Model>& model,
-                                       /*out*/ sw& shared_weights);
+                                       /*out*/ sw& shared_weights,
+                                       bool enable_ovep_qdq_optimizer);
 
 bool dumpMetaDataMapToBinary(const sw::Metadata::Map& shared_weights, const std::string& filename);
 }  // namespace openvino_ep
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index e3659e08c1e3a..184b8c6a3d81d 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -2004,7 +2004,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O
     ov_options_converted_map["context"] = context_string.str();
   }
 
-  ov_options_converted_map["enable_opencl_throttling"] = legacy_ov_options->enable_opencl_throttling;
+  ov_options_converted_map["enable_opencl_throttling"] = legacy_ov_options->enable_opencl_throttling ? "true" : "false";
 
   if (legacy_ov_options->enable_dynamic_shapes) {
     ov_options_converted_map["disable_dynamic_shapes"] = "false";
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 1cc17ea03fa32..b685b170c163f 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -680,11 +680,11 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
           ov_options[key] = value;
         } else if (deprecated_device_types.find(value) != deprecated_device_types.end()) {
           ov_options[key] = value;
-        } else if (value.find("HETERO:") == 0) {
+        } else if (value.find("HETERO") == 0) {
           ov_options[key] = value;
-        } else if (value.find("MULTI:") == 0) {
+        } else if (value.find("MULTI") == 0) {
           ov_options[key] = value;
-        } else if (value.find("AUTO:") == 0) {
+        } else if (value.find("AUTO") == 0) {
           ov_options[key] = value;
         } else {
           ORT_THROW(
@@ -792,6 +792,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
         }
       } else if (key == "device_memory_name") {
         device_memory_name_ = std::move(value);
+      } else if (key == "device_luid") {
+        ov_options[key] = value;
       } else {
         ORT_THROW(
             "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
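For context, the perftest change above simply forwards the new device_luid key into the provider options that ParseDeviceType validates. A hedged sketch of how an application would pass these options through the ONNX Runtime C++ API (AppendExecutionProvider_OpenVINO_V2 is the existing wrapper for V2 OpenVINO provider options; the LUID value and model path below are hypothetical placeholders):

    #include <onnxruntime_cxx_api.h>
    #include <string>
    #include <unordered_map>

    int main() {
      Ort::Env env;
      Ort::SessionOptions so;
      std::unordered_map<std::string, std::string> ov_options{
          {"device_type", "AUTO:GPU,CPU"},  // mode:device list, parsed by ParseDeviceType
          {"precision", "FP16"},            // checked by OpenVINOParserUtils::ParsePrecision
          {"device_luid", "0x1234"},        // hypothetical LUID, matched against ov::device::luid
      };
      so.AppendExecutionProvider_OpenVINO_V2(ov_options);
      Ort::Session session(env, ORT_TSTR("model.onnx"), so);  // hypothetical model path
      return 0;
    }

Any key outside valid_provider_keys in contexts.h now fails fast in validateKeys() rather than being silently ignored.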
diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index 3af6e8ccacfb8..f3ebc92409f77 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -1307,7 +1307,7 @@ def test_session_options_add_external_initializers(self):
 
     def test_register_custom_ops_library(self):
         if sys.platform.startswith("win"):
-            shared_library = "custom_op_library.dll"
+            shared_library = os.path.abspath("custom_op_library.dll")
             if not os.path.exists(shared_library):
                 raise FileNotFoundError(f"Unable to find '{shared_library}'")
 
@@ -1724,7 +1724,7 @@ def test_register_custom_e_ps_library(self):
             return
 
         if sys.platform.startswith("win"):
-            shared_library = "test_execution_provider.dll"
+            shared_library = os.path.abspath("test_execution_provider.dll")
 
         elif sys.platform.startswith("darwin"):
             # exclude for macos
diff --git a/onnxruntime/test/testdata/custom_op_openvino_wrapper_library/openvino_wrapper.cc b/onnxruntime/test/testdata/custom_op_openvino_wrapper_library/openvino_wrapper.cc
index 27d5c59439243..d4ce3320e13ca 100644
--- a/onnxruntime/test/testdata/custom_op_openvino_wrapper_library/openvino_wrapper.cc
+++ b/onnxruntime/test/testdata/custom_op_openvino_wrapper_library/openvino_wrapper.cc
@@ -35,7 +35,7 @@ static ov::element::Type ConvertONNXToOVType(ONNXTensorElementDataType onnx_type
     case ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
       return ov::element::bf16;
     default:
-      return ov::element::undefined;
+      return ov::element::dynamic;
   }
 }
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
index b53a2302be403..e8e4f22153ca5 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
+++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino
@@ -1,7 +1,7 @@
 ARG UBUNTU_VERSION=22.04
 FROM ubuntu:${UBUNTU_VERSION}
 
-ARG OPENVINO_VERSION=2025.0.0
+ARG OPENVINO_VERSION=2025.1.0
 ARG PYTHON_VERSION=3.10
 
 ADD scripts /tmp/scripts
@@ -19,9 +19,9 @@ ENV IE_PLUGINS_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN cd /opt && mkdir -p intel && cd intel && \
-    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_ubuntu22_2025.0.0.17942.1f68be9f594_x86_64.tgz && \
-    tar xzf openvino_toolkit_ubuntu22_2025.0.0.17942.1f68be9f594_x86_64.tgz && rm -rf openvino_toolkit_ubuntu22_2025.0.0.17942.1f68be9f594_x86_64.tgz && \
-    mv openvino_toolkit_ubuntu22_2025.0.0.17942.1f68be9f594_x86_64 openvino_2025.0.0 && \
+    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.1/linux/openvino_toolkit_ubuntu22_2025.1.0.18503.6fec06580ab_x86_64.tgz && \
+    tar xzf openvino_toolkit_ubuntu22_2025.1.0.18503.6fec06580ab_x86_64.tgz && rm -rf openvino_toolkit_ubuntu22_2025.1.0.18503.6fec06580ab_x86_64.tgz && \
+    mv openvino_toolkit_ubuntu22_2025.1.0.18503.6fec06580ab_x86_64 openvino_2025.1.0 && \
     cd $INTEL_OPENVINO_DIR/install_dependencies && ./install_openvino_dependencies.sh -y
 
 WORKDIR /root
diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile
index e98fbf78df509..dd049d7260bdf 100644
--- a/tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile
+++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile
@@ -19,8 +19,8 @@ RUN dnf install -y --nodocs \
     && dnf clean all \
     && rm -rf /var/cache/dnf
 
-ENV INTEL_OPENVINO_DIR=/opt/intel/openvino_2025.0.0
-ARG OPENVINO_PACKAGE_URL=https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.0/linux/openvino_toolkit_rhel8_2025.0.0.17942.1f68be9f594_x86_64.tgz
+ENV INTEL_OPENVINO_DIR=/opt/intel/openvino_2025.1.0
+ARG OPENVINO_PACKAGE_URL=https://storage.openvinotoolkit.org/repositories/openvino/packages/2025.1/linux/openvino_toolkit_rhel8_2025.1.0.18503.6fec06580ab_x86_64.tgz
 ARG TEMP_DIR=/tmp/openvino_installer
 
 RUN mkdir -p ${TEMP_DIR} && \
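A closing note on the ov_allocator.cc rewrite earlier in this patch: it replaces the old pointer-stashing alignment trick (hiding an ov::Tensor* just before the returned address) with a mutex-guarded map from data pointer to owning tensor. A self-contained sketch of that bookkeeping pattern, with plain new/delete standing in for ov::Tensor and create_host_tensor (assumption: silently ignoring unknown pointers in Free, as the patch does, is the intended behavior):

    // Sketch of the map-based allocator bookkeeping used by OVRTAllocator.
    #include <cstddef>
    #include <mutex>
    #include <unordered_map>

    class TrackingAllocator {
     public:
      void* Alloc(size_t size) {
        char* block = new char[size];  // stand-in for remote_ctx_.create_host_tensor(...)
        std::unique_lock<std::mutex> lock(mutex_);
        allocated_.emplace(block, block);  // record the owner keyed by the raw data pointer
        return block;
      }
      void Free(void* p) {
        std::unique_lock<std::mutex> lock(mutex_);
        auto it = allocated_.find(p);
        if (it == allocated_.end()) return;  // unknown pointer: ignore, as in the patch
        char* block = it->second;
        allocated_.erase(it);
        lock.unlock();  // drop the lock before the potentially slow deallocation
        delete[] block;
      }
     private:
      std::unordered_map<void*, char*> allocated_;
      std::mutex mutex_;
    };

Compared with the removed arithmetic, the map costs one lookup per Free but avoids over-allocation, alignment assumptions, and undefined behavior if a foreign pointer is ever freed.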