From 89030b5a88b13b24973b9516a4a5a66636cc94bc Mon Sep 17 00:00:00 2001 From: Eric Crawford Date: Mon, 25 Aug 2025 13:58:05 -0700 Subject: [PATCH] Allow mmapping native binaries in ov 2025.3 --- .../providers/openvino/backends/basic_backend.cc | 2 +- onnxruntime/core/providers/openvino/ibackend.h | 2 +- .../providers/openvino/onnx_ctx_model_helper.cc | 10 +++++++--- .../providers/openvino/onnx_ctx_model_helper.h | 8 +++++++- .../core/providers/openvino/ov_interface.cc | 14 ++++++++++++-- onnxruntime/core/providers/openvino/ov_interface.h | 3 ++- 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index a0c6b0c5984e1..2f174110dd31b 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -59,7 +59,7 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr }; // If the EPContext node with OVIR Encapsulation, then create // an executable network from EP_CACHE_CONTEXT using read_model() & compile_model() - exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream, + exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream->stream_, hw_target, device_config, enable_causallm, diff --git a/onnxruntime/core/providers/openvino/ibackend.h b/onnxruntime/core/providers/openvino/ibackend.h index ec38425f602eb..365a4625815d6 100644 --- a/onnxruntime/core/providers/openvino/ibackend.h +++ b/onnxruntime/core/providers/openvino/ibackend.h @@ -19,7 +19,7 @@ class IBackend { virtual ~IBackend() = default; virtual void RewindKVCache(size_t index) {} }; -using ptr_stream_t = std::unique_ptr; +using ptr_stream_t = std::unique_ptr; class BackendFactory { public: static std::shared_ptr diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index 9e70756a254aa..051a39bd4f205 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -100,7 +100,8 @@ Status EPCtxHandler::AddOVEPCtxNodeToGraph(const GraphViewer& graph_viewer, return Status::OK(); } -std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const { +std::unique_ptr +EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const { auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin(); auto node = graph_viewer.GetNode(first_index); ORT_ENFORCE(node != nullptr); @@ -113,10 +114,11 @@ std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesy bool embed_mode = static_cast(attrs.at(EMBED_MODE).i()); std::unique_ptr result; + std::filesystem::path blob_filepath{}; if (embed_mode) { result.reset((std::istream*)new std::istringstream(ep_cache_context)); } else { - auto blob_filepath = so_context_file_path; + blob_filepath = so_context_file_path; if (blob_filepath.empty() && !graph_viewer.ModelPath().empty()) { blob_filepath = graph_viewer.ModelPath(); } @@ -126,16 +128,18 @@ std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesy } bool isXML = backend_utils::IsModelStreamXML(*result); + std::filesystem::path native_blob_path{}; if (!isXML) { // If the model stream is not an XML (i.e. precompiled blob), the OpenVINO SDK version that it was // exported with must match the version that is currently running. + native_blob_path = std::move(blob_filepath); ORT_ENFORCE((attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_), "EPCtx blob was exported / is compatible with OpenVINO SDK version " + attrs.at(EP_SDK_VER).s() + ", but OpenVINO SDK version currently in use is " + openvino_sdk_version_); } LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node"; - return result; + return std::make_unique(std::move(result), native_blob_path); } bool EPCtxHandler::CheckForOVEPCtxNodeInGraph(const GraphViewer& graph_viewer) const { diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h index b9ddb40a7a233..f207f5014ca1f 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h @@ -12,6 +12,12 @@ namespace onnxruntime { namespace openvino_ep { +struct ModelBlobWrapper { + ModelBlobWrapper(std::unique_ptr stream, const std::filesystem::path& native_blob_path) : stream_(std::move(stream)), maybe_native_blob_path_(native_blob_path) {} + std::unique_ptr stream_; + std::filesystem::path maybe_native_blob_path_; +}; + // Utilities to handle EPContext node export and parsing of an EPContext node // to create the compiled_model object to infer on static const char EPCONTEXT_OP[] = "EPContext"; @@ -31,7 +37,7 @@ class EPCtxHandler { const std::string& graph_name, const bool embed_mode, std::string&& model_blob_str) const; - std::unique_ptr GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const; + std::unique_ptr GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const; InlinedVector GetEPCtxNodes() const; bool CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, const std::string& target_attr_extn) const; diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 2d29df8eb4197..899845d4890cf 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -11,6 +11,7 @@ #include "core/providers/openvino/backend_utils.h" #include "core/providers/openvino/backends/basic_backend.h" #include "core/providers/openvino/ov_stateful_patch_utils.h" +#include "core/providers/openvino/onnx_ctx_model_helper.h" namespace onnxruntime { namespace openvino_ep { @@ -191,14 +192,23 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model, "Exception while Loading Network for graph {}", name); } -OVExeNetwork OVCore::ImportModel(std::istream& model_stream, +OVExeNetwork OVCore::ImportModel(ModelBlobWrapper& model_blob, std::string hw_target, const ov::AnyMap& device_config, std::string name) { return OvExceptionBoundary([&]() { ov::CompiledModel obj; - obj = core.import_model(model_stream, hw_target, device_config); +#if (OPENVINO_VERSION_MAJOR > 2025 || (OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR >= 3)) + if (!model_blob.maybe_native_blob_path_.empty()) { + obj = core.import_model(ov::read_tensor_data(model_blob.maybe_native_blob_path_), hw_target, device_config); + } else { + obj = core.import_model(*model_blob.stream_, hw_target, device_config); + } +#else + obj = core.import_model(*model_blob.stream_, hw_target, device_config); +#endif OVExeNetwork exe(obj, hw_target); + #ifndef NDEBUG printDebugInfo(exe.Get()); #endif diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 59c2cf95874b0..3e1f829258608 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -26,6 +26,7 @@ namespace openvino_ep { class OVCore; class OVInferRequest; class OVExeNetwork; +struct ModelBlobWrapper; typedef ov::Tensor OVTensor; typedef ov::ProfilingInfo OVProfilingInfo; @@ -82,7 +83,7 @@ struct OVCore : WeakSingleton { ov::AnyMap& device_config, const std::string& name); // OV Interface for Import model Stream - OVExeNetwork ImportModel(std::istream& model_stream, + OVExeNetwork ImportModel(ModelBlobWrapper& model_blob, std::string hw_target, const ov::AnyMap& device_config, std::string name);