Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
};
// If the EPContext node uses OVIR Encapsulation, then create
// an executable network from EP_CACHE_CONTEXT using read_model() & compile_model()
exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream,
exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream->stream_,
hw_target,
device_config,
enable_causallm,
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/ibackend.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class IBackend {
virtual ~IBackend() = default;
virtual void RewindKVCache(size_t index) {}
};
using ptr_stream_t = std::unique_ptr<std::istream>;
using ptr_stream_t = std::unique_ptr<ModelBlobWrapper>;
class BackendFactory {
public:
static std::shared_ptr<IBackend>
Expand Down
10 changes: 7 additions & 3 deletions onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@
return Status::OK();
}

std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const {
std::unique_ptr<ModelBlobWrapper>
EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const {
auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin();
auto node = graph_viewer.GetNode(first_index);
ORT_ENFORCE(node != nullptr);
Expand All @@ -113,10 +114,11 @@
bool embed_mode = static_cast<bool>(attrs.at(EMBED_MODE).i());

std::unique_ptr<std::istream> result;
std::filesystem::path blob_filepath{};
if (embed_mode) {
result.reset((std::istream*)new std::istringstream(ep_cache_context));
} else {
auto blob_filepath = so_context_file_path;
blob_filepath = so_context_file_path;
if (blob_filepath.empty() && !graph_viewer.ModelPath().empty()) {
blob_filepath = graph_viewer.ModelPath();
}
Expand All @@ -126,16 +128,18 @@
}

bool isXML = backend_utils::IsModelStreamXML(*result);
std::filesystem::path native_blob_path{};
if (!isXML) {
// If the model stream is not an XML (i.e. precompiled blob), the OpenVINO SDK version that it was
// exported with must match the version that is currently running.
native_blob_path = std::move(blob_filepath);
ORT_ENFORCE((attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_),
"EPCtx blob was exported / is compatible with OpenVINO SDK version " + attrs.at(EP_SDK_VER).s() +
", but OpenVINO SDK version currently in use is " + openvino_sdk_version_);
}

LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
return result;
return std::make_unique<ModelBlobWrapper>(std::move(result), native_blob_path);

Check warning on line 142 in onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <utility> for move [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc:142: Add #include <utility> for move [build/include_what_you_use] [4]

Check warning on line 142 in onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <memory> for make_unique<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc:142: Add #include <memory> for make_unique<> [build/include_what_you_use] [4]
}

bool EPCtxHandler::CheckForOVEPCtxNodeInGraph(const GraphViewer& graph_viewer) const {
Expand Down
8 changes: 7 additions & 1 deletion onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
namespace onnxruntime {
namespace openvino_ep {

// Pairs the input stream of an EPContext model blob with an optional filesystem
// path to that blob. GetModelBlobStream() fills in the path only when the stream
// is not XML (i.e. a precompiled blob); otherwise the path is left empty.
// OVCore::ImportModel() imports from the path when it is non-empty (OpenVINO
// 2025.3 or newer) and falls back to the stream in every other case.
struct ModelBlobWrapper {
// Takes ownership of `stream`; `native_blob_path` is copied into the wrapper.
ModelBlobWrapper(std::unique_ptr<std::istream> stream, const std::filesystem::path& native_blob_path) : stream_(std::move(stream)), maybe_native_blob_path_(native_blob_path) {}

Check warning on line 16 in onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <utility> for move [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h:16: Add #include <utility> for move [build/include_what_you_use] [4]
// Owned stream over the model blob contents.
std::unique_ptr<std::istream> stream_;
// Path of the blob file on disk; empty when no native blob path applies.
std::filesystem::path maybe_native_blob_path_;
};

// Utilities to handle EPContext node export and parsing of an EPContext node
// to create the compiled_model object to infer on
static const char EPCONTEXT_OP[] = "EPContext";
Expand All @@ -31,7 +37,7 @@
const std::string& graph_name,
const bool embed_mode,
std::string&& model_blob_str) const;
std::unique_ptr<std::istream> GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const;
std::unique_ptr<ModelBlobWrapper> GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const;
InlinedVector<const Node*> GetEPCtxNodes() const;
bool CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, const std::string& target_attr_extn) const;

Expand Down
14 changes: 12 additions & 2 deletions onnxruntime/core/providers/openvino/ov_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "core/providers/openvino/backend_utils.h"
#include "core/providers/openvino/backends/basic_backend.h"
#include "core/providers/openvino/ov_stateful_patch_utils.h"
#include "core/providers/openvino/onnx_ctx_model_helper.h"

namespace onnxruntime {
namespace openvino_ep {
Expand Down Expand Up @@ -191,14 +192,23 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model,
"Exception while Loading Network for graph {}", name);
}

OVExeNetwork OVCore::ImportModel(std::istream& model_stream,
OVExeNetwork OVCore::ImportModel(ModelBlobWrapper& model_blob,
std::string hw_target,
const ov::AnyMap& device_config,
std::string name) {
return OvExceptionBoundary([&]() {
ov::CompiledModel obj;
obj = core.import_model(model_stream, hw_target, device_config);
#if (OPENVINO_VERSION_MAJOR > 2025 || (OPENVINO_VERSION_MAJOR == 2025 && OPENVINO_VERSION_MINOR >= 3))
if (!model_blob.maybe_native_blob_path_.empty()) {
obj = core.import_model(ov::read_tensor_data(model_blob.maybe_native_blob_path_), hw_target, device_config);
} else {
obj = core.import_model(*model_blob.stream_, hw_target, device_config);
}
#else
obj = core.import_model(*model_blob.stream_, hw_target, device_config);
#endif
OVExeNetwork exe(obj, hw_target);

#ifndef NDEBUG
printDebugInfo(exe.Get());
#endif
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/openvino/ov_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace openvino_ep {
class OVCore;
class OVInferRequest;
class OVExeNetwork;
struct ModelBlobWrapper;

typedef ov::Tensor OVTensor;
typedef ov::ProfilingInfo OVProfilingInfo;
Expand Down Expand Up @@ -82,7 +83,7 @@ struct OVCore : WeakSingleton<OVCore> {
ov::AnyMap& device_config,
const std::string& name);
// OV Interface for importing a model from a stream or a native blob path
OVExeNetwork ImportModel(std::istream& model_stream,
OVExeNetwork ImportModel(ModelBlobWrapper& model_blob,
std::string hw_target,
const ov::AnyMap& device_config,
std::string name);
Expand Down
Loading