Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 111 additions & 44 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1092,6 +1092,18 @@
void* buffer;
ORT_RETURN_IF_ERROR(file_mapper_->GetContextBinMappedMemoryPtr(context_bin_filepath, &buffer));

uint32_t graph_count = 0;
QnnSystemContext_GraphInfo_t* graphs_info = nullptr;
Qnn_Version_t blob_version;
ORT_RETURN_IF_ERROR(GetGraphInfoAndBinVersion(buffer, buffer_size,
blob_version,
graph_count,
graphs_info));
ORT_RETURN_IF(!MinVersionMet(blob_version, {3, 3, 3}), "Context binary of ", context_bin_filepath, " is v",
std::to_string(blob_version.major), ".", std::to_string(blob_version.minor),
Comment thread
quic-calvnguy marked this conversation as resolved.
Outdated
".", std::to_string(blob_version.patch),
". File mapping is only supported for versions >= 3.3.3. Disabling file mapping for this node.");
Comment thread
quic-calvnguy marked this conversation as resolved.
Outdated

auto notify_param_ptr = std::make_unique<FileMappingCallbackInfo_t>(buffer, buffer_size, this);

Qnn_ContextBinaryCallback_t context_file_map_callbacks;
Expand Down Expand Up @@ -1286,9 +1298,15 @@
auto rt = qnn_sys_interface_.systemContextCreate(&sys_ctx_handle);
ORT_RETURN_IF(QNN_SUCCESS != rt, "Failed to create system handle.");

auto sys_ctx_handle_deleter = [&qnn_sys_interface = qnn_sys_interface_](void* handle) {
qnn_sys_interface.systemContextFree(reinterpret_cast<QnnSystemContext_Handle_t>(handle));
};

std::unique_ptr<void, decltype(sys_ctx_handle_deleter)> sys_ctx_handle_uptr(sys_ctx_handle, sys_ctx_handle_deleter);

const QnnSystemContext_BinaryInfo_t* binary_info = nullptr;
Qnn_ContextBinarySize_t binary_info_size{0};
rt = qnn_sys_interface_.systemContextGetBinaryInfo(sys_ctx_handle,
rt = qnn_sys_interface_.systemContextGetBinaryInfo(sys_ctx_handle_uptr.get(),
Comment thread
yuslepukhin marked this conversation as resolved.
Outdated
static_cast<void*>(buffer),
buffer_length,
&binary_info,
Expand Down Expand Up @@ -1350,62 +1368,52 @@
ORT_RETURN_IF(result, "Failed to get valid function pointer.");

void* bin_buffer = nullptr;
bool use_file_mapping = file_mapped_weights_enabled_;
#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
// A nonzero buffer length implies an embedded context
if (file_mapped_weights_enabled_ && buffer_length == 0) {
ORT_RETURN_IF(!file_mapper_, "Attemping to use File Mapping feature but file_mapper_ is uninitialized");
if (use_file_mapping && buffer_length == 0) {
ORT_RETURN_IF(!file_mapper_, "Attempting to use File Mapping feature but file_mapper_ is uninitialized");

ORT_RETURN_IF_ERROR(GetFileSizeIfValid(context_bin_filepath, buffer_length));

ORT_RETURN_IF(buffer_length == 0, "Context bin has a size of 0 bytes: ", context_bin_filepath);
ORT_RETURN_IF_ERROR(file_mapper_->GetContextBinMappedMemoryPtr(context_bin_filepath, &bin_buffer));

Comment thread
quic-calvnguy marked this conversation as resolved.
} else {
if (use_file_mapping) {
use_file_mapping = false;
LOGS(*logger_, WARNING) << "Node " << node_name << " is using an embedded cache."
<< " Disabling file mapping for this node.";
}
ORT_RETURN_IF(buffer == nullptr, "Attempting to load QNN context from buffer but buffer is null");
bin_buffer = static_cast<void*>(buffer);
}
#else
bin_buffer = static_cast<void*>(buffer);
#endif

QnnSystemContext_Handle_t sys_ctx_handle = nullptr;
auto rt = qnn_sys_interface_.systemContextCreate(&sys_ctx_handle);
ORT_RETURN_IF(QNN_SUCCESS != rt, "Failed to create system handle.");

const QnnSystemContext_BinaryInfo_t* binary_info = nullptr;
Qnn_ContextBinarySize_t binary_info_size{0};
rt = qnn_sys_interface_.systemContextGetBinaryInfo(sys_ctx_handle,
bin_buffer,
buffer_length,
&binary_info,
&binary_info_size);
ORT_RETURN_IF(QNN_SUCCESS != rt, "Failed to get context binary info.");

// binary_info life cycle is here
// Binary info to graph info
// retrieve Qnn graph info from binary info
ORT_RETURN_IF(nullptr == binary_info, "Qnn cached binary info is nullptr.");
uint32_t graph_count = 0;
QnnSystemContext_GraphInfo_t* graphs_info = nullptr;
if (binary_info->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) {
graph_count = binary_info->contextBinaryInfoV1.numGraphs;
graphs_info = binary_info->contextBinaryInfoV1.graphs;
}
#if QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 15) // starts from 2.22
else if (binary_info->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) {
graph_count = binary_info->contextBinaryInfoV2.numGraphs;
graphs_info = binary_info->contextBinaryInfoV2.graphs;
}
#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
Qnn_Version_t blob_version;
ORT_RETURN_IF_ERROR(GetGraphInfoAndBinVersion(buffer, buffer_length,
blob_version,
graph_count,
graphs_info));
#else
ORT_RETURN_IF_ERROR(GetGraphInfoAndBinVersion(buffer, buffer_length,
Comment thread
quic-calvnguy marked this conversation as resolved.
Outdated
graph_count,
graphs_info));
#endif
#if QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 21) // starts from 2.28
else if (binary_info->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3) {
graph_count = binary_info->contextBinaryInfoV3.numGraphs;
graphs_info = binary_info->contextBinaryInfoV3.graphs;

#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
if (use_file_mapping && !MinVersionMet(blob_version, {3, 3, 3})) {
LOGS(*logger_, WARNING) << "Context binary of " << node_name << " is v" << std::to_string(blob_version.major) << "."
Comment thread
quic-calvnguy marked this conversation as resolved.
Outdated
<< std::to_string(blob_version.minor) << "." << std::to_string(blob_version.patch)
<< ". File mapping is only supported for versions >= 3.3.3. Disabling file mapping for this node.";
use_file_mapping = false;
}
#endif
else {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported context binary info version.");
}

ORT_RETURN_IF(graph_count < 1 || graphs_info == nullptr, "Failed to get graph info from Qnn cached context.");
LOGS(*logger_, VERBOSE) << "Graph count from QNN context: " << graph_count;
Expand Down Expand Up @@ -1452,7 +1460,7 @@

#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
Qnn_ContextBinaryCallback_t callbacks;
if (file_mapped_weights_enabled_ && file_mapper_) {
if (use_file_mapping && file_mapper_) {
ORT_RETURN_IF(nullptr == qnn_interface_.contextCreateFromBinaryWithCallback,
"Invalid function pointer for contextCreateFromBinaryWithCallback.");

Expand All @@ -1477,9 +1485,10 @@
}
#endif

Qnn_ErrorHandle_t rt = QNN_SUCCESS;

Check warning on line 1488 in onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <vector> for vector<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc:1488: Add #include <vector> for vector<> [build/include_what_you_use] [4]
#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
std::vector<char> backup_buffer;
if (file_mapped_weights_enabled_ && file_mapper_) {
if (use_file_mapping && file_mapper_) {
rt = qnn_interface_.contextCreateFromBinaryWithCallback(backend_handle_,
device_handle_,
context_configs,
Expand All @@ -1501,9 +1510,8 @@
bin_buffer = static_cast<void*>(backup_buffer.data());
}
}
#endif // QNN_FILE_MAPPED_WEIGHTS_AVAILABLE

if (!file_mapped_weights_enabled_ || rt != QNN_SUCCESS) {
#endif
Comment thread
quic-calvnguy marked this conversation as resolved.
if (!use_file_mapping || rt != QNN_SUCCESS) {
rt = qnn_interface_.contextCreateFromBinary(backend_handle_,
device_handle_,
context_configs,
Expand Down Expand Up @@ -1544,10 +1552,7 @@
}
}

qnn_sys_interface_.systemContextFree(sys_ctx_handle);
sys_ctx_handle = nullptr;
context_created_ = true;

LOGS(*logger_, VERBOSE) << "Load from cached QNN Context completed.";
return Status::OK();
}
Expand Down Expand Up @@ -2338,5 +2343,67 @@
return Status::OK();
}

// Reads the binary-info header of a serialized QNN context held in `buffer`.
//
// A temporary QNN system context is created for the query and is released automatically
// (via the unique_ptr guard below) on every return path.
//
// Outputs:
//   blob_version - (only when QNN_FILE_MAPPED_WEIGHTS_AVAILABLE is defined) contextBlobVersion
//                  of the binary.
//   graph_count  - number of graphs recorded in the binary info.
//   graphs_info  - see NOTE(review) below.
//
// Returns a failure Status if the system context cannot be created, the binary info cannot be
// retrieved or is null, or the binary info version is not one this build recognizes.
//
// NOTE(review): `graphs_info` is received by value, so the assignments to it in this function
// only modify the local copy and are not observable by the caller. Additionally, the graph
// array presumably lives in memory tied to the system context, which is freed when this
// function returns - confirm the ownership rules before exposing this pointer to callers.
Status QnnBackendManager::GetGraphInfoAndBinVersion(void* buffer, size_t buffer_length,
#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
                                                    Qnn_Version_t& blob_version,
#endif
                                                    uint32_t& graph_count,
                                                    QnnSystemContext_GraphInfo_t* graphs_info) {
  QnnSystemContext_Handle_t raw_sys_ctx = nullptr;
  const auto create_rt = qnn_sys_interface_.systemContextCreate(&raw_sys_ctx);
  ORT_RETURN_IF(create_rt != QNN_SUCCESS, "Failed to create system handle.");

  // Scope guard: frees the system context no matter how this function exits.
  auto free_sys_ctx = [&sys_iface = qnn_sys_interface_](void* handle) {
    sys_iface.systemContextFree(reinterpret_cast<QnnSystemContext_Handle_t>(handle));
  };
  std::unique_ptr<void, decltype(free_sys_ctx)> sys_ctx_guard(raw_sys_ctx, free_sys_ctx);

  const QnnSystemContext_BinaryInfo_t* bin_info = nullptr;
  Qnn_ContextBinarySize_t bin_info_size{0};
  const auto query_rt = qnn_sys_interface_.systemContextGetBinaryInfo(sys_ctx_guard.get(),
                                                                      buffer,
                                                                      buffer_length,
                                                                      &bin_info,
                                                                      &bin_info_size);
  ORT_RETURN_IF(query_rt != QNN_SUCCESS, "Failed to get context binary info.");
  ORT_RETURN_IF(bin_info == nullptr, "Qnn cached binary info is nullptr.");

  // Pick the union member that matches the binary info layout version.
  switch (bin_info->version) {
    case QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1: {
      const auto& info_v1 = bin_info->contextBinaryInfoV1;
      graph_count = info_v1.numGraphs;
      graphs_info = info_v1.graphs;
#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
      blob_version = info_v1.contextBlobVersion;
#endif
      break;
    }
#if QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 15)  // starts from 2.22
    case QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2: {
      const auto& info_v2 = bin_info->contextBinaryInfoV2;
      graph_count = info_v2.numGraphs;
      graphs_info = info_v2.graphs;
#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
      blob_version = info_v2.contextBlobVersion;
#endif
      break;
    }
#endif
#if QNN_API_VERSION_MAJOR == 2 && (QNN_API_VERSION_MINOR >= 21)  // starts from 2.28
    case QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3: {
      const auto& info_v3 = bin_info->contextBinaryInfoV3;
      graph_count = info_v3.numGraphs;
      graphs_info = info_v3.graphs;
#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
      blob_version = info_v3.contextBlobVersion;
#endif
      break;
    }
#endif
    default:
      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported context binary info version.");
  }

  return Status::OK();
}
Comment thread
quic-calvnguy marked this conversation as resolved.

} // namespace qnn
} // namespace onnxruntime
18 changes: 18 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,24 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
return Status::OK();
}

// Queries the binary info of the serialized QNN context in `buffer` (of `buffer_length` bytes)
// and reports the number of graphs it contains through `graph_count`. When
// QNN_FILE_MAPPED_WEIGHTS_AVAILABLE is defined, the context blob version is also reported
// through `blob_version`. Returns a non-OK Status on failure.
// NOTE(review): `graphs_info` is a pointer passed by value, so a value assigned to it by the
// implementation cannot reach the caller - confirm whether a reference to pointer was intended.
Status GetGraphInfoAndBinVersion(void* buffer, size_t buffer_length,
#ifdef QNN_FILE_MAPPED_WEIGHTS_AVAILABLE
Qnn_Version_t& blob_version,
#endif
uint32_t& graph_count,
QnnSystemContext_GraphInfo_t* graphs_info);

// Checks if act_ver is >= min_ver. An act_ver of 0.0.0 is considered invalid.
bool MinVersionMet(const Qnn_Version_t& act_ver, const Qnn_Version_t& min_ver) {
Comment thread
quic-calvnguy marked this conversation as resolved.
Outdated
if (act_ver.major == 0 && act_ver.minor == 0 && act_ver.patch == 0) {
return false;
}

return act_ver.major > min_ver.major ||
(act_ver.major == min_ver.major && act_ver.minor >= min_ver.minor) ||
Comment thread
quic-calvnguy marked this conversation as resolved.
Outdated
(act_ver.major == min_ver.major && act_ver.minor == min_ver.minor && act_ver.patch >= min_ver.patch);
}

private:
const std::string backend_path_;
std::recursive_mutex logger_recursive_mutex_;
Expand Down
Loading