Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions onnxruntime/core/providers/qnn/builder/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Status GetEpContextFromMainNode(const onnxruntime::Node& main_context_node,
const std::string& context_binary = node_helper.Get(EP_CACHE_CONTEXT, "");
return qnn_backend_manager->LoadCachedQnnContextFromBuffer(const_cast<char*>(context_binary.c_str()),
static_cast<uint64_t>(context_binary.length()),
"",
main_context_node.Name(),
qnn_models,
max_spill_fill_size);
Expand Down Expand Up @@ -127,6 +128,18 @@ Status GetEpContextFromMainNode(const onnxruntime::Node& main_context_node,
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "The file path in ep_cache_context does not exist or is not accessible.");
}

std::string context_binary_path_str = context_binary_path.string();
#ifdef QNN_FILE_MAPPED_WEIGHTS_ENABLED
if (qnn_backend_manager->FileMappingIsEnabled()) {
return qnn_backend_manager->LoadCachedQnnContextFromBuffer(nullptr,
0,
context_binary_path_str,
main_context_node.Name(),
qnn_models,
max_spill_fill_size);
}
#endif

size_t buffer_size{0};
std::ifstream cache_file(context_binary_path.string().c_str(), std::ifstream::binary);
ORT_RETURN_IF(!cache_file || !cache_file.good(), "Failed to open cache file.");
Expand All @@ -144,6 +157,7 @@ Status GetEpContextFromMainNode(const onnxruntime::Node& main_context_node,
cache_file.close();
return qnn_backend_manager->LoadCachedQnnContextFromBuffer(buffer.get(),
static_cast<uint64_t>(buffer_size),
context_binary_path_str,
main_context_node.Name(),
qnn_models,
max_spill_fill_size);
Expand Down
303 changes: 271 additions & 32 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
#include "core/providers/qnn/builder/qnn_profile_serializer.h"
#include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h"

#ifdef QNN_FILE_MAPPED_WEIGHTS_ENABLED
#include "core/providers/qnn/builder/qnn_file_mapping_callback_interface.h"
#endif

namespace onnxruntime {
namespace qnn {

Expand Down Expand Up @@ -154,6 +158,7 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
std::unique_ptr<unsigned char[]> GetContextBinaryBuffer(uint64_t& written_buffer_size);

// Loads a cached QNN context and registers the result in `qnn_models`
// (NOTE(review): the definition is not visible here; confirm exactly how `qnn_models` is populated).
//
// The call sites in onnx_ctx_model_helper.cc use three argument combinations:
//   - context embedded in the node: `buffer`/`buffer_length` describe the in-memory binary and
//     `context_bin_filepath` is the empty string;
//   - context read from a file: `buffer`/`buffer_length` describe the bytes read and
//     `context_bin_filepath` is the path they were read from;
//   - file mapping enabled (QNN_FILE_MAPPED_WEIGHTS_ENABLED builds, FileMappingIsEnabled() true):
//     `buffer` is nullptr, `buffer_length` is 0 and only `context_bin_filepath` identifies the binary.
//
// `node_name` receives the main EPContext node's name at those call sites, and
// `max_spill_fill_size` is forwarded unchanged from the caller.
Status LoadCachedQnnContextFromBuffer(char* buffer, uint64_t buffer_length,
const std::string& context_bin_filepath,
std::string node_name,
std::unordered_map<std::string, std::unique_ptr<qnn::QnnModel>>& qnn_models,
int64_t max_spill_fill_size);
Expand All @@ -163,6 +168,7 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
Status SetupBackend(const logging::Logger& logger, bool load_from_cached_context,
bool need_load_system_lib, bool share_ep_contexts,
bool enable_vtcm_backup_buffer_sharing,
bool enable_file_mapped_weights,
std::unordered_map<std::string, std::unique_ptr<std::vector<std::string>>>& context_bin_map);

Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);
Expand Down Expand Up @@ -246,7 +252,14 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
bool ProfilingEnabled() { return profiling_enabled_; }
#endif

// Returns the file_mapped_weights_enabled_ flag. GetEpContextFromMainNode checks it to decide
// whether to pass only the context binary path (no pre-read buffer) to LoadCachedQnnContextFromBuffer.
bool FileMappingIsEnabled() { return file_mapped_weights_enabled_; }

private:
// Heap buffer together with its length in bytes; filled through the
// BufferInfo_t& out-parameter of ReadContextBinIfValid.
// Both members have well-defined defaults (null buffer, zero length) so a
// default-constructed instance can never report a stale, indeterminate size.
struct BufferInfo {
  std::unique_ptr<char[]> data;  // owning pointer to the bytes; null when nothing is loaded
  size_t size{0};                // number of bytes associated with `data`
};
// Alias kept so existing uses of the original typedef name continue to compile.
using BufferInfo_t = BufferInfo;

Status LoadBackend();

Status InitializeBackend();
Expand All @@ -263,9 +276,23 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>

Status CreateContext(bool enable_htp_weight_sharing);

Status ReadContextBinIfValid(const std::string& context_bin_filepath,
BufferInfo_t& buffer_info,
bool read_file_contents);

Status CreateContextVtcmBackupBufferSharingEnabled(std::unordered_map<std::string,
std::unique_ptr<std::vector<std::string>>>& context_bin_map);

Status CreateContextFromListAsyncV1(const QnnContext_Config_t** configs,
std::unordered_map<std::string,
std::unique_ptr<std::vector<std::string>>>& context_bin_map);

#ifdef QNN_FILE_MAPPED_WEIGHTS_ENABLED
Status CreateContextFromListAsyncV2(const QnnContext_Config_t** configs,
std::unordered_map<std::string,
std::unique_ptr<std::vector<std::string>>>& context_bin_map);
#endif

Status ReleaseContext();

// Sets the ORT logger and creates a corresponding QNN logger with the same log level.
Expand Down Expand Up @@ -451,6 +478,15 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
bool context_created_ = false;
bool backend_setup_completed_ = false;
bool vtcm_backup_buffer_sharing_enabled_ = false;
bool file_mapped_weights_enabled_ = false;

#ifdef QNN_FILE_MAPPED_WEIGHTS_ENABLED
std::shared_ptr<FileMappingCallbackInterface> file_mapper_ = nullptr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is shared ownership needed?

// Notify params for file mapping must persist throughout lifetime of
// QnnBackendManager for release of DMA data callback on destruction
std::vector<std::pair<FileMappingCallbackInterface*, void*>> file_mapping_notify_params_;
#endif

// NPU backend requires quantized model
QnnBackendType qnn_backend_type_ = QnnBackendType::CPU;
Qnn_ProfileHandle_t profile_backend_handle_ = nullptr;
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ namespace qnn {
#define QNN_SYSTEM_PROFILE_API_ENABLED
#endif

// File-mapped weights are compiled in only for Windows ARM64 builds
// using QNN API version 2.32 or newer.
#if defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
#if QNN_API_VERSION_MAJOR > 2 || ((QNN_API_VERSION_MAJOR) == 2 && (QNN_API_VERSION_MINOR >= 32))
#define QNN_FILE_MAPPED_WEIGHTS_ENABLED
#endif
#endif

// QNN only support subset of POSIX of dlopen/dlsym/dladdr/dlerror/dlclose
// except the following flags for dlopen, others should be done only
// when we really need them
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "core/providers/qnn/builder/qnn_def.h"
#ifdef QNN_FILE_MAPPED_WEIGHTS_ENABLED

#include <string>

#include <QnnContext.h>

#include "core/providers/qnn/ort_api.h"

namespace onnxruntime {
namespace qnn {

// Abstract interface for the file-mapping callbacks that are compiled in when
// QNN_FILE_MAPPED_WEIGHTS_ENABLED is defined. QnnBackendManager holds an
// implementation of this interface together with the (interface, notify_param)
// pairs that must outlive context creation.
//
// All pointer parameters use plain `void*` / `void**` rather than
// platform typedefs so the interface does not depend on Windows SDK headers.
class FileMappingCallbackInterface {
 public:
  virtual ~FileMappingCallbackInterface() = default;

  // NOTE(review): presumably maps the context binary at `bin_filepath` and returns, through
  // `notify_param`, the opaque value later handed back to the Map*/Release* data callbacks
  // below -- confirm against the implementation.
  virtual Status MapContextBin(const std::string& bin_filepath,
                               void** notify_param) = 0;

  // NOTE(review): the parameter is named `model_name` whereas MapContextBin takes a file path;
  // confirm which key implementations actually expect.
  virtual Status ReleaseContextBin(const std::string& model_name) = 0;

  // NOTE(review): presumably yields, via `mapping_ptr`, the base address of the mapping for
  // `bin_filepath` -- confirm against the implementation.
  virtual Status GetContextBinMappingPointer(const std::string& bin_filepath, void** mapping_ptr) = 0;

  // Counterpart of GetContextBinMappingPointer. `void*` is the same type as the Windows
  // `LPVOID` typedef, so existing overrides declared with `LPVOID` still match.
  virtual Status FreeContextBinMappingPointer(void* bin_mapping_pointer) = 0;

  // DMA data callbacks: `request` describes the data being asked for, `response` is filled by
  // the implementation, and `notify_param` is the opaque per-mapping value.
  virtual Qnn_ErrorHandle_t MapDmaData(Qnn_ContextBinaryDataRequest_t request,
                                       Qnn_ContextBinaryDmaDataResponse_t* response,
                                       void* notify_param) = 0;
  virtual Qnn_ErrorHandle_t ReleaseDmaData(Qnn_ContextBinaryDmaDataMem_t data_mem,
                                           void* notify_param) = 0;

  // Raw data callbacks: same shape as the DMA pair, using the raw-data request/response types.
  virtual Qnn_ErrorHandle_t MapRawData(Qnn_ContextBinaryDataRequest_t request,
                                       Qnn_ContextBinaryRawDataResponse_t* response,
                                       void* notify_param) = 0;
  virtual Qnn_ErrorHandle_t ReleaseRawData(Qnn_ContextBinaryRawDataMem_t data_mem,
                                           void* notify_param) = 0;
};

} // namespace qnn
} // namespace onnxruntime

#endif // QNN_FILE_MAPPED_WEIGHTS_ENABLED
Loading
Loading