From 1957d10008f36802d03f935f8b1d0acc230b1f98 Mon Sep 17 00:00:00 2001 From: Dusan Erdeljan Date: Sun, 25 Jan 2026 18:33:00 +0100 Subject: [PATCH 1/5] [QNN EP] Enable offline x64 compilation with memhandle IO type (cherry picked from commit 4970a4c3d4bfca922b982c6206b8e735639072f1) --- onnxruntime/core/providers/qnn/builder/qnn_def.h | 2 +- .../core/providers/qnn/builder/qnn_model_wrapper.cc | 12 +++++++----- .../core/providers/qnn/qnn_execution_provider.cc | 7 +++++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index 847de084c49f6..5c2a447b93951 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -257,7 +257,7 @@ class QnnTensorWrapper { dimensions_.assign(shape_data, shape_data + shape_rank); SetQnnTensorDim(qnn_tensor_, dimensions_); - SetQnnTensorMemType(qnn_tensor_, QNN_TENSORMEMTYPE_RAW); + SetQnnTensorMemType(qnn_tensor_, GetQnnTensorMemType(qnn_tensor)); return Status::OK(); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index 6032623541384..0c886390f3ddf 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -68,13 +68,9 @@ Status QnnModelWrapper::MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensor ORT_RETURN_IF_ERROR(UnpackInitializerData(*tensor_info.initializer_tensor, unpacked_tensor)); } - Qnn_TensorMemType_t mem_type = QNN_TENSORMEMTYPE_RAW; - if (true == model_settings_.htp_shared_memory && (IsGraphInput(tensor_name) || IsGraphOutput(tensor_name))) { - mem_type = QNN_TENSORMEMTYPE_MEMHANDLE; - } tensor_wrapper = QnnTensorWrapper(tensor_name, GetTensorType(tensor_name), tensor_info.qnn_data_type, std::move(tensor_info.quant_param), std::move(tensor_info.shape), - std::move(unpacked_tensor), mem_type); + std::move(unpacked_tensor)); return Status::OK(); } @@ -105,6 +101,12 @@ bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) { return true; } + Qnn_TensorMemType_t mem_type = QNN_TENSORMEMTYPE_RAW; + if (true == model_settings_.htp_shared_memory && (IsGraphInput(tensor_name) || IsGraphOutput(tensor_name))) { + mem_type = QNN_TENSORMEMTYPE_MEMHANDLE; + } + SetQnnTensorMemType(tensor_wrapper.GetQnnTensor(), mem_type); + const Qnn_TensorType_t& qnn_tensor_type = tensor_wrapper.GetTensorType(); // save created tensors for later lookup to populate graph node construction model_tensors_map_.emplace(tensor_name, std::move(tensor_wrapper)); diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index c3d8328b37411..7be5c49266067 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -570,8 +570,11 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio enable_htp_shared_mem_allocator_ = ParseBoolOption(QNN_HTP_SHARED_MEMORY_ALLOCATOR_ENABLED, false, provider_options_map); if (enable_htp_shared_mem_allocator_) { // Initialize rpcmem_library_. - // This is necessary for HtpSharedMemoryAllocator to function and also indicates that the allocator is available. - rpcmem_library_ = std::make_shared(); + // This library is only necessary for the inference, if we are in context generation stage, there is no need to load it + // as no allocations will be made. + if (!context_cache_enabled_) { + rpcmem_library_ = std::make_shared(); + } model_settings_.htp_shared_memory = enable_htp_shared_mem_allocator_; } From 4b232d26f4b3d2d3d5da775f78546e6c392f9959 Mon Sep 17 00:00:00 2001 From: Dusan Erdeljan Date: Mon, 2 Mar 2026 22:35:35 +0100 Subject: [PATCH 2/5] Indicate that rpc mem library is required for shared allocator in the comment --- onnxruntime/core/providers/qnn/qnn_execution_provider.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 7be5c49266067..7d9de02654230 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -570,8 +570,8 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio enable_htp_shared_mem_allocator_ = ParseBoolOption(QNN_HTP_SHARED_MEMORY_ALLOCATOR_ENABLED, false, provider_options_map); if (enable_htp_shared_mem_allocator_) { // Initialize rpcmem_library_. - // This library is only necessary for the inference, if we are in context generation stage, there is no need to load it - // as no allocations will be made. + // This library is only necessary for the inference (for the shared memory allocator), if we are in context + // generation stage, there is no need to load it as no allocations will be made. if (!context_cache_enabled_) { rpcmem_library_ = std::make_shared(); } From 00bf7472db33c1020a99c0dec74e8c63f8dcedad Mon Sep 17 00:00:00 2001 From: Dusan Erdeljan Date: Fri, 6 Mar 2026 22:31:42 +0100 Subject: [PATCH 3/5] Add UT for memhandle type setting --- .../qnn/builder/qnn_model_wrapper.cc | 15 +- .../providers/qnn/builder/qnn_model_wrapper.h | 3 + .../providers/qnn/qnn_model_wrapper_test.cc | 219 ++++++++++++++++++ 3 files changed, 232 insertions(+), 5 deletions(-) create mode 100644 onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index 0c886390f3ddf..da62fefc9a01c 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -88,6 +88,15 @@ Status QnnModelWrapper::MakeTensorWrapper(const TensorInfo& tensor_info, return Status::OK(); } +void QnnModelWrapper::SetTensorMemTypeFromSettings(QnnTensorWrapper& tensor_wrapper, + const std::string& tensor_name) { + Qnn_TensorMemType_t mem_type = QNN_TENSORMEMTYPE_RAW; + if (true == model_settings_.htp_shared_memory && (IsGraphInput(tensor_name) || IsGraphOutput(tensor_name))) { + mem_type = QNN_TENSORMEMTYPE_MEMHANDLE; + } + SetQnnTensorMemType(tensor_wrapper.GetQnnTensor(), mem_type); +} + bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) { // Keep a copy of tensor name sine it will be moved with the wrapper into model_tensors_map_ std::string tensor_name = tensor_wrapper.GetName(); @@ -101,11 +110,7 @@ bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) { return true; } - Qnn_TensorMemType_t mem_type = QNN_TENSORMEMTYPE_RAW; - if (true == model_settings_.htp_shared_memory && (IsGraphInput(tensor_name) || IsGraphOutput(tensor_name))) { - mem_type = QNN_TENSORMEMTYPE_MEMHANDLE; - } - SetQnnTensorMemType(tensor_wrapper.GetQnnTensor(), mem_type); + SetTensorMemTypeFromSettings(tensor_wrapper, tensor_name); const Qnn_TensorType_t& qnn_tensor_type = tensor_wrapper.GetTensorType(); // save created tensors for later lookup to populate graph node construction diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h index c5aaf32dfb274..2d107f571babf 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h @@ -77,6 +77,9 @@ class QnnModelWrapper { const std::string& tensor_name, QnnTensorWrapper& tensor_wrapper) const; + // Sets the QNN tensor memory type based on model settings and whether the tensor is a graph input/output. + void SetTensorMemTypeFromSettings(QnnTensorWrapper& tensor_wrapper, const std::string& tensor_name); + // Add to internal tensor wrapper table bool AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper); diff --git a/onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc b/onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc new file mode 100644 index 0000000000000..e47fc003c0046 --- /dev/null +++ b/onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc @@ -0,0 +1,219 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "gtest/gtest.h" + +// These tests require direct access to both real ORT internals (Model, Graph, GraphViewer) +// and QNN EP builder internals (QnnModelWrapper, QnnTensorWrapper). This is only possible +// when QNN EP is built as a static library, because the shared library build redefines +// ORT types as opaque wrappers in provider_api.h / provider_wrappedtypes.h. +#if !defined(ORT_MINIMAL_BUILD) && BUILD_QNN_EP_STATIC_LIB + +#include "core/graph/model.h" +#include "core/providers/qnn/builder/qnn_model_wrapper.h" +#include "core/providers/qnn/builder/qnn_def.h" +#include "test/util/include/default_providers.h" + +using namespace onnxruntime; +using namespace onnxruntime::qnn; + +namespace onnxruntime { +namespace test { + +namespace { + +// Helper to create a minimal QnnModelWrapper for unit testing. +// AddTensorWrapper does not invoke any QNN SDK functions, so we can use +// null handles and a zeroed-out interface struct. +struct QnnModelWrapperTestContext { + std::unique_ptr model; + std::unique_ptr graph_viewer; + QNN_INTERFACE_VER_TYPE qnn_interface; + Qnn_BackendHandle_t backend_handle; + std::unordered_map input_index_map; + std::unordered_map output_index_map; + + QnnModelWrapperTestContext() : qnn_interface(QNN_INTERFACE_VER_TYPE_INIT), + backend_handle(nullptr) { + model = std::make_unique("test", false, DefaultLoggingManager().DefaultLogger()); + Graph& graph = model->MainGraph(); + graph_viewer = std::make_unique(graph); + } + + std::unique_ptr CreateWrapper(const ModelSettings& settings) { + return std::make_unique( + *graph_viewer, + DefaultLoggingManager().DefaultLogger(), + qnn_interface, + backend_handle, + input_index_map, + output_index_map, + QnnBackendType::HTP, + settings); + } +}; + +} // namespace + +// Verifies that when htp_shared_memory is disabled (default), the mem type of a +// graph input tensor remains QNN_TENSORMEMTYPE_RAW. +TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryDisabled_GraphInput_MemTypeIsRaw) { + QnnModelWrapperTestContext ctx; + ctx.input_index_map = {{"input0", 0}}; + + ModelSettings settings{}; + settings.htp_shared_memory = false; + auto wrapper = ctx.CreateWrapper(settings); + + QnnTensorWrapper tensor("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 3, 224, 224}); + + ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor))); + + const auto& stored = wrapper->GetQnnTensorWrapper("input0"); + EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_RAW); +} + +// Verifies that when htp_shared_memory is enabled, a graph input tensor +// gets mem type set to QNN_TENSORMEMTYPE_MEMHANDLE. +TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryEnabled_GraphInput_MemTypeIsMemHandle) { + QnnModelWrapperTestContext ctx; + ctx.input_index_map = {{"input0", 0}}; + + ModelSettings settings{}; + settings.htp_shared_memory = true; + auto wrapper = ctx.CreateWrapper(settings); + + QnnTensorWrapper tensor("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 3, 224, 224}); + + ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor))); + + const auto& stored = wrapper->GetQnnTensorWrapper("input0"); + EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_MEMHANDLE); +} + +// Verifies that when htp_shared_memory is enabled, a graph output tensor +// gets mem type set to QNN_TENSORMEMTYPE_MEMHANDLE. +TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryEnabled_GraphOutput_MemTypeIsMemHandle) { + QnnModelWrapperTestContext ctx; + ctx.output_index_map = {{"output0", 0}}; + + ModelSettings settings{}; + settings.htp_shared_memory = true; + auto wrapper = ctx.CreateWrapper(settings); + + QnnTensorWrapper tensor("output0", QNN_TENSOR_TYPE_APP_READ, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 1000}); + + ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor))); + + const auto& stored = wrapper->GetQnnTensorWrapper("output0"); + EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_MEMHANDLE); +} + +// Verifies that when htp_shared_memory is enabled, an intermediate (native) tensor +// that is neither a graph input nor output retains QNN_TENSORMEMTYPE_RAW. +TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryEnabled_IntermediateTensor_MemTypeIsRaw) { + QnnModelWrapperTestContext ctx; + // "intermediate0" is NOT in input_index_map or output_index_map. + + ModelSettings settings{}; + settings.htp_shared_memory = true; + auto wrapper = ctx.CreateWrapper(settings); + + QnnTensorWrapper tensor("intermediate0", QNN_TENSOR_TYPE_NATIVE, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 256}); + + ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor))); + + const auto& stored = wrapper->GetQnnTensorWrapper("intermediate0"); + EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_RAW); +} + +// Verifies that when htp_shared_memory is disabled, a graph output tensor +// retains QNN_TENSORMEMTYPE_RAW. +TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryDisabled_GraphOutput_MemTypeIsRaw) { + QnnModelWrapperTestContext ctx; + ctx.output_index_map = {{"output0", 0}}; + + ModelSettings settings{}; + settings.htp_shared_memory = false; + auto wrapper = ctx.CreateWrapper(settings); + + QnnTensorWrapper tensor("output0", QNN_TENSOR_TYPE_APP_READ, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 1000}); + + ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor))); + + const auto& stored = wrapper->GetQnnTensorWrapper("output0"); + EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_RAW); +} + +// Verifies that both graph input and output tensors get MEMHANDLE when +// htp_shared_memory is enabled, within the same wrapper instance. +TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryEnabled_BothInputAndOutput_MemTypeIsMemHandle) { + QnnModelWrapperTestContext ctx; + ctx.input_index_map = {{"input0", 0}}; + ctx.output_index_map = {{"output0", 0}}; + + ModelSettings settings{}; + settings.htp_shared_memory = true; + auto wrapper = ctx.CreateWrapper(settings); + + QnnTensorWrapper input_tensor("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 3, 224, 224}); + QnnTensorWrapper output_tensor("output0", QNN_TENSOR_TYPE_APP_READ, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 1000}); + + ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(input_tensor))); + ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(output_tensor))); + + const auto& stored_input = wrapper->GetQnnTensorWrapper("input0"); + EXPECT_EQ(GetQnnTensorMemType(stored_input.GetQnnTensor()), QNN_TENSORMEMTYPE_MEMHANDLE); + + const auto& stored_output = wrapper->GetQnnTensorWrapper("output0"); + EXPECT_EQ(GetQnnTensorMemType(stored_output.GetQnnTensor()), QNN_TENSORMEMTYPE_MEMHANDLE); +} + +// Verifies that adding a duplicate tensor (same name) returns true +// and does not overwrite the existing entry. +TEST(QnnModelWrapperTest, AddTensorWrapper_DuplicateTensor_ReturnsTrueWithoutOverwrite) { + QnnModelWrapperTestContext ctx; + ctx.input_index_map = {{"input0", 0}}; + + ModelSettings settings{}; + settings.htp_shared_memory = false; + auto wrapper = ctx.CreateWrapper(settings); + + QnnTensorWrapper tensor1("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 3, 224, 224}); + ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor1))); + + // Attempt to add another tensor with the same name + QnnTensorWrapper tensor2("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_16, + QnnQuantParamsWrapper(), std::vector{1, 3, 112, 112}); + EXPECT_TRUE(wrapper->AddTensorWrapper(std::move(tensor2))); + + // Should still have the original data type + const auto& stored = wrapper->GetQnnTensorWrapper("input0"); + EXPECT_EQ(stored.GetTensorDataType(), QNN_DATATYPE_FLOAT_32); +} + +// Verifies that adding a tensor with an empty name returns false. +TEST(QnnModelWrapperTest, AddTensorWrapper_EmptyName_ReturnsFalse) { + QnnModelWrapperTestContext ctx; + + ModelSettings settings{}; + auto wrapper = ctx.CreateWrapper(settings); + + QnnTensorWrapper tensor("", QNN_TENSOR_TYPE_NATIVE, QNN_DATATYPE_FLOAT_32, + QnnQuantParamsWrapper(), std::vector{1, 256}); + + EXPECT_FALSE(wrapper->AddTensorWrapper(std::move(tensor))); +} + +} // namespace test +} // namespace onnxruntime + +#endif // !defined(ORT_MINIMAL_BUILD) && BUILD_QNN_EP_STATIC_LIB From 9bfdd7408cffce9e3fbef8b71d7e0aaf57457c27 Mon Sep 17 00:00:00 2001 From: derdeljan-msft Date: Fri, 6 Mar 2026 22:35:52 +0100 Subject: [PATCH 4/5] Update onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index da62fefc9a01c..b02d2ac871bf7 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -89,7 +89,7 @@ Status QnnModelWrapper::MakeTensorWrapper(const TensorInfo& tensor_info, } void QnnModelWrapper::SetTensorMemTypeFromSettings(QnnTensorWrapper& tensor_wrapper, - const std::string& tensor_name) { + const std::string& tensor_name) { Qnn_TensorMemType_t mem_type = QNN_TENSORMEMTYPE_RAW; if (true == model_settings_.htp_shared_memory && (IsGraphInput(tensor_name) || IsGraphOutput(tensor_name))) { mem_type = QNN_TENSORMEMTYPE_MEMHANDLE; From 4765f8ee43a3c7888da783e69995422528dbb107 Mon Sep 17 00:00:00 2001 From: Dusan Erdeljan Date: Fri, 6 Mar 2026 22:59:49 +0100 Subject: [PATCH 5/5] Fix test build --- onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc b/onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc index e47fc003c0046..54f966928e577 100644 --- a/onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc @@ -13,6 +13,7 @@ #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/qnn_def.h" #include "test/util/include/default_providers.h" +#include "test/util/include/test_environment.h" using namespace onnxruntime; using namespace onnxruntime::qnn;