Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/qnn/builder/qnn_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ class QnnTensorWrapper {
dimensions_.assign(shape_data, shape_data + shape_rank);
SetQnnTensorDim(qnn_tensor_, dimensions_);

SetQnnTensorMemType(qnn_tensor_, QNN_TENSORMEMTYPE_RAW);
SetQnnTensorMemType(qnn_tensor_, GetQnnTensorMemType(qnn_tensor));

return Status::OK();
}
Expand Down
17 changes: 12 additions & 5 deletions onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,9 @@ Status QnnModelWrapper::MakeTensorWrapper(const NodeUnitIODef& tensor, QnnTensor
ORT_RETURN_IF_ERROR(UnpackInitializerData(*tensor_info.initializer_tensor, unpacked_tensor));
}

Qnn_TensorMemType_t mem_type = QNN_TENSORMEMTYPE_RAW;
if (true == model_settings_.htp_shared_memory && (IsGraphInput(tensor_name) || IsGraphOutput(tensor_name))) {
mem_type = QNN_TENSORMEMTYPE_MEMHANDLE;
}
tensor_wrapper = QnnTensorWrapper(tensor_name, GetTensorType(tensor_name), tensor_info.qnn_data_type,
std::move(tensor_info.quant_param), std::move(tensor_info.shape),
std::move(unpacked_tensor), mem_type);
std::move(unpacked_tensor));
return Status::OK();
}

Expand All @@ -92,6 +88,15 @@ Status QnnModelWrapper::MakeTensorWrapper(const TensorInfo& tensor_info,
return Status::OK();
}

void QnnModelWrapper::SetTensorMemTypeFromSettings(QnnTensorWrapper& tensor_wrapper,
const std::string& tensor_name) {
Comment thread
derdeljan-msft marked this conversation as resolved.
Outdated
Qnn_TensorMemType_t mem_type = QNN_TENSORMEMTYPE_RAW;
if (true == model_settings_.htp_shared_memory && (IsGraphInput(tensor_name) || IsGraphOutput(tensor_name))) {
mem_type = QNN_TENSORMEMTYPE_MEMHANDLE;
}
SetQnnTensorMemType(tensor_wrapper.GetQnnTensor(), mem_type);
}

bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) {
// Keep a copy of tensor name sine it will be moved with the wrapper into model_tensors_map_
std::string tensor_name = tensor_wrapper.GetName();
Expand All @@ -105,6 +110,8 @@ bool QnnModelWrapper::AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper) {
return true;
}

SetTensorMemTypeFromSettings(tensor_wrapper, tensor_name);

const Qnn_TensorType_t& qnn_tensor_type = tensor_wrapper.GetTensorType();
// save created tensors for later lookup to populate graph node construction
model_tensors_map_.emplace(tensor_name, std::move(tensor_wrapper));
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ class QnnModelWrapper {
const std::string& tensor_name,
QnnTensorWrapper& tensor_wrapper) const;

// Sets the QNN tensor memory type based on model settings and whether the tensor is a graph input/output.
void SetTensorMemTypeFromSettings(QnnTensorWrapper& tensor_wrapper, const std::string& tensor_name);

// Add to internal tensor wrapper table
bool AddTensorWrapper(QnnTensorWrapper&& tensor_wrapper);

Expand Down
7 changes: 5 additions & 2 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -570,8 +570,11 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
enable_htp_shared_mem_allocator_ = ParseBoolOption(QNN_HTP_SHARED_MEMORY_ALLOCATOR_ENABLED, false, provider_options_map);
if (enable_htp_shared_mem_allocator_) {
// Initialize rpcmem_library_.
// This is necessary for HtpSharedMemoryAllocator to function and also indicates that the allocator is available.
rpcmem_library_ = std::make_shared<qnn::RpcMemLibrary>();
// This library is only necessary for the inference (for the shared memory allocator), if we are in context
// generation stage, there is no need to load it as no allocations will be made.
if (!context_cache_enabled_) {
rpcmem_library_ = std::make_shared<qnn::RpcMemLibrary>();
}
model_settings_.htp_shared_memory = enable_htp_shared_mem_allocator_;
}

Expand Down
219 changes: 219 additions & 0 deletions onnxruntime/test/providers/qnn/qnn_model_wrapper_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "gtest/gtest.h"

// These tests require direct access to both real ORT internals (Model, Graph, GraphViewer)
// and QNN EP builder internals (QnnModelWrapper, QnnTensorWrapper). This is only possible
// when QNN EP is built as a static library, because the shared library build redefines
// ORT types as opaque wrappers in provider_api.h / provider_wrappedtypes.h.
#if !defined(ORT_MINIMAL_BUILD) && BUILD_QNN_EP_STATIC_LIB

#include "core/graph/model.h"
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/qnn_def.h"
#include "test/util/include/default_providers.h"

using namespace onnxruntime;
using namespace onnxruntime::qnn;

namespace onnxruntime {
namespace test {

namespace {

// Helper to create a minimal QnnModelWrapper for unit testing.
// AddTensorWrapper does not invoke any QNN SDK functions, so we can use
// null handles and a zeroed-out interface struct.
struct QnnModelWrapperTestContext {
std::unique_ptr<onnxruntime::Model> model;
std::unique_ptr<GraphViewer> graph_viewer;
QNN_INTERFACE_VER_TYPE qnn_interface;
Qnn_BackendHandle_t backend_handle;
std::unordered_map<std::string, size_t> input_index_map;
std::unordered_map<std::string, size_t> output_index_map;

QnnModelWrapperTestContext() : qnn_interface(QNN_INTERFACE_VER_TYPE_INIT),
backend_handle(nullptr) {
model = std::make_unique<onnxruntime::Model>("test", false, DefaultLoggingManager().DefaultLogger());
Graph& graph = model->MainGraph();
graph_viewer = std::make_unique<GraphViewer>(graph);
}

std::unique_ptr<QnnModelWrapper> CreateWrapper(const ModelSettings& settings) {
return std::make_unique<QnnModelWrapper>(
*graph_viewer,
DefaultLoggingManager().DefaultLogger(),
qnn_interface,
backend_handle,
input_index_map,
output_index_map,
QnnBackendType::HTP,
settings);
}
};

} // namespace

// Verifies that when htp_shared_memory is disabled (default), the mem type of a
// graph input tensor remains QNN_TENSORMEMTYPE_RAW.
TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryDisabled_GraphInput_MemTypeIsRaw) {
QnnModelWrapperTestContext ctx;
ctx.input_index_map = {{"input0", 0}};

ModelSettings settings{};
settings.htp_shared_memory = false;
auto wrapper = ctx.CreateWrapper(settings);

QnnTensorWrapper tensor("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 3, 224, 224});

ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor)));

const auto& stored = wrapper->GetQnnTensorWrapper("input0");
EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_RAW);
}

// Verifies that when htp_shared_memory is enabled, a graph input tensor
// gets mem type set to QNN_TENSORMEMTYPE_MEMHANDLE.
TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryEnabled_GraphInput_MemTypeIsMemHandle) {
QnnModelWrapperTestContext ctx;
ctx.input_index_map = {{"input0", 0}};

ModelSettings settings{};
settings.htp_shared_memory = true;
auto wrapper = ctx.CreateWrapper(settings);

QnnTensorWrapper tensor("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 3, 224, 224});

ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor)));

const auto& stored = wrapper->GetQnnTensorWrapper("input0");
EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_MEMHANDLE);
}

// Verifies that when htp_shared_memory is enabled, a graph output tensor
// gets mem type set to QNN_TENSORMEMTYPE_MEMHANDLE.
TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryEnabled_GraphOutput_MemTypeIsMemHandle) {
QnnModelWrapperTestContext ctx;
ctx.output_index_map = {{"output0", 0}};

ModelSettings settings{};
settings.htp_shared_memory = true;
auto wrapper = ctx.CreateWrapper(settings);

QnnTensorWrapper tensor("output0", QNN_TENSOR_TYPE_APP_READ, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 1000});

ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor)));

const auto& stored = wrapper->GetQnnTensorWrapper("output0");
EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_MEMHANDLE);
}

// Verifies that when htp_shared_memory is enabled, an intermediate (native) tensor
// that is neither a graph input nor output retains QNN_TENSORMEMTYPE_RAW.
TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryEnabled_IntermediateTensor_MemTypeIsRaw) {
QnnModelWrapperTestContext ctx;
// "intermediate0" is NOT in input_index_map or output_index_map.

ModelSettings settings{};
settings.htp_shared_memory = true;
auto wrapper = ctx.CreateWrapper(settings);

QnnTensorWrapper tensor("intermediate0", QNN_TENSOR_TYPE_NATIVE, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 256});

ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor)));

const auto& stored = wrapper->GetQnnTensorWrapper("intermediate0");
EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_RAW);
}

// Verifies that when htp_shared_memory is disabled, a graph output tensor
// retains QNN_TENSORMEMTYPE_RAW.
TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryDisabled_GraphOutput_MemTypeIsRaw) {
QnnModelWrapperTestContext ctx;
ctx.output_index_map = {{"output0", 0}};

ModelSettings settings{};
settings.htp_shared_memory = false;
auto wrapper = ctx.CreateWrapper(settings);

QnnTensorWrapper tensor("output0", QNN_TENSOR_TYPE_APP_READ, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 1000});

ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor)));

const auto& stored = wrapper->GetQnnTensorWrapper("output0");
EXPECT_EQ(GetQnnTensorMemType(stored.GetQnnTensor()), QNN_TENSORMEMTYPE_RAW);
}

// Verifies that both graph input and output tensors get MEMHANDLE when
// htp_shared_memory is enabled, within the same wrapper instance.
TEST(QnnModelWrapperTest, AddTensorWrapper_SharedMemoryEnabled_BothInputAndOutput_MemTypeIsMemHandle) {
QnnModelWrapperTestContext ctx;
ctx.input_index_map = {{"input0", 0}};
ctx.output_index_map = {{"output0", 0}};

ModelSettings settings{};
settings.htp_shared_memory = true;
auto wrapper = ctx.CreateWrapper(settings);

QnnTensorWrapper input_tensor("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 3, 224, 224});
QnnTensorWrapper output_tensor("output0", QNN_TENSOR_TYPE_APP_READ, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 1000});

ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(input_tensor)));
ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(output_tensor)));

const auto& stored_input = wrapper->GetQnnTensorWrapper("input0");
EXPECT_EQ(GetQnnTensorMemType(stored_input.GetQnnTensor()), QNN_TENSORMEMTYPE_MEMHANDLE);

const auto& stored_output = wrapper->GetQnnTensorWrapper("output0");
EXPECT_EQ(GetQnnTensorMemType(stored_output.GetQnnTensor()), QNN_TENSORMEMTYPE_MEMHANDLE);
}

// Verifies that adding a duplicate tensor (same name) returns true
// and does not overwrite the existing entry.
TEST(QnnModelWrapperTest, AddTensorWrapper_DuplicateTensor_ReturnsTrueWithoutOverwrite) {
QnnModelWrapperTestContext ctx;
ctx.input_index_map = {{"input0", 0}};

ModelSettings settings{};
settings.htp_shared_memory = false;
auto wrapper = ctx.CreateWrapper(settings);

QnnTensorWrapper tensor1("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 3, 224, 224});
ASSERT_TRUE(wrapper->AddTensorWrapper(std::move(tensor1)));

// Attempt to add another tensor with the same name
QnnTensorWrapper tensor2("input0", QNN_TENSOR_TYPE_APP_WRITE, QNN_DATATYPE_FLOAT_16,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 3, 112, 112});
EXPECT_TRUE(wrapper->AddTensorWrapper(std::move(tensor2)));

// Should still have the original data type
const auto& stored = wrapper->GetQnnTensorWrapper("input0");
EXPECT_EQ(stored.GetTensorDataType(), QNN_DATATYPE_FLOAT_32);
}

// Verifies that adding a tensor with an empty name returns false.
TEST(QnnModelWrapperTest, AddTensorWrapper_EmptyName_ReturnsFalse) {
QnnModelWrapperTestContext ctx;

ModelSettings settings{};
auto wrapper = ctx.CreateWrapper(settings);

QnnTensorWrapper tensor("", QNN_TENSOR_TYPE_NATIVE, QNN_DATATYPE_FLOAT_32,
QnnQuantParamsWrapper(), std::vector<uint32_t>{1, 256});

EXPECT_FALSE(wrapper->AddTensorWrapper(std::move(tensor)));
}

} // namespace test
} // namespace onnxruntime

#endif // !defined(ORT_MINIMAL_BUILD) && BUILD_QNN_EP_STATIC_LIB
Loading