Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion onnxruntime/core/common/cpuid_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ std::string CPUIDInfo::GetX86Vendor(int32_t* data) {
// Maps a CPU vendor string to a numeric vendor ID.
// Returns 0 when the vendor is not recognized.
uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) {
  if (vendor == "GenuineIntel") return 0x8086;
  // NOTE(review): the x86 CPUID vendor string for AMD is "AuthenticAMD", not
  // "GenuineAMD" — confirm what GetX86Vendor produces before changing this.
  if (vendor == "GenuineAMD") return 0x1022;
  // Little-endian FourCC 'QCOM' (byte order Q,C,O,M in memory) so it matches the
  // vendor_id used by the QNN EP factory / OrtHardwareDevice vendor IDs.
  if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24);
  if (vendor.find("NV") == 0) return 0x10DE;
  return 0;
}
Expand Down
138 changes: 138 additions & 0 deletions onnxruntime/core/providers/qnn/qnn_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,27 @@
return std::make_shared<onnxruntime::QNNProviderFactory>(*provider_options, config_options);
}

// Creates the QNN IExecutionProvider via the provider-bridge factory.
// devices/ep_metadata are unused; exactly one device must be requested.
// Returns Status::OK() and sets `ep` on success; ORT_EP_FAIL otherwise.
Status CreateIExecutionProvider(const OrtHardwareDevice* const* /*devices*/,
                                const OrtKeyValuePairs* const* /*ep_metadata*/,
                                size_t num_devices,
                                ProviderOptions& provider_options,
                                const OrtSessionOptions& session_options,
                                const OrtLogger& logger,
                                std::unique_ptr<IExecutionProvider>& ep) override {
  // A single QNN EP instance cannot partition a model across multiple devices.
  if (num_devices != 1) {
    return Status(common::ONNXRUNTIME, ORT_EP_FAIL, "QNN EP only supports one device.");
  }

  const ConfigOptions* config_options = &session_options.GetConfigOptions();

  // CreateExecutionProviderFactory takes a type-erased pointer to a 2-element
  // array: { ProviderOptions*, const ConfigOptions* }.
  // NOTE(review): the receiving side must unpack in exactly this order and
  // arity — confirm against CreateExecutionProviderFactory's implementation.
  // configs_array only needs to outlive the factory-creation call below.
  std::array<const void*, 2> configs_array = {&provider_options, config_options};
  const void* arg = reinterpret_cast<const void*>(&configs_array);
  auto ep_factory = CreateExecutionProviderFactory(arg);
  ep = ep_factory->CreateProvider(session_options, logger);

  return Status::OK();
}

void Initialize() override {}  // No-op: no per-process setup needed by this provider bridge.
void Shutdown() override {}    // No-op: no per-process cleanup needed.
} g_provider;
Expand All @@ -93,4 +114,121 @@
return &onnxruntime::g_provider;
}
}

#include <memory>

#include "core/framework/error_code_helper.h"

// OrtEpApi infrastructure to be able to use the QNN EP as an OrtEpFactory for auto EP selection.
// OrtEpApi infrastructure to be able to use the QNN EP as an OrtEpFactory for auto EP selection.
struct QnnEpFactory : OrtEpFactory {
  // Wires the OrtEpFactory C-API function-pointer table to the static *Impl methods below.
  // ep_name: unique name for this factory configuration.
  // hw_type: the single OrtHardwareDevice type this factory supports (e.g. NPU).
  // qnn_backend_type: QNN backend string (e.g. "htp") passed through as an EP option.
  QnnEpFactory(const OrtApi& ort_api_in,
               const char* ep_name,
               OrtHardwareDeviceType hw_type,
               const char* qnn_backend_type)
      : ort_api{ort_api_in}, ep_name{ep_name}, ort_hw_device_type{hw_type}, qnn_backend_type{qnn_backend_type} {
    GetName = GetNameImpl;
    GetVendor = GetVendorImpl;
    GetSupportedDevices = GetSupportedDevicesImpl;
    CreateEp = CreateEpImpl;
    ReleaseEp = ReleaseEpImpl;
  }

  // Returns the name for the EP. Each unique factory configuration must have a unique name.
  // Ex: a factory that supports NPU should have a different name than a factory that supports GPU.
  static const char* GetNameImpl(const OrtEpFactory* this_ptr) {
    const auto* factory = static_cast<const QnnEpFactory*>(this_ptr);
    return factory->ep_name.c_str();
  }

  // Returns the EP vendor name (the `vendor` member, "Microsoft").
  static const char* GetVendorImpl(const OrtEpFactory* this_ptr) {
    const auto* factory = static_cast<const QnnEpFactory*>(this_ptr);
    return factory->vendor.c_str();
  }

  // Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports.
  // An EP created with this factory is expected to be able to execute a model with *all* supported
  // hardware devices at once. A single instance of QNN EP is not currently setup to partition a model among
  // multiple different QNN backends at once (e.g, npu, cpu, gpu), so this factory instance is set to only
  // support one backend: npu. To support a different backend, like gpu, create a different factory instance
  // that only supports GPU.
  static OrtStatus* GetSupportedDevicesImpl(OrtEpFactory* this_ptr,
                                            const OrtHardwareDevice* const* devices,
                                            size_t num_devices,
                                            OrtEpDevice** ep_devices,
                                            size_t max_ep_devices,
                                            size_t* p_num_ep_devices) {
    size_t& num_ep_devices = *p_num_ep_devices;
    auto* factory = static_cast<QnnEpFactory*>(this_ptr);

    for (size_t i = 0; i < num_devices && num_ep_devices < max_ep_devices; ++i) {
      const OrtHardwareDevice& device = *devices[i];
      // Match on both hardware type (e.g. NPU) and the Qualcomm vendor id.
      if (factory->ort_api.HardwareDevice_Type(&device) == factory->ort_hw_device_type &&
          factory->ort_api.HardwareDevice_VendorId(&device) == factory->vendor_id) {
        OrtKeyValuePairs* ep_options = nullptr;
        factory->ort_api.CreateKeyValuePairs(&ep_options);
        factory->ort_api.AddKeyValuePair(ep_options, "backend_type", factory->qnn_backend_type.c_str());
        // NOTE(review): if CreateEpDevice fails, ep_options is not released on this
        // path — confirm whether CreateEpDevice takes ownership even on failure.
        ORT_API_RETURN_IF_ERROR(
            factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options,
                                                        &ep_devices[num_ep_devices++]));
      }
    }

    return nullptr;
  }

  // EP instances are created through the provider bridge instead; this factory
  // intentionally rejects direct OrtEp creation.
  static OrtStatus* CreateEpImpl(OrtEpFactory* /*this_ptr*/,
                                 _In_reads_(num_devices) const OrtHardwareDevice* const* /*devices*/,
                                 _In_reads_(num_devices) const OrtKeyValuePairs* const* /*ep_metadata*/,
                                 _In_ size_t /*num_devices*/,
                                 _In_ const OrtSessionOptions* /*session_options*/,
                                 _In_ const OrtLogger* /*logger*/,
                                 _Out_ OrtEp** /*ep*/) {
    return onnxruntime::CreateStatus(ORT_INVALID_ARGUMENT, "QNN EP factory does not support this method.");
  }

  static void ReleaseEpImpl(OrtEpFactory* /*this_ptr*/, OrtEp* /*ep*/) {
    // no-op as we never create an EP here.
  }

  const OrtApi& ort_api;
  const std::string ep_name;              // EP name
  const std::string vendor{"Microsoft"};  // EP vendor name

  // Qualcomm vendor ID. Refer to the ACPI ID registry (search Qualcomm): https://uefi.org/ACPI_ID_List
  // Little-endian FourCC 'QCOM'; must match the encoding used by CPUIDInfo::GetVendorId.
  const uint32_t vendor_id{'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24)};
  const OrtHardwareDeviceType ort_hw_device_type;  // Supported OrtHardwareDevice
  const std::string qnn_backend_type;              // QNN backend type for OrtHardwareDevice
};

extern "C" {
//
// Public symbols
//
// Entry point for ORT's plugin-EP loader: fills `factories` with up to
// `max_factories` OrtEpFactory instances and sets `num_factories`.
// Returns nullptr on success, or an OrtStatus describing the failure.
OrtStatus* CreateEpFactories(const char* /*registration_name*/, const OrtApiBase* ort_api_base,
                             OrtEpFactory** factories, size_t max_factories, size_t* num_factories) {
  const OrtApi* ort_api = ort_api_base->GetApi(ORT_API_VERSION);

  // Validate output capacity before doing any allocation work.
  if (max_factories < 1) {
    return ort_api->CreateStatus(ORT_INVALID_ARGUMENT,
                                 "Not enough space to return EP factory. Need at least one.");
  }

  // Factory could use registration_name or define its own EP name.
  auto factory_npu = std::make_unique<QnnEpFactory>(*ort_api,
                                                    onnxruntime::kQnnExecutionProvider,
                                                    OrtHardwareDeviceType_NPU, "htp");

  // If want to support GPU, create a new factory instance because QNN EP is not currently setup
  // to partition a single model among heterogeneous devices.
  // std::unique_ptr<OrtEpFactory> factory_gpu = std::make_unique<QnnEpFactory>(*ort_api, "QNNExecutionProvider_GPU", OrtHardwareDeviceType_GPU, "gpu");

  // Ownership transfers to the caller, who must hand it back via ReleaseEpFactory.
  factories[0] = factory_npu.release();
  *num_factories = 1;

  return nullptr;
}

// Destroys a factory previously returned by CreateEpFactories.
OrtStatus* ReleaseEpFactory(OrtEpFactory* factory) {
  auto* qnn_factory = static_cast<QnnEpFactory*>(factory);
  delete qnn_factory;
  return nullptr;
}
}
#endif // !BUILD_QNN_EP_STATIC_LIB
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/qnn/symbols.def
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
EXPORTS
GetProvider
CreateEpFactories
ReleaseEpFactory
59 changes: 40 additions & 19 deletions onnxruntime/test/providers/qnn/qnn_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "core/session/onnxruntime_run_options_config_keys.h"

#include "test/providers/qnn/qnn_test_utils.h"
#include "test/util/include/api_asserts.h"

#include "gtest/gtest.h"
#include "gmock/gmock.h"
Expand All @@ -37,24 +38,24 @@ namespace test {
// TODO: When we need QNN in a minimal build we should add an ORT format version of the model
#if !defined(ORT_MINIMAL_BUILD)

// Returns true if `ep_name` appears among the session's registered provider types.
static bool SessionHasEp(Ort::Session& session, const char* ep_name) {
  // Peek at the underlying InferenceSession to inspect its registered providers.
  const OrtSession* ort_session = session;
  const auto* inference_session = reinterpret_cast<const InferenceSession*>(ort_session);

  for (const auto& registered_name : inference_session->GetRegisteredProviderTypes()) {
    if (registered_name == ep_name) {
      return true;
    }
  }
  return false;
}

// Tests that the QNN EP is registered when added via the public C++ API.
// Loads a simple ONNX model that adds floats.
TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
auto session_has_qnn_ep = [](Ort::Session& session) -> bool {
// Access the underlying InferenceSession.
const OrtSession* ort_session = session;
const InferenceSession* s = reinterpret_cast<const InferenceSession*>(ort_session);
bool have_qnn_ep = false;

for (const auto& provider : s->GetRegisteredProviderTypes()) {
if (provider == kQnnExecutionProvider) {
have_qnn_ep = true;
break;
}
}
return have_qnn_ep;
};

onnxruntime::ProviderOptions options;
#if defined(_WIN32)
options["backend_path"] = "QnnHtp.dll";
Expand All @@ -77,8 +78,9 @@ TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
so.AppendExecutionProvider("QNN", options);

Ort::Session session(*ort_env, ort_model_path, so);
ASSERT_TRUE(session_has_qnn_ep(session)) << "QNN EP was not found in registered providers for session "
<< "when added to session with name 'QNN'";
ASSERT_TRUE(SessionHasEp(session, kQnnExecutionProvider))
<< "QNN EP was not found in registered providers for session "
<< "providers for session when added to session with name 'QNN'";
}

{
Expand All @@ -92,8 +94,9 @@ TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
so.AppendExecutionProvider(kQnnExecutionProvider, options);

Ort::Session session(*ort_env, ort_model_path, so);
ASSERT_TRUE(session_has_qnn_ep(session)) << "QNN EP was not found in registered providers for session "
<< "when added to session with name '" << kQnnExecutionProvider << "'";
ASSERT_TRUE(SessionHasEp(session, kQnnExecutionProvider))
<< "QNN EP was not found in registered providers for session "
<< "when added to session with name '" << kQnnExecutionProvider << "'";
}
}

Expand Down Expand Up @@ -1265,6 +1268,24 @@ TEST_F(QnnHTPBackendTests, LoadingAndUnloadingOfQnnLibrary_FixSegFault) {
}
#endif // !BUILD_QNN_EP_STATIC_LIB

#if defined(WIN32) && !BUILD_QNN_EP_STATIC_LIB
// Tests autoEP feature to automatically select an EP that supports the NPU.
// Currently only works on Windows.
TEST_F(QnnHTPBackendTests, AutoEp_PreferNpu) {
  // Register the dynamically-loaded QNN EP library so auto EP selection can consider it.
  ASSERT_ORTSTATUS_OK(Ort::GetApi().RegisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider,
                                                                     ORT_TSTR("onnxruntime_providers_qnn.dll")));

  Ort::SessionOptions so;
  // Ask ORT to automatically select an EP that supports the NPU.
  so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_NPU);

  const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx";
  Ort::Session session(*ort_env, ort_model_path, so);
  // The policy should have resolved to the QNN EP for this session.
  EXPECT_TRUE(SessionHasEp(session, kQnnExecutionProvider));

  // Unregister so later tests see a clean environment.
  ASSERT_ORTSTATUS_OK(Ort::GetApi().UnregisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider));
}
#endif // defined(WIN32) && !BUILD_QNN_EP_STATIC_LIB

#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
#endif // !defined(ORT_MINIMAL_BUILD)

Expand Down
Loading