diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc index ee7782e3c8763..91961bf22ce1e 100644 --- a/onnxruntime/core/common/cpuid_info.cc +++ b/onnxruntime/core/common/cpuid_info.cc @@ -155,7 +155,7 @@ std::string CPUIDInfo::GetX86Vendor(int32_t* data) { uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) { if (vendor == "GenuineIntel") return 0x8086; if (vendor == "GenuineAMD") return 0x1022; - if (vendor.find("Qualcomm") == 0) return 'Q' << 24 | 'C' << 16 | 'O' << 8 | 'M'; + if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24); if (vendor.find("NV") == 0) return 0x10DE; return 0; } diff --git a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc index 7b92a23e428eb..b2f289448b013 100644 --- a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc +++ b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc @@ -79,6 +79,27 @@ struct QNN_Provider : Provider { return std::make_shared(*provider_options, config_options); } + Status CreateIExecutionProvider(const OrtHardwareDevice* const* /*devices*/, + const OrtKeyValuePairs* const* /*ep_metadata*/, + size_t num_devices, + ProviderOptions& provider_options, + const OrtSessionOptions& session_options, + const OrtLogger& logger, + std::unique_ptr& ep) override { + if (num_devices != 1) { + return Status(common::ONNXRUNTIME, ORT_EP_FAIL, "QNN EP only supports one device."); + } + + const ConfigOptions* config_options = &session_options.GetConfigOptions(); + + std::array configs_array = {&provider_options, config_options}; + const void* arg = reinterpret_cast(&configs_array); + auto ep_factory = CreateExecutionProviderFactory(arg); + ep = ep_factory->CreateProvider(session_options, logger); + + return Status::OK(); + } + void Initialize() override {} void Shutdown() override {} } g_provider; @@ -93,4 +114,121 @@ ORT_API(onnxruntime::Provider*, GetProvider) { return 
&onnxruntime::g_provider; } } + +#include "core/framework/error_code_helper.h" + +// OrtEpApi infrastructure to be able to use the QNN EP as an OrtEpFactory for auto EP selection. +struct QnnEpFactory : OrtEpFactory { + QnnEpFactory(const OrtApi& ort_api_in, + const char* ep_name, + OrtHardwareDeviceType hw_type, + const char* qnn_backend_type) + : ort_api{ort_api_in}, ep_name{ep_name}, ort_hw_device_type{hw_type}, qnn_backend_type{qnn_backend_type} { + GetName = GetNameImpl; + GetVendor = GetVendorImpl; + GetSupportedDevices = GetSupportedDevicesImpl; + CreateEp = CreateEpImpl; + ReleaseEp = ReleaseEpImpl; + } + + // Returns the name for the EP. Each unique factory configuration must have a unique name. + // Ex: a factory that supports NPU should have a different name than a factory that supports GPU. + static const char* GetNameImpl(const OrtEpFactory* this_ptr) { + const auto* factory = static_cast(this_ptr); + return factory->ep_name.c_str(); + } + + static const char* GetVendorImpl(const OrtEpFactory* this_ptr) { + const auto* factory = static_cast(this_ptr); + return factory->vendor.c_str(); + } + + // Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports. + // An EP created with this factory is expected to be able to execute a model with *all* supported + // hardware devices at once. A single instance of QNN EP is not currently setup to partition a model among + // multiple different QNN backends at once (e.g, npu, cpu, gpu), so this factory instance is set to only + // support one backend: npu. To support a different backend, like gpu, create a different factory instance + // that only supports GPU. 
+ static OrtStatus* GetSupportedDevicesImpl(OrtEpFactory* this_ptr, + const OrtHardwareDevice* const* devices, + size_t num_devices, + OrtEpDevice** ep_devices, + size_t max_ep_devices, + size_t* p_num_ep_devices) { + size_t& num_ep_devices = *p_num_ep_devices; + auto* factory = static_cast(this_ptr); + + for (size_t i = 0; i < num_devices && num_ep_devices < max_ep_devices; ++i) { + const OrtHardwareDevice& device = *devices[i]; + if (factory->ort_api.HardwareDevice_Type(&device) == factory->ort_hw_device_type && + factory->ort_api.HardwareDevice_VendorId(&device) == factory->vendor_id) { + OrtKeyValuePairs* ep_options = nullptr; + factory->ort_api.CreateKeyValuePairs(&ep_options); + factory->ort_api.AddKeyValuePair(ep_options, "backend_type", factory->qnn_backend_type.c_str()); + ORT_API_RETURN_IF_ERROR( + factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options, + &ep_devices[num_ep_devices++])); + } + } + + return nullptr; + } + + static OrtStatus* CreateEpImpl(OrtEpFactory* /*this_ptr*/, + _In_reads_(num_devices) const OrtHardwareDevice* const* /*devices*/, + _In_reads_(num_devices) const OrtKeyValuePairs* const* /*ep_metadata*/, + _In_ size_t /*num_devices*/, + _In_ const OrtSessionOptions* /*session_options*/, + _In_ const OrtLogger* /*logger*/, + _Out_ OrtEp** /*ep*/) { + return onnxruntime::CreateStatus(ORT_INVALID_ARGUMENT, "QNN EP factory does not support this method."); + } + + static void ReleaseEpImpl(OrtEpFactory* /*this_ptr*/, OrtEp* /*ep*/) { + // no-op as we never create an EP here. + } + + const OrtApi& ort_api; + const std::string ep_name; // EP name + const std::string vendor{"Microsoft"}; // EP vendor name + + // Qualcomm vendor ID. 
Refer to the ACPI ID registry (search Qualcomm): https://uefi.org/ACPI_ID_List + const uint32_t vendor_id{'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24)}; + const OrtHardwareDeviceType ort_hw_device_type; // Supported OrtHardwareDevice + const std::string qnn_backend_type; // QNN backend type for OrtHardwareDevice +}; + +extern "C" { +// +// Public symbols +// +OrtStatus* CreateEpFactories(const char* /*registration_name*/, const OrtApiBase* ort_api_base, + OrtEpFactory** factories, size_t max_factories, size_t* num_factories) { + const OrtApi* ort_api = ort_api_base->GetApi(ORT_API_VERSION); + + // Factory could use registration_name or define its own EP name. + auto factory_npu = std::make_unique(*ort_api, + onnxruntime::kQnnExecutionProvider, + OrtHardwareDeviceType_NPU, "htp"); + + // If want to support GPU, create a new factory instance because QNN EP is not currently setup to partition a single model + // among heterogeneous devices. + // std::unique_ptr factory_gpu = std::make_unique(*ort_api, "QNNExecutionProvider_GPU", OrtHardwareDeviceType_GPU, "gpu"); + + if (max_factories < 1) { + return ort_api->CreateStatus(ORT_INVALID_ARGUMENT, + "Not enough space to return EP factory. 
Need at least one."); + } + + factories[0] = factory_npu.release(); + *num_factories = 1; + + return nullptr; +} + +OrtStatus* ReleaseEpFactory(OrtEpFactory* factory) { + delete static_cast(factory); + return nullptr; +} +} #endif // !BUILD_QNN_EP_STATIC_LIB diff --git a/onnxruntime/core/providers/qnn/symbols.def b/onnxruntime/core/providers/qnn/symbols.def index 4ec2f7914c208..3afed01da1966 100644 --- a/onnxruntime/core/providers/qnn/symbols.def +++ b/onnxruntime/core/providers/qnn/symbols.def @@ -1,2 +1,4 @@ EXPORTS GetProvider + CreateEpFactories + ReleaseEpFactory diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index f736abcd3006d..0212dacadbced 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -16,6 +16,7 @@ #include "core/session/onnxruntime_run_options_config_keys.h" #include "test/providers/qnn/qnn_test_utils.h" +#include "test/util/include/api_asserts.h" #include "gtest/gtest.h" #include "gmock/gmock.h" @@ -37,24 +38,24 @@ namespace test { // TODO: When we need QNN in a minimal build we should add an ORT format version of the model #if !defined(ORT_MINIMAL_BUILD) +static bool SessionHasEp(Ort::Session& session, const char* ep_name) { + // Access the underlying InferenceSession. + const OrtSession* ort_session = session; + const InferenceSession* s = reinterpret_cast(ort_session); + bool has_ep = false; + + for (const auto& provider : s->GetRegisteredProviderTypes()) { + if (provider == ep_name) { + has_ep = true; + break; + } + } + return has_ep; +} + // Tests that the QNN EP is registered when added via the public C++ API. // Loads a simple ONNX model that adds floats. TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) { - auto session_has_qnn_ep = [](Ort::Session& session) -> bool { - // Access the underlying InferenceSession. 
- const OrtSession* ort_session = session; - const InferenceSession* s = reinterpret_cast(ort_session); - bool have_qnn_ep = false; - - for (const auto& provider : s->GetRegisteredProviderTypes()) { - if (provider == kQnnExecutionProvider) { - have_qnn_ep = true; - break; - } - } - return have_qnn_ep; - }; - onnxruntime::ProviderOptions options; #if defined(_WIN32) options["backend_path"] = "QnnHtp.dll"; @@ -77,8 +78,9 @@ TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) { so.AppendExecutionProvider("QNN", options); Ort::Session session(*ort_env, ort_model_path, so); - ASSERT_TRUE(session_has_qnn_ep(session)) << "QNN EP was not found in registered providers for session " - << "when added to session with name 'QNN'"; + ASSERT_TRUE(SessionHasEp(session, kQnnExecutionProvider)) + << "QNN EP was not found in registered providers for session " + << "when added to session with name 'QNN'"; } { @@ -92,8 +94,9 @@ TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) { so.AppendExecutionProvider(kQnnExecutionProvider, options); Ort::Session session(*ort_env, ort_model_path, so); - ASSERT_TRUE(session_has_qnn_ep(session)) << "QNN EP was not found in registered providers for session " - << "when added to session with name '" << kQnnExecutionProvider << "'"; + ASSERT_TRUE(SessionHasEp(session, kQnnExecutionProvider)) + << "QNN EP was not found in registered providers for session " + << "when added to session with name '" << kQnnExecutionProvider << "'"; } } @@ -1265,6 +1268,24 @@ TEST_F(QnnHTPBackendTests, LoadingAndUnloadingOfQnnLibrary_FixSegFault) { } #endif // !BUILD_QNN_EP_STATIC_LIB +#if defined(WIN32) && !BUILD_QNN_EP_STATIC_LIB +// Tests autoEP feature to automatically select an EP that supports the NPU. +// Currently only works on Windows. 
+TEST_F(QnnHTPBackendTests, AutoEp_PreferNpu) { + ASSERT_ORTSTATUS_OK(Ort::GetApi().RegisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider, + ORT_TSTR("onnxruntime_providers_qnn.dll"))); + + Ort::SessionOptions so; + so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_NPU); + + const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx"; + Ort::Session session(*ort_env, ort_model_path, so); + EXPECT_TRUE(SessionHasEp(session, kQnnExecutionProvider)); + + ASSERT_ORTSTATUS_OK(Ort::GetApi().UnregisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider)); +} +#endif // defined(WIN32) && !BUILD_QNN_EP_STATIC_LIB + #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) #endif // !defined(ORT_MINIMAL_BUILD)