diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
index 08c92be695511..23811c200213a 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
@@ -87,8 +87,8 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
   }
 
   ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
-  bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
-  ORT_RETURN_IF(!is_npu_backend && input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
+  bool is_cpu_backend = IsCpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  ORT_RETURN_IF(is_cpu_backend && input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
                 "QNN EP: Data type ", input_data_type->c_str(),
                 " is not supported for Conv operator in CPU backend.");
 
@@ -112,6 +112,7 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
   }
 
   // Validate that weight is signed type for per-channel quantization (required by QNN docs).
+  bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
   if (is_npu_backend) {
     const auto& input_1 = inputs[1];  // weight
     bool is_per_axis_quant = false;
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc
index 5e173b7aff030..347f0651069dc 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/resize_op_builder.cc
@@ -222,7 +222,8 @@ Status ResizeOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
   ORT_RETURN_IF_NOT(input_shape[0] == output_shape[0] && input_shape[1] == output_shape[1],
                     "QNN EP: Resize may only change the spatial dimensions.");
 
-  if (!is_npu_backend) {
+  const bool is_cpu_backend = IsCpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  if (is_cpu_backend) {
     ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
     ORT_RETURN_IF(input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
                   "QNN EP: Data type ", input_data_type->c_str(),
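Editor's note: the two op-builder hunks above flip the data-type guard from "not an NPU backend" to "is the CPU backend", so the float-only restriction applies to CPU alone instead of to every non-NPU backend (which would have wrongly excluded the new GPU backend). Below is a minimal illustrative sketch of that predicate change, using an abbreviated local copy of the `QnnBackendType` enum and helper bodies mirroring the ones added in `qnn_def.cc`; none of this code is from the PR itself.

```cpp
#include <cassert>
#include <cstdint>

// Abbreviated stand-in for QnnBackendType from qnn_def.h (illustration only).
enum class QnnBackendType : uint8_t { CPU, GPU, DSP, HTP };

bool IsCpuBackend(QnnBackendType t) { return t == QnnBackendType::CPU; }
bool IsNpuBackend(QnnBackendType t) {
  return t == QnnBackendType::HTP || t == QnnBackendType::DSP;
}

int main() {
  // Old guard: `!IsNpuBackend(t)` is true for GPU, so GPU inputs were
  // restricted to float. New guard: only CPU triggers the restriction.
  assert(!IsNpuBackend(QnnBackendType::GPU));  // old check would have fired here
  assert(!IsCpuBackend(QnnBackendType::GPU));  // new check lets GPU through
  return 0;
}
```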
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
index 6b9f6a5e73e0f..089115938b0d3 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -8,8 +8,7 @@
 #include <string>
 #include "QnnOpDef.h"
 #include "CPU/QnnCpuCommon.h"
-// TODO: not exist for Windows yet
-// #include "GPU/QnnGpuCommon.h"
+#include "GPU/QnnGpuCommon.h"
 #include "DSP/QnnDspCommon.h"
 #include "HTP/QnnHtpCommon.h"
 #include "HTP/QnnHtpContext.h"
@@ -171,10 +170,9 @@ void QnnBackendManager::SetQnnBackendType(uint32_t backend_id) {
     case QNN_BACKEND_ID_CPU:
       qnn_backend_type_ = QnnBackendType::CPU;
       break;
-    // TODO: update once it's ready for Widows
-    // case QNN_BACKEND_ID_GPU:
-    //   qnn_backend_type_ = QnnBackendType::GPU;
-    //   break;
+    case QNN_BACKEND_ID_GPU:
+      qnn_backend_type_ = QnnBackendType::GPU;
+      break;
     case QNN_BACKEND_ID_DSP:
       qnn_backend_type_ = QnnBackendType::DSP;
       break;
@@ -617,16 +615,31 @@ Status QnnBackendManager::CreateContext(bool enable_htp_weight_sharing) {
 
   QnnContext_Config_t context_priority_config = QNN_CONTEXT_CONFIG_INIT;
   ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority_, context_priority_config));
+
   const QnnContext_Config_t* npu_context_configs[] = {&context_priority_config,
                                                       &context_config_weight_sharing,
                                                       nullptr};
   const QnnContext_Config_t* empty_context_configs[] = {nullptr};
-  bool is_npu_backend = IsNpuBackend(GetQnnBackendType());
+
+  const QnnContext_Config_t** configs = nullptr;
+  switch (GetQnnBackendType()) {
+    case QnnBackendType::HTP:
+    case QnnBackendType::DSP:
+      configs = npu_context_configs;
+      break;
+    case QnnBackendType::GPU:
+      // Currently only this works with QnnGpu.
+      configs = nullptr;
+      break;
+    default:
+      configs = empty_context_configs;
+      break;
+  }
 
   Qnn_ContextHandle_t context = nullptr;
   Qnn_ErrorHandle_t result = qnn_interface_.contextCreate(backend_handle_,
                                                           device_handle_,
-                                                          is_npu_backend ? npu_context_configs : empty_context_configs,
+                                                          configs,
                                                           &context);
   ORT_RETURN_IF(QNN_CONTEXT_NO_ERROR != result, "Failed to create context. Error: ", QnnErrorHandleToString(result));
 
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.cc b/onnxruntime/core/providers/qnn/builder/qnn_def.cc
index 3380436fcf156..d3a086ea1bc9f 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_def.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_def.cc
@@ -598,5 +598,18 @@ bool IsNpuBackend(QnnBackendType backend_type) {
   return backend_type == QnnBackendType::HTP || backend_type == QnnBackendType::DSP;
 }
 
+bool IsGpuBackend(QnnBackendType backend_type) {
+  return backend_type == QnnBackendType::GPU;
+}
+
+bool IsCpuBackend(QnnBackendType backend_type) {
+  return backend_type == QnnBackendType::CPU;
+}
+
+// Is it Qualcomm hardware?
+bool IsQpuBackend(QnnBackendType backend_type) {
+  return IsNpuBackend(backend_type) || IsGpuBackend(backend_type);
+}
+
 }  // namespace qnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h
index 4088670bc6217..acd0db09cdd1f 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_def.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h
@@ -73,8 +73,14 @@ enum class QnnBackendType : uint8_t {
   HTP_FP16
 };
 
+bool IsCpuBackend(QnnBackendType backend_type);
+
 bool IsNpuBackend(QnnBackendType backend_type);
 
+bool IsGpuBackend(QnnBackendType backend_type);
+
+bool IsQpuBackend(QnnBackendType backend_type);
+
 // constexpr config values
 constexpr const int kSleepMinLatency = 40;
 constexpr const int kSleepLowLatency = 100;
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
index 142a7362ad6ae..c35e8c16c32cb 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -789,8 +789,8 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer
     return result;
   }
 
-  if ((context_cache_enabled_ || is_qnn_ctx_model) && !IsNpuBackend(qnn_backend_manager_->GetQnnBackendType())) {
-    LOGS(logger, ERROR) << "Qnn context cache only works for HTP or DSP backend.";
+  if ((context_cache_enabled_ || is_qnn_ctx_model) && !IsQpuBackend(qnn_backend_manager_->GetQnnBackendType())) {
+    LOGS(logger, ERROR) << "Qnn context cache only works for HTP/DSP/GPU backend.";
     return result;
   }
 
diff --git a/setup.py b/setup.py
index 53e533050b245..5fc78963eca9a 100644
--- a/setup.py
+++ b/setup.py
@@ -400,6 +400,7 @@ def finalize_options(self):
         # QNN V68/V73 dependencies
         qnn_deps = [
             "QnnCpu.dll",
+            "QnnGpu.dll",
             "QnnHtp.dll",
             "QnnSaver.dll",
             "QnnSystem.dll",
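Editor's note: the `CreateContext` hunk is the subtle one. `contextCreate` takes a null-terminated array of `QnnContext_Config_t*`, so "no configs" can be spelled two ways: an array whose first element is `nullptr` (`empty_context_configs`), or no array pointer at all (`nullptr`). Per the in-diff comment, QnnGpu currently accepts only the latter, which is why the old boolean ternary becomes a three-way switch. A standalone sketch of that selection logic, using my own stand-in types rather than the QNN SDK's:

```cpp
#include <cstdint>

// Stand-ins for the SDK/EP types (illustration only).
enum class QnnBackendType : uint8_t { CPU, GPU, DSP, HTP };
struct QnnContext_Config_t {};

const QnnContext_Config_t** SelectContextConfigs(
    QnnBackendType backend,
    const QnnContext_Config_t** npu_context_configs,
    const QnnContext_Config_t** empty_context_configs) {
  switch (backend) {
    case QnnBackendType::HTP:
    case QnnBackendType::DSP:
      return npu_context_configs;    // priority + weight-sharing entries
    case QnnBackendType::GPU:
      return nullptr;                // QnnGpu: pass no config array at all
    default:
      return empty_context_configs;  // {nullptr}: an array that is present but empty
  }
}
```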
diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml
index 046c737a2b151..f15a2992e0d00 100644
--- a/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml
@@ -66,6 +66,7 @@ steps:
         copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\libQnnHtp*.so $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib /Y
         copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\libqnnhtp*.cat $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib /Y
         copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnCpu.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
+        copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnGpu.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
         copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtp.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
         copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpPrepare.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
         copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV68Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py
index 8ccb2c054900e..1b7b5f5bc7092 100644
--- a/tools/nuget/generate_nuspec_for_native_nuget.py
+++ b/tools/nuget/generate_nuspec_for_native_nuget.py
@@ -552,6 +552,7 @@ def generate_files(line_list, args):
 
     if is_qnn_package:
        files_list.append("")
+        files_list.append("")
         files_list.append("")
         files_list.append("")
         if args.target_architecture != "x64":
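Editor's note: with `QnnGpu.dll` now shipped by setup.py, the C-API zip, and the NuGet package, an application can point the QNN EP at the GPU backend through the existing `backend_path` provider option. A hedged usage sketch (the option name and `AppendExecutionProvider("QNN", ...)` overload are the public QNN EP API; `model.onnx` is a placeholder path, and the wide-string constructor shown is the Windows build):

```cpp
#include <onnxruntime_cxx_api.h>

#include <string>
#include <unordered_map>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "qnn-gpu-demo");
  Ort::SessionOptions session_options;

  // Same option used today to select QnnCpu.dll or QnnHtp.dll; this PR makes
  // QnnGpu.dll available alongside them in the packaged artifacts.
  std::unordered_map<std::string, std::string> qnn_options{
      {"backend_path", "QnnGpu.dll"}};
  session_options.AppendExecutionProvider("QNN", qnn_options);

  Ort::Session session(env, L"model.onnx", session_options);  // placeholder model
  return 0;
}
```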