@@ -87,8 +87,8 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
}

ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
-bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
-ORT_RETURN_IF(!is_npu_backend && input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
+bool is_cpu_backend = IsCpuBackend(qnn_model_wrapper.GetQnnBackendType());
+ORT_RETURN_IF(is_cpu_backend && input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
"QNN EP: Data type ", input_data_type->c_str(),
" is not supported for Conv operator in CPU backend.");

@@ -112,6 +112,7 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
}

// Validate that weight is signed type for per-channel quantization (required by QNN docs).
+bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
if (is_npu_backend) {
const auto& input_1 = inputs[1]; // weight
bool is_per_axis_quant = false;
@@ -222,7 +222,8 @@ Status ResizeOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
ORT_RETURN_IF_NOT(input_shape[0] == output_shape[0] && input_shape[1] == output_shape[1],
"QNN EP: Resize may only change the spatial dimensions.");

-if (!is_npu_backend) {
+const bool is_cpu_backend = IsCpuBackend(qnn_model_wrapper.GetQnnBackendType());
+if (is_cpu_backend) {
ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
ORT_RETURN_IF(input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
"QNN EP: Data type ", input_data_type->c_str(),
29 changes: 21 additions & 8 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
@@ -8,8 +8,7 @@
#include <string>
#include "QnnOpDef.h"
#include "CPU/QnnCpuCommon.h"
-// TODO: not exist for Windows yet
-// #include "GPU/QnnGpuCommon.h"
+#include "GPU/QnnGpuCommon.h"
#include "DSP/QnnDspCommon.h"
#include "HTP/QnnHtpCommon.h"
#include "HTP/QnnHtpContext.h"
@@ -171,10 +170,9 @@ void QnnBackendManager::SetQnnBackendType(uint32_t backend_id) {
case QNN_BACKEND_ID_CPU:
qnn_backend_type_ = QnnBackendType::CPU;
break;
-// TODO: update once it's ready for Widows
-// case QNN_BACKEND_ID_GPU:
-// qnn_backend_type_ = QnnBackendType::GPU;
-// break;
+case QNN_BACKEND_ID_GPU:
+  qnn_backend_type_ = QnnBackendType::GPU;
+  break;
case QNN_BACKEND_ID_DSP:
qnn_backend_type_ = QnnBackendType::DSP;
break;
@@ -617,16 +615,31 @@ Status QnnBackendManager::CreateContext(bool enable_htp_weight_sharing) {

QnnContext_Config_t context_priority_config = QNN_CONTEXT_CONFIG_INIT;
ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority_, context_priority_config));

const QnnContext_Config_t* npu_context_configs[] = {&context_priority_config,
&context_config_weight_sharing,
nullptr};
const QnnContext_Config_t* empty_context_configs[] = {nullptr};
-bool is_npu_backend = IsNpuBackend(GetQnnBackendType());

+const QnnContext_Config_t** configs = nullptr;
+switch (GetQnnBackendType()) {
+  case QnnBackendType::HTP:
+  case QnnBackendType::DSP:
+    configs = npu_context_configs;
+    break;
+  case QnnBackendType::GPU:
+    // Currently only this works with QnnGpu.
+    configs = nullptr;
+    break;
+  default:
+    configs = empty_context_configs;
+    break;
+}

Qnn_ContextHandle_t context = nullptr;
Qnn_ErrorHandle_t result = qnn_interface_.contextCreate(backend_handle_,
device_handle_,
-is_npu_backend ? npu_context_configs : empty_context_configs,
+configs,
&context);

ORT_RETURN_IF(QNN_CONTEXT_NO_ERROR != result, "Failed to create context. Error: ", QnnErrorHandleToString(result));
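
For reference, a minimal sketch of how an application might target the newly enabled GPU backend through the public ORT C++ API, assuming the QNN EP's documented "backend_path" provider option; the model path and library name are placeholders, so treat this as an illustration rather than part of the change.

#include <onnxruntime_cxx_api.h>
#include <string>
#include <unordered_map>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "qnn_gpu_demo");
  Ort::SessionOptions so;
  // Point the QNN EP at the GPU backend library instead of QnnHtp.dll/QnnCpu.dll.
  std::unordered_map<std::string, std::string> qnn_options{{"backend_path", "QnnGpu.dll"}};
  so.AppendExecutionProvider("QNN", qnn_options);
  Ort::Session session(env, ORT_TSTR("model.onnx"), so);  // placeholder model
  return 0;
}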
13 changes: 13 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_def.cc
@@ -598,5 +598,18 @@ bool IsNpuBackend(QnnBackendType backend_type) {
return backend_type == QnnBackendType::HTP || backend_type == QnnBackendType::DSP;
}

+bool IsGpuBackend(QnnBackendType backend_type) {
+  return backend_type == QnnBackendType::GPU;
+}
+
+bool IsCpuBackend(QnnBackendType backend_type) {
+  return backend_type == QnnBackendType::CPU;
+}
+
+// Is it Qualcomm hardware ?
+bool IsQpuBackend(QnnBackendType backend_type) {
+  return IsNpuBackend(backend_type) || IsGpuBackend(backend_type);
+}

} // namespace qnn
} // namespace onnxruntime
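
The three predicates group the backend enum into the categories the EP cares about: CPU (the reference backend), NPU (HTP or DSP), GPU, and "QPU", which here means any Qualcomm accelerator (NPU or GPU). Below is a hypothetical stand-alone sanity check, assuming the ORT source tree and QNN SDK headers are on the include path; it is not part of this PR.

#include <cassert>
#include "core/providers/qnn/builder/qnn_def.h"

int main() {
  using namespace onnxruntime::qnn;
  assert(IsCpuBackend(QnnBackendType::CPU));
  assert(IsGpuBackend(QnnBackendType::GPU));
  assert(IsNpuBackend(QnnBackendType::HTP) && IsNpuBackend(QnnBackendType::DSP));
  // QPU = NPU or GPU; the CPU reference backend is excluded.
  assert(IsQpuBackend(QnnBackendType::HTP) && IsQpuBackend(QnnBackendType::GPU));
  assert(!IsQpuBackend(QnnBackendType::CPU));
  return 0;
}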
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/qnn/builder/qnn_def.h
@@ -73,8 +73,14 @@ enum class QnnBackendType : uint8_t {
HTP_FP16
};

+bool IsCpuBackend(QnnBackendType backend_type);
+
bool IsNpuBackend(QnnBackendType backend_type);

+bool IsGpuBackend(QnnBackendType backend_type);
+
+bool IsQpuBackend(QnnBackendType backend_type);

// constexpr config values
constexpr const int kSleepMinLatency = 40;
constexpr const int kSleepLowLatency = 100;
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -789,8 +789,8 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer
return result;
}

-if ((context_cache_enabled_ || is_qnn_ctx_model) && !IsNpuBackend(qnn_backend_manager_->GetQnnBackendType())) {
-  LOGS(logger, ERROR) << "Qnn context cache only works for HTP or DSP backend.";
+if ((context_cache_enabled_ || is_qnn_ctx_model) && !IsQpuBackend(qnn_backend_manager_->GetQnnBackendType())) {
+  LOGS(logger, ERROR) << "Qnn context cache only works for HTP/DSP/GPU backend.";
return result;
}

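With IsQpuBackend in place, a context-cache request is accepted for the HTP, DSP, or GPU backends rather than only HTP/DSP. A sketch of how a caller might exercise this, assuming the "ep.context_enable"/"ep.context_file_path" session-option keys behave as documented for the QNN EP; the file names are placeholders.

#include <onnxruntime_cxx_api.h>
#include <string>
#include <unordered_map>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "qnn_ctx_cache_demo");
  Ort::SessionOptions so;
  so.AddConfigEntry("ep.context_enable", "1");                  // request an EP context-cache model
  so.AddConfigEntry("ep.context_file_path", "model_ctx.onnx");  // where to write it
  std::unordered_map<std::string, std::string> qnn_options{{"backend_path", "QnnGpu.dll"}};
  so.AppendExecutionProvider("QNN", qnn_options);
  Ort::Session session(env, ORT_TSTR("model.onnx"), so);  // placeholder model
  return 0;
}
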
1 change: 1 addition & 0 deletions setup.py
@@ -400,6 +400,7 @@ def finalize_options(self):
# QNN V68/V73 dependencies
qnn_deps = [
"QnnCpu.dll",
"QnnGpu.dll",
"QnnHtp.dll",
"QnnSaver.dll",
"QnnSystem.dll",
@@ -66,6 +66,7 @@ steps:
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\libQnnHtp*.so $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib /Y
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\libqnnhtp*.cat $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib /Y
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnCpu.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
+copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnGpu.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtp.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpPrepare.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
copy $(Build.BinariesDirectory)\${{parameters.buildConfig}}\${{parameters.buildConfig}}\QnnHtpV68Stub.dll $(Build.BinariesDirectory)\${{parameters.artifactName}}\lib
1 change: 1 addition & 0 deletions tools/nuget/generate_nuspec_for_native_nuget.py
@@ -552,6 +552,7 @@ def generate_files(line_list, args):

if is_qnn_package:
files_list.append("<file src=" + '"' + os.path.join(args.native_build_path, "QnnCpu.dll") + runtimes + " />")
files_list.append("<file src=" + '"' + os.path.join(args.native_build_path, "QnnGpu.dll") + runtimes + " />")
files_list.append("<file src=" + '"' + os.path.join(args.native_build_path, "QnnHtp.dll") + runtimes + " />")
files_list.append("<file src=" + '"' + os.path.join(args.native_build_path, "QnnSaver.dll") + runtimes + " />")
if args.target_architecture != "x64":