Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,17 @@ QnnEp::QnnEp(QnnEpFactory& factory,
("BF16 mode enabled with compatible hardware: SoC " + std::to_string(soc_model)).c_str());
}

// Enforce SoC model to be set on x86_64 Linux (simulator) when FP16 is enabled.
#if defined(__linux__) && !defined(__aarch64__)
if (enable_HTP_FP16_precision_ && soc_model == QNN_SOC_MODEL_UNKNOWN) {
const std::string message =
"FP16 precision mode is enabled but soc_model is not specified. "
"Both parameters must be set together for FP16 precision support.";
ORT_CXX_LOG(logger_, ORT_LOGGING_LEVEL_ERROR, message.c_str());
throw std::runtime_error(message);
}
#endif

if (disable_cpu_ep_fallback_ && model_settings_.offload_graph_io_quantization) {
ORT_CXX_LOG(logger_,
ORT_LOGGING_LEVEL_INFO,
Expand Down
17 changes: 13 additions & 4 deletions onnxruntime/test/providers/qnn/batch_norm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,11 @@ TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm2D_U16U16S32) {

// Test FP16 BatchNormalization on the HTP backend.
TEST_F(QnnHTPBackendTests, BatchNorm_FP16) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
constexpr int64_t num_channels = 2;
std::vector<float> input_data = {-8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 1.1f, 3.3f, 8.0f,
-7.0f, -5.0f, -3.0f, -1.0f, 0.0f, 2.1f, 4.3f, 7.0f};
Expand All @@ -382,10 +385,16 @@ TEST_F(QnnHTPBackendTests, BatchNorm_FP16) {
// Test FP32 BatchNormalization on the HTP backend with the enable_htp_fp16_precision option enabled
// to run it with fp16 precision.
TEST_F(QnnHTPBackendTests, BatchNorm_FP32_as_FP16) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();
ProviderOptions provider_options;

provider_options["backend_type"] = "htp";
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";

constexpr int64_t num_channels = 2;
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/qnn/bf16_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ namespace test {
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
provider_options["htp_bf16_enable"] = "1"; // Enable BF16 mode
provider_options["soc_model"] = "88"; // Target SOC ID for BF16 support
provider_options["soc_model"] = "88";             // TODO: Use QnnTypes.h when it's available
provider_options["offload_graph_io_quantization"] = "0";

RunQnnModelTest(build_test_case, provider_options, opset, expected_ep_assignment, fp32_abs_err);
Expand Down
14 changes: 13 additions & 1 deletion onnxruntime/test/providers/qnn/cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ static void RunCastOpTest(const std::vector<int64_t>& shape, ONNX_NAMESPACE::Ten

if (backend_name == "htp") {
if (enable_fp16_precision) {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";
} else {
provider_options["enable_htp_fp16_precision"] = "0";
Expand All @@ -74,6 +82,11 @@ static void RunCastOpTest(const std::vector<int64_t>& shape, ONNX_NAMESPACE::Ten
static void RunCastFP16HTPTest(const std::vector<int64_t>& shape,
ONNX_NAMESPACE::TensorProto_DataType dst_type,
ExpectedEPNodeAssignment expected_ep_assignment) {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
Expand Down Expand Up @@ -163,7 +176,6 @@ TEST_F(QnnHTPBackendTests, TestCastFloatToBoolHTP) {

// Cast float16 to bool on HTP.
TEST_F(QnnHTPBackendTests, TestCastFloat16ToBoolHTP) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();
RunCastFP16HTPTest({3, 3},
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL,
ExpectedEPNodeAssignment::All);
Expand Down
7 changes: 5 additions & 2 deletions onnxruntime/test/providers/qnn/clip_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,11 @@ TEST_F(QnnHTPBackendTests, Clip_U8_QuantizedMinMax) {

// Test FP16 Clip with min (FP16)
TEST_F(QnnHTPBackendTests, Clip_FP16) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";

Expand Down
7 changes: 3 additions & 4 deletions onnxruntime/test/providers/qnn/cumsum_op_htp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ static void RunCumSumOpTest(const std::string& op_type,
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
provider_options["offload_graph_io_quantization"] = "0";
provider_options["soc_model"] = "87";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

// Runs model with a Q/DQ binary op and compares the outputs of the CPU and QNN EPs.
RunQnnModelTest(BuildOpTestCase<InputType1, InputType2>(op_type, {input_def_1}, {input_def_2}, attrs),
Expand All @@ -38,7 +40,6 @@ static void RunCumSumOpTest(const std::string& op_type,
}

// Non-QDQ model, CumSum with float input and axis input as initializer with axis 0
// Passed with provider_options["soc_model"] = "87". Failed with default soc_model: 35
TEST_F(QnnHTPBackendTests, CumSum_float_int32_e0_r0_axis_0) {
RunCumSumOpTest<float, int32_t>("CumSum",
TestInputDef<float>({3, 2}, false, {1.3f, 7.2f, 0.4f, 3.4f, 5.7f, 0.8f}),
Expand All @@ -50,7 +51,6 @@ TEST_F(QnnHTPBackendTests, CumSum_float_int32_e0_r0_axis_0) {
}

// Non-QDQ model, CumSum with float input and axis input as initializer with axis -1
// Passed with provider_options["soc_model"] = "87". Failed with default soc_model: 35
TEST_F(QnnHTPBackendTests, CumSum_float_int32_e0_r0_axis_neg1) {
RunCumSumOpTest<float, int32_t>("CumSum",
TestInputDef<float>({3, 2}, false, {1.3f, 7.2f, 0.4f, 3.4f, 5.7f, 0.8f}),
Expand All @@ -62,7 +62,6 @@ TEST_F(QnnHTPBackendTests, CumSum_float_int32_e0_r0_axis_neg1) {
}

// Test int64 axis
// Passed with provider_options["soc_model"] = "87". Failed with default soc_model: 35
TEST_F(QnnHTPBackendTests, CumSum_float_int64_e0_r0_axis_1) {
RunCumSumOpTest<float, int64_t>("CumSum",
TestInputDef<float>({3, 2}, false, {1.3f, 7.2f, 0.4f, 3.4f, 5.7f, 0.8f}),
Expand Down
8 changes: 8 additions & 0 deletions onnxruntime/test/providers/qnn/fused_matmul_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ static void RunFusedMatMulTest(const TestInputDef<DataType>& input_a_def,
provider_options["backend_type"] = backend_name;

if (backend_name == "htp") {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";
}

Expand Down
12 changes: 12 additions & 0 deletions onnxruntime/test/providers/qnn/group_norm_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ TEST_F(QnnHTPBackendTests, GroupNorm_Float_Default) {

ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

RunQnnModelTest(build_test_case,
provider_options,
Expand Down Expand Up @@ -102,6 +105,9 @@ TEST_F(QnnHTPBackendTests, GroupNorm_Float_MultipleGroups) {

ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

RunQnnModelTest(build_test_case,
provider_options,
Expand Down Expand Up @@ -132,6 +138,9 @@ TEST_F(QnnHTPBackendTests, GroupNorm_Float_LargeEpsilon) {

ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

RunQnnModelTest(build_test_case,
provider_options,
Expand Down Expand Up @@ -163,6 +172,9 @@ TEST_F(QnnHTPBackendTests, GroupNorm_Float_3D) {

ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

RunQnnModelTest(build_test_case,
provider_options,
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/qnn/inverse_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static void RunInverseTest(const std::vector<TestInputDef<DataType>>& input_defs

provider_options["backend_type"] = backend_name;
provider_options["offload_graph_io_quantization"] = "0";
provider_options["soc_model"] = "30";
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8350);

RunQnnModelTest(BuildOpTestCase<DataType>("Inverse", input_defs, {}, attrs, kMSDomain), // Inverse Op exist in kMSDomain
provider_options,
Expand Down
7 changes: 5 additions & 2 deletions onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ TEST_F(QnnHTPBackendTests, LeakyReluOpSet16) {

// Test Leaky Relu where input is FP16 and alpha is FP32
TEST_F(QnnHTPBackendTests, LeakyReluFP16OpSet16) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
provider_options["offload_graph_io_quantization"] = "0";
Expand Down
8 changes: 8 additions & 0 deletions onnxruntime/test/providers/qnn/matmul_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ static void RunQDQPerChannelMatMulOpTest(
provider_options["offload_graph_io_quantization"] = "0";

if (enable_fp16_precision) {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";
} else {
provider_options["enable_htp_fp16_precision"] = "0";
Expand Down
26 changes: 15 additions & 11 deletions onnxruntime/test/providers/qnn/pad_op_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,14 @@ static void RunPadOpTest(const TestInputDef<T>& data_def,
provider_options["offload_graph_io_quantization"] = "0";

if (enable_fp16_precision) {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";
}

Expand Down Expand Up @@ -327,7 +335,7 @@ TEST_F(QnnCPUBackendTests, Pad6d) {
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
//
// HTP tests:
TEST_F(QnnHTPBackendTests, PadNoConstantValue_fp16_test) {
TEST_F(QnnHTPBackendTests, PadNoConstantValue_FP32_as_FP16) {
bool has_constant_value_input = false;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
Expand All @@ -343,9 +351,7 @@ TEST_F(QnnHTPBackendTests, PadNoConstantValue_fp16_test) {
}

// Test MLFloat16 constant = 0
TEST_F(QnnHTPBackendTests, PadConstantValue_fp16_0_test) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

TEST_F(QnnHTPBackendTests, PadConstantValue_FP16_0) {
bool has_constant_value_input = true;
bool enable_fp16_precision = true;
// ONNX expects data and constant to have the same dtype.
Expand All @@ -367,9 +373,7 @@ TEST_F(QnnHTPBackendTests, PadConstantValue_fp16_0_test) {

// Test MLFloat16 constant = 1
// Should not be assigned to HTP since HTP only supports fp16 with constant = 0.
TEST_F(QnnHTPBackendTests, PadConstantValue_fp16_1_test) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

TEST_F(QnnHTPBackendTests, PadConstantValue_FP16_1) {
bool has_constant_value_input = true;
bool enable_fp16_precision = true;
// ONNX expects data and constant to have the same dtype.
Expand All @@ -389,7 +393,7 @@ TEST_F(QnnHTPBackendTests, PadConstantValue_fp16_1_test) {
2e-3f);
}

TEST_F(QnnHTPBackendTests, PadReflectMode_fp16) {
TEST_F(QnnHTPBackendTests, PadReflectMode_FP32_as_FP16) {
bool has_constant_value_input = true;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
Expand All @@ -410,7 +414,7 @@ TEST_F(QnnHTPBackendTests, PadReflectMode_fp16) {
// QnnDsp <E> "node" generated: could not create op
// QnnDsp <E> RouterWindows graph prepare failed 12
// QnnDsp <E> Failed to finalize graph (id: 1) with err 1002
TEST_F(QnnHTPBackendTests, DISABLED_PadReflectMode_FP16_big_data) {
TEST_F(QnnHTPBackendTests, DISABLED_PadReflectMode_FP32_as_FP16_big_data) {
bool has_constant_value_input = true;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({1, 4, 512, 512}, false, GetFloatDataInRange(1.0f, 10.0f, 4 * 512 * 512)),
Expand All @@ -425,7 +429,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_PadReflectMode_FP16_big_data) {
2e-3f);
}

TEST_F(QnnHTPBackendTests, PadNoConstantNegValue_fp16_test) {
TEST_F(QnnHTPBackendTests, PadNoConstantNegValue_FP32_as_FP16) {
bool has_constant_value_input = false;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
Expand All @@ -440,7 +444,7 @@ TEST_F(QnnHTPBackendTests, PadNoConstantNegValue_fp16_test) {
2e-3f);
}

TEST_F(QnnHTPBackendTests, PadNoConstantMixValue_fp16_test) {
TEST_F(QnnHTPBackendTests, PadNoConstantMixValue_FP32_as_FP16) {
bool has_constant_value_input = false;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
Expand Down
Loading