Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,17 @@ QnnEp::QnnEp(QnnEpFactory& factory,
("BF16 mode enabled with compatible hardware: SoC " + std::to_string(soc_model)).c_str());
}

// Enforce SoC model to be set on x86_64 Linux (simulator) when FP16 is enabled.
#if defined(__linux__) && !defined(__aarch64__)
if (enable_HTP_FP16_precision_ && soc_model == QNN_SOC_MODEL_UNKNOWN) {
const std::string message =
"FP16 precision mode is enabled but soc_model is not specified. "
"Both parameters must be set together for FP16 precision support.";
ORT_CXX_LOG(logger_, ORT_LOGGING_LEVEL_ERROR, message.c_str());
throw std::runtime_error(message);
}
#endif

if (disable_cpu_ep_fallback_ && model_settings_.offload_graph_io_quantization) {
ORT_CXX_LOG(logger_,
ORT_LOGGING_LEVEL_INFO,
Expand Down
17 changes: 13 additions & 4 deletions onnxruntime/test/providers/qnn/batch_norm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,11 @@ TEST_F(QnnHTPBackendTests, DISABLED_BatchNorm2D_U16U16S32) {

// Test FP16 BatchNormalization on the HTP backend.
TEST_F(QnnHTPBackendTests, BatchNorm_FP16) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
constexpr int64_t num_channels = 2;
std::vector<float> input_data = {-8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 1.1f, 3.3f, 8.0f,
-7.0f, -5.0f, -3.0f, -1.0f, 0.0f, 2.1f, 4.3f, 7.0f};
Expand All @@ -382,10 +385,16 @@ TEST_F(QnnHTPBackendTests, BatchNorm_FP16) {
// Test FP32 BatchNormalization on the HTP backend with the enable_htp_fp16_precision option enabled
// to run it with fp16 precision.
TEST_F(QnnHTPBackendTests, BatchNorm_FP32_as_FP16) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();
ProviderOptions provider_options;

provider_options["backend_type"] = "htp";
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";

constexpr int64_t num_channels = 2;
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/qnn/bf16_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ namespace test {
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
provider_options["htp_bf16_enable"] = "1"; // Enable BF16 mode
provider_options["soc_model"] = "88"; // Target SOC ID for BF16 support
provider_options["soc_model"] = "88";             // TODO: Use QnnTypes.h when it's available
provider_options["offload_graph_io_quantization"] = "0";

RunQnnModelTest(build_test_case, provider_options, opset, expected_ep_assignment, fp32_abs_err);
Expand Down
14 changes: 13 additions & 1 deletion onnxruntime/test/providers/qnn/cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ static void RunCastOpTest(const std::vector<int64_t>& shape, ONNX_NAMESPACE::Ten

if (backend_name == "htp") {
if (enable_fp16_precision) {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";
} else {
provider_options["enable_htp_fp16_precision"] = "0";
Expand All @@ -74,6 +82,11 @@ static void RunCastOpTest(const std::vector<int64_t>& shape, ONNX_NAMESPACE::Ten
static void RunCastFP16HTPTest(const std::vector<int64_t>& shape,
ONNX_NAMESPACE::TensorProto_DataType dst_type,
ExpectedEPNodeAssignment expected_ep_assignment) {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
Expand Down Expand Up @@ -163,7 +176,6 @@ TEST_F(QnnHTPBackendTests, TestCastFloatToBoolHTP) {

// Cast float16 to bool on HTP.
TEST_F(QnnHTPBackendTests, TestCastFloat16ToBoolHTP) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();
RunCastFP16HTPTest({3, 3},
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL,
ExpectedEPNodeAssignment::All);
Expand Down
7 changes: 5 additions & 2 deletions onnxruntime/test/providers/qnn/clip_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,11 @@ TEST_F(QnnHTPBackendTests, Clip_U8_QuantizedMinMax) {

// Test FP16 Clip with min (FP16)
TEST_F(QnnHTPBackendTests, Clip_FP16) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";

Expand Down
7 changes: 3 additions & 4 deletions onnxruntime/test/providers/qnn/cumsum_op_htp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ static void RunCumSumOpTest(const std::string& op_type,
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
provider_options["offload_graph_io_quantization"] = "0";
provider_options["soc_model"] = "87";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

// Runs model with a Q/DQ binary op and compares the outputs of the CPU and QNN EPs.
RunQnnModelTest(BuildOpTestCase<InputType1, InputType2>(op_type, {input_def_1}, {input_def_2}, attrs),
Expand All @@ -38,7 +40,6 @@ static void RunCumSumOpTest(const std::string& op_type,
}

// Non-QDQ model, CumSum with float input and axis input as initializer with axis 0
// Passed with provider_options["soc_model"] = "87". Failed with default soc_model: 35
TEST_F(QnnHTPBackendTests, CumSum_float_int32_e0_r0_axis_0) {
RunCumSumOpTest<float, int32_t>("CumSum",
TestInputDef<float>({3, 2}, false, {1.3f, 7.2f, 0.4f, 3.4f, 5.7f, 0.8f}),
Expand All @@ -50,7 +51,6 @@ TEST_F(QnnHTPBackendTests, CumSum_float_int32_e0_r0_axis_0) {
}

// Non-QDQ model, CumSum with float input and axis input as initializer with axis -1
// Passed with provider_options["soc_model"] = "87". Failed with default soc_model: 35
TEST_F(QnnHTPBackendTests, CumSum_float_int32_e0_r0_axis_neg1) {
RunCumSumOpTest<float, int32_t>("CumSum",
TestInputDef<float>({3, 2}, false, {1.3f, 7.2f, 0.4f, 3.4f, 5.7f, 0.8f}),
Expand All @@ -62,7 +62,6 @@ TEST_F(QnnHTPBackendTests, CumSum_float_int32_e0_r0_axis_neg1) {
}

// Test int64 axis
// Passed with provider_options["soc_model"] = "87". Failed with default soc_model: 35
TEST_F(QnnHTPBackendTests, CumSum_float_int64_e0_r0_axis_1) {
RunCumSumOpTest<float, int64_t>("CumSum",
TestInputDef<float>({3, 2}, false, {1.3f, 7.2f, 0.4f, 3.4f, 5.7f, 0.8f}),
Expand Down
8 changes: 8 additions & 0 deletions onnxruntime/test/providers/qnn/fused_matmul_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ static void RunFusedMatMulTest(const TestInputDef<DataType>& input_a_def,
provider_options["backend_type"] = backend_name;

if (backend_name == "htp") {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";
}

Expand Down
12 changes: 12 additions & 0 deletions onnxruntime/test/providers/qnn/group_norm_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ TEST_F(QnnHTPBackendTests, GroupNorm_Float_Default) {

ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

RunQnnModelTest(build_test_case,
provider_options,
Expand Down Expand Up @@ -102,6 +105,9 @@ TEST_F(QnnHTPBackendTests, GroupNorm_Float_MultipleGroups) {

ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

RunQnnModelTest(build_test_case,
provider_options,
Expand Down Expand Up @@ -132,6 +138,9 @@ TEST_F(QnnHTPBackendTests, GroupNorm_Float_LargeEpsilon) {

ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

RunQnnModelTest(build_test_case,
provider_options,
Expand Down Expand Up @@ -163,6 +172,9 @@ TEST_F(QnnHTPBackendTests, GroupNorm_Float_3D) {

ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif

RunQnnModelTest(build_test_case,
provider_options,
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/qnn/inverse_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static void RunInverseTest(const std::vector<TestInputDef<DataType>>& input_defs

provider_options["backend_type"] = backend_name;
provider_options["offload_graph_io_quantization"] = "0";
provider_options["soc_model"] = "30";
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8350);

RunQnnModelTest(BuildOpTestCase<DataType>("Inverse", input_defs, {}, attrs, kMSDomain), // Inverse Op exist in kMSDomain
provider_options,
Expand Down
7 changes: 5 additions & 2 deletions onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ TEST_F(QnnHTPBackendTests, LeakyReluOpSet16) {

// Test Leaky Relu where input is FP16 and alpha is FP32
TEST_F(QnnHTPBackendTests, LeakyReluFP16OpSet16) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
ProviderOptions provider_options;
provider_options["backend_type"] = "htp";
provider_options["offload_graph_io_quantization"] = "0";
Expand Down
8 changes: 8 additions & 0 deletions onnxruntime/test/providers/qnn/matmul_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ static void RunQDQPerChannelMatMulOpTest(
provider_options["offload_graph_io_quantization"] = "0";

if (enable_fp16_precision) {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";
} else {
provider_options["enable_htp_fp16_precision"] = "0";
Expand Down
26 changes: 15 additions & 11 deletions onnxruntime/test/providers/qnn/pad_op_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,14 @@ static void RunPadOpTest(const TestInputDef<T>& data_def,
provider_options["offload_graph_io_quantization"] = "0";

if (enable_fp16_precision) {
#if defined(_WIN32)
if (QnnHTPBackendTests::ShouldSkipIfHtpArchIsLessThanOrEqualTo(QNN_HTP_DEVICE_ARCH_V68)) {
GTEST_SKIP() << "Test requires HTP FP16 support (arch > V68).";
}
#endif
#if defined(__linux__) && !defined(__aarch64__)
provider_options["soc_model"] = std::to_string(QNN_SOC_MODEL_SM8850);
#endif
provider_options["enable_htp_fp16_precision"] = "1";
}

Expand Down Expand Up @@ -327,7 +335,7 @@ TEST_F(QnnCPUBackendTests, Pad6d) {
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
//
// HTP tests:
TEST_F(QnnHTPBackendTests, PadNoConstantValue_fp16_test) {
TEST_F(QnnHTPBackendTests, PadNoConstantValue_FP32_as_FP16) {
bool has_constant_value_input = false;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
Expand All @@ -343,9 +351,7 @@ TEST_F(QnnHTPBackendTests, PadNoConstantValue_fp16_test) {
}

// Test MLFloat16 constant = 0
TEST_F(QnnHTPBackendTests, PadConstantValue_fp16_0_test) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

TEST_F(QnnHTPBackendTests, PadConstantValue_FP16_0) {
bool has_constant_value_input = true;
bool enable_fp16_precision = true;
// ONNX expects data and constant to have the same dtype.
Expand All @@ -367,9 +373,7 @@ TEST_F(QnnHTPBackendTests, PadConstantValue_fp16_0_test) {

// Test MLFloat16 constant = 1
// Should not be assigned to HTP since HTP only supports fp16 with constant = 0.
TEST_F(QnnHTPBackendTests, PadConstantValue_fp16_1_test) {
QNN_SKIP_TEST_IF_HTP_FP16_UNSUPPORTED();

TEST_F(QnnHTPBackendTests, PadConstantValue_FP16_1) {
bool has_constant_value_input = true;
bool enable_fp16_precision = true;
// ONNX expects data and constant to have the same dtype.
Expand All @@ -389,7 +393,7 @@ TEST_F(QnnHTPBackendTests, PadConstantValue_fp16_1_test) {
2e-3f);
}

TEST_F(QnnHTPBackendTests, PadReflectMode_fp16) {
TEST_F(QnnHTPBackendTests, PadReflectMode_FP32_as_FP16) {
bool has_constant_value_input = true;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
Expand All @@ -410,7 +414,7 @@ TEST_F(QnnHTPBackendTests, PadReflectMode_fp16) {
// QnnDsp <E> "node" generated: could not create op
// QnnDsp <E> RouterWindows graph prepare failed 12
// QnnDsp <E> Failed to finalize graph (id: 1) with err 1002
TEST_F(QnnHTPBackendTests, DISABLED_PadReflectMode_FP16_big_data) {
TEST_F(QnnHTPBackendTests, DISABLED_PadReflectMode_FP32_as_FP16_big_data) {
bool has_constant_value_input = true;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({1, 4, 512, 512}, false, GetFloatDataInRange(1.0f, 10.0f, 4 * 512 * 512)),
Expand All @@ -425,7 +429,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_PadReflectMode_FP16_big_data) {
2e-3f);
}

TEST_F(QnnHTPBackendTests, PadNoConstantNegValue_fp16_test) {
TEST_F(QnnHTPBackendTests, PadNoConstantNegValue_FP32_as_FP16) {
bool has_constant_value_input = false;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
Expand All @@ -440,7 +444,7 @@ TEST_F(QnnHTPBackendTests, PadNoConstantNegValue_fp16_test) {
2e-3f);
}

TEST_F(QnnHTPBackendTests, PadNoConstantMixValue_fp16_test) {
TEST_F(QnnHTPBackendTests, PadNoConstantMixValue_FP32_as_FP16) {
bool has_constant_value_input = false;
bool enable_fp16_precision = true;
RunPadOpTest(TestInputDef<float>({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.6f}),
Expand Down
Loading