From 1ca46eeb59e7b97a3f8f0dc7fab247f8d6d578f5 Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 23 Mar 2026 18:30:30 -0700 Subject: [PATCH 1/4] Handle HardSigmoid in the NCHWc transformer suite --- .../core/optimizer/nchwc_transformer.cc | 12 +++- .../test/optimizer/nchwc_optimizer_test.cc | 71 ++++++++++++++++++- 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/optimizer/nchwc_transformer.cc b/onnxruntime/core/optimizer/nchwc_transformer.cc index 4e03450077718..5d957f7b8a815 100644 --- a/onnxruntime/core/optimizer/nchwc_transformer.cc +++ b/onnxruntime/core/optimizer/nchwc_transformer.cc @@ -881,12 +881,21 @@ void NchwcTransformerImpl::TransformActivation(Node& node) { const bool can_fuse_activation = (node.OpType() == "Relu") || (node.OpType() == "Sigmoid") || - (node.OpType() == "Tanh"); + (node.OpType() == "Tanh") || + (node.OpType() == "HardSigmoid"); if ((nchwc_node.OpType() == "Conv") && (nchwc_node.Domain() == kMSNchwcDomain) && can_fuse_activation && (nchwc_input->starting_original_uses_ == 1) && (graph_utils::GetNodeAttribute(nchwc_node, "activation") == nullptr)) { nchwc_node.AddAttribute("activation", node.OpType()); + if (node.OpType() == "HardSigmoid") { + const auto* alpha_attr = graph_utils::GetNodeAttribute(node, "alpha"); + const auto* beta_attr = graph_utils::GetNodeAttribute(node, "beta"); + InlinedVector<float> activation_params{ + alpha_attr == nullptr ? 0.2f : alpha_attr->f(), + beta_attr == nullptr ? 
0.5f : beta_attr->f()}; + nchwc_node.AddAttribute("activation_params", activation_params); + } FuseNchwcArgument(node, *nchwc_input); removed_nodes_.push_front(node.Index()); } else { @@ -1265,6 +1274,7 @@ void NchwcTransformerImpl::Transform(Node& node) { } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "Concat", {4, 11, 13})) { TransformConcat(node); } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "Relu", {6, 13, 14}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "HardSigmoid", {6, 22}) || graph_utils::IsSupportedOptypeVersionAndDomain(node, "Sigmoid", {6, 13}) || graph_utils::IsSupportedOptypeVersionAndDomain(node, "Tanh", {6, 13}) || graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {1}, kMSDomain) || diff --git a/onnxruntime/test/optimizer/nchwc_optimizer_test.cc b/onnxruntime/test/optimizer/nchwc_optimizer_test.cc index 6078660bf0d6e..748f0c070ed46 100644 --- a/onnxruntime/test/optimizer/nchwc_optimizer_test.cc +++ b/onnxruntime/test/optimizer/nchwc_optimizer_test.cc @@ -1435,18 +1435,85 @@ TEST(NchwcOptimizerTests, Activation) { }; // Verify that the optimizer doesn't add reorders for these activations in - // this pattern. Relu/Sigmoid/Tanh are generally fusable with a + // this pattern. Relu/Sigmoid/Tanh/HardSigmoid are generally fusable with a // preceding convolution, but not here because the Conv output is consumed // both by the activation node and directly by the Add node. Gelu/QuickGelu // are also expected to remain as separate nodes. 
test_case("Relu"); test_case("Sigmoid"); test_case("Tanh"); + test_case("HardSigmoid"); test_case("Gelu", kMSDomain); test_case("QuickGelu", kMSDomain); } -TEST(NchwcOptimizerTests, ActivationSingleConsumerConvGuard) { +TEST(NchwcOptimizerTests, ActivationSingleConsumerConvFusion) { + constexpr float kHardSigmoidAlpha = 0.125f; + constexpr float kHardSigmoidBeta = 0.625f; + + auto test_case = [&](const std::string& activation_op_type) { + auto build_test_case = [&](NchwcTestHelper& helper) { + auto* input_arg = helper.MakeInput({1, 48, 11, 15}); + auto* conv1_output_arg = helper.MakeIntermediate(); + auto* activation_output_arg = helper.MakeIntermediate(); + auto* output_arg = helper.MakeOutput(); + + helper.AddConvNode(input_arg, conv1_output_arg, {32, 48, 3, 3}); + auto& activation_node = helper.AddNode(activation_op_type, {conv1_output_arg}, {activation_output_arg}); + if (activation_op_type == "HardSigmoid") { + activation_node.AddAttribute("alpha", kHardSigmoidAlpha); + activation_node.AddAttribute("beta", kHardSigmoidBeta); + } + helper.AddConvNode(activation_output_arg, output_arg, {16, 32, 1, 1}); + }; + + auto check_nchwc_graph = [&](InferenceSessionWrapper& session) { + auto& graph = session.GetGraph(); + auto op_to_count = CountOpsInGraph(graph); + + EXPECT_EQ(op_to_count["com.microsoft.nchwc.Conv"], 2); + EXPECT_EQ(op_to_count["com.microsoft.nchwc.ReorderInput"], 1); + EXPECT_EQ(op_to_count["com.microsoft.nchwc.ReorderOutput"], 1); + EXPECT_EQ(op_to_count[activation_op_type], 0); + + size_t fused_conv_count = 0; + for (const auto& node : graph.Nodes()) { + if (node.OpType() != "Conv" || node.Domain() != kMSNchwcDomain) { + continue; + } + + const auto& attributes = node.GetAttributes(); + auto activation_it = attributes.find("activation"); + if (activation_it == attributes.end()) { + continue; + } + + fused_conv_count++; + EXPECT_EQ(activation_it->second.s(), activation_op_type); + + auto activation_params_it = attributes.find("activation_params"); + if 
(activation_op_type == "HardSigmoid") { + ASSERT_NE(activation_params_it, attributes.end()); + ASSERT_EQ(activation_params_it->second.floats_size(), 2); + EXPECT_FLOAT_EQ(activation_params_it->second.floats(0), kHardSigmoidAlpha); + EXPECT_FLOAT_EQ(activation_params_it->second.floats(1), kHardSigmoidBeta); + } else { + EXPECT_EQ(activation_params_it, attributes.end()); + } + } + + EXPECT_EQ(fused_conv_count, 1U); + }; + + NchwcOptimizerTester(build_test_case, check_nchwc_graph); + }; + + for (const auto& activation_op_type : {"Relu", "Sigmoid", "Tanh", "HardSigmoid"}) { + test_case(activation_op_type); + } +} + +TEST(NchwcOptimizerTests, ActivationSingleConsumerConvNoFusion) { auto test_case = [&](const std::string& activation_op_type, const std::string& domain = kOnnxDomain) { auto build_test_case = [&](NchwcTestHelper& helper) { auto* input_arg = helper.MakeInput({1, 48, 11, 15}); From 8ed0f191d78efbb7ed162bca7ad1948327664a09 Mon Sep 17 00:00:00 2001 From: Hari Seshadri Date: Mon, 23 Mar 2026 20:56:34 -0700 Subject: [PATCH 2/4] Handle ONNX domain Gelu --- onnxruntime/core/optimizer/nchwc_transformer.cc | 9 +++++---- onnxruntime/test/optimizer/nchwc_optimizer_test.cc | 14 ++++++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/optimizer/nchwc_transformer.cc b/onnxruntime/core/optimizer/nchwc_transformer.cc index 5d957f7b8a815..8c5c803d0d651 100644 --- a/onnxruntime/core/optimizer/nchwc_transformer.cc +++ b/onnxruntime/core/optimizer/nchwc_transformer.cc @@ -1274,10 +1274,11 @@ void NchwcTransformerImpl::Transform(Node& node) { } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "Concat", {4, 11, 13})) { TransformConcat(node); } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "Relu", {6, 13, 14}) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "HardSigmoid", {6, 22}) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "Sigmoid", {6, 13}) || - 
graph_utils::IsSupportedOptypeVersionAndDomain(node, "Tanh", {6, 13}) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {1}, kMSDomain) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "HardSigmoid", {6, 22}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "Sigmoid", {6, 13}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "Tanh", {6, 13}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {20}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {1}, kMSDomain) || graph_utils::IsSupportedOptypeVersionAndDomain(node, "QuickGelu", {1}, kMSDomain)) { TransformActivation(node); } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "BatchNormalization", {7, 9, 14, 15})) { diff --git a/onnxruntime/test/optimizer/nchwc_optimizer_test.cc b/onnxruntime/test/optimizer/nchwc_optimizer_test.cc index 748f0c070ed46..cd210f7bc70ba 100644 --- a/onnxruntime/test/optimizer/nchwc_optimizer_test.cc +++ b/onnxruntime/test/optimizer/nchwc_optimizer_test.cc @@ -1407,7 +1407,9 @@ TEST(NchwcOptimizerTests, UpsampleLinear) { } TEST(NchwcOptimizerTests, Activation) { - auto test_case = [&](const std::string& activation_op_type, const std::string& domain = kOnnxDomain) { + auto test_case = [&](const std::string& activation_op_type, + const std::string& domain = kOnnxDomain, + int opset_version = 13) { auto build_test_case = [&](NchwcTestHelper& helper) { auto* input_arg = helper.MakeInput({1, 48, 11, 15}); auto* conv1_output_arg = helper.MakeIntermediate(); @@ -1431,7 +1433,7 @@ TEST(NchwcOptimizerTests, Activation) { EXPECT_EQ(op_to_count["Add"], 1); }; - NchwcOptimizerTester(build_test_case, check_nchwc_graph); + NchwcOptimizerTester(build_test_case, check_nchwc_graph, opset_version); }; // Verify that the optimizer doesn't add reorders for these activations in @@ -1443,6 +1445,7 @@ TEST(NchwcOptimizerTests, Activation) { test_case("Sigmoid"); test_case("Tanh"); test_case("HardSigmoid"); + test_case("Gelu", 
kOnnxDomain, 20); test_case("Gelu", kMSDomain); test_case("QuickGelu", kMSDomain); } @@ -1514,7 +1517,9 @@ TEST(NchwcOptimizerTests, ActivationSingleConsumerConvFusion) { } TEST(NchwcOptimizerTests, ActivationSingleConsumerConvNoFusion) { - auto test_case = [&](const std::string& activation_op_type, const std::string& domain = kOnnxDomain) { + auto test_case = [&](const std::string& activation_op_type, + const std::string& domain = kOnnxDomain, + int opset_version = 13) { auto build_test_case = [&](NchwcTestHelper& helper) { auto* input_arg = helper.MakeInput({1, 48, 11, 15}); auto* conv1_output_arg = helper.MakeIntermediate(); @@ -1544,12 +1549,13 @@ TEST(NchwcOptimizerTests, ActivationSingleConsumerConvNoFusion) { } }; - NchwcOptimizerTester(build_test_case, check_nchwc_graph); + NchwcOptimizerTester(build_test_case, check_nchwc_graph, opset_version); }; // Gelu/QuickGelu must remain separate even with a single-consumer Conv input, // because the NCHWc Conv activation fuse guard only allows a fixed subset of // activations. 
+ test_case("Gelu", kOnnxDomain, 20); test_case("Gelu", kMSDomain); test_case("QuickGelu", kMSDomain); } From 150c0b12731c33d7426955e0d2151474bb3069a9 Mon Sep 17 00:00:00 2001 From: Hariharan Seshadri Date: Mon, 23 Mar 2026 21:02:17 -0700 Subject: [PATCH 3/4] Update onnxruntime/core/optimizer/nchwc_transformer.cc Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- onnxruntime/core/optimizer/nchwc_transformer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/optimizer/nchwc_transformer.cc b/onnxruntime/core/optimizer/nchwc_transformer.cc index 8c5c803d0d651..fb300bea42056 100644 --- a/onnxruntime/core/optimizer/nchwc_transformer.cc +++ b/onnxruntime/core/optimizer/nchwc_transformer.cc @@ -1279,7 +1279,7 @@ void NchwcTransformerImpl::Transform(Node& node) { graph_utils::IsSupportedOptypeVersionAndDomain(node, "Tanh", {6, 13}) || graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {20}) || graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {1}, kMSDomain) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "QuickGelu", {1}, kMSDomain)) { + graph_utils::IsSupportedOptypeVersionAndDomain(node, "QuickGelu", {1}, kMSDomain)) { TransformActivation(node); } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "BatchNormalization", {7, 9, 14, 15})) { TransformBatchNormalization(node); From b62ed91db1fb7bed6fc8a2465af92e33f954cf49 Mon Sep 17 00:00:00 2001 From: Hariharan Seshadri Date: Tue, 24 Mar 2026 01:38:01 -0700 Subject: [PATCH 4/4] Update onnxruntime/core/optimizer/nchwc_transformer.cc Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- onnxruntime/core/optimizer/nchwc_transformer.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/optimizer/nchwc_transformer.cc b/onnxruntime/core/optimizer/nchwc_transformer.cc index fb300bea42056..b9366ff0abae8 100644 --- 
a/onnxruntime/core/optimizer/nchwc_transformer.cc +++ b/onnxruntime/core/optimizer/nchwc_transformer.cc @@ -1274,12 +1274,12 @@ void NchwcTransformerImpl::Transform(Node& node) { } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "Concat", {4, 11, 13})) { TransformConcat(node); } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "Relu", {6, 13, 14}) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "HardSigmoid", {6, 22}) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "Sigmoid", {6, 13}) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "Tanh", {6, 13}) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {20}) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {1}, kMSDomain) || - graph_utils::IsSupportedOptypeVersionAndDomain(node, "QuickGelu", {1}, kMSDomain)) { + graph_utils::IsSupportedOptypeVersionAndDomain(node, "HardSigmoid", {6, 22}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "Sigmoid", {6, 13}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "Tanh", {6, 13}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {20}) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "Gelu", {1}, kMSDomain) || + graph_utils::IsSupportedOptypeVersionAndDomain(node, "QuickGelu", {1}, kMSDomain)) { TransformActivation(node); } else if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "BatchNormalization", {7, 9, 14, 15})) { TransformBatchNormalization(node);