
Commit 98a954c

Remove addmm node check from linear pass and combine conv/linear fusion test files
1 parent 2aefc75 commit 98a954c

3 files changed: +79 additions, -155 deletions

backends/xnnpack/_passes/fuse_batch_norm_with_linear.py

Lines changed: 18 additions & 62 deletions
@@ -32,12 +32,11 @@ def call(self, graph_module: torch.fx.GraphModule):
         graph = graph_module.graph
         constant_placeholders_to_delete = set()
         for linear in graph.nodes:
-            # We want to discover a chain of linear -> batch_norm or addmm -> batch_norm.
-            # Only proceed if the current node is a linear or addmm node, and has a single
+            # We want to discover a chain of linear -> batch_norm.
+            # Only proceed if the current node is a linear node, and has a single
             # user/successor.
             if (
                 linear.target != exir_ops.edge.aten.linear.default
-                and linear.target != exir_ops.edge.aten.addmm.default
                 or len(linear.users) != 1
             ):
                 continue
@@ -51,34 +50,18 @@ def call(self, graph_module: torch.fx.GraphModule):
             ):
                 continue
 
+            if not self.can_fuse(linear, bn, self.exported_program):
+                continue
+
             # Get the parameters
             assert len(linear.args) == 3
 
-            if linear.target == exir_ops.edge.aten.addmm.default:
-                # addmm.args = (bias, input, weight)
-                linear_bias_arg = linear.args[0]
-                linear_input_arg = linear.args[1]
-                # Unwrap permute_copy to access weight parameter node
-                linear_weight_arg = FuseBatchNormWithLinearPass._unwrap_node(
-                    linear.args[2]
-                )
-            else:
-                # linear.args = (input, weight, bias)
-                linear_input_arg = linear.args[0]
-                linear_weight_arg = linear.args[1]
-                linear_bias_arg = linear.args[2]
-
-            if not self.can_fuse(linear_weight_arg, bn, self.exported_program):
-                continue
-
-            linear_weight = get_param_tensor(self.exported_program, linear_weight_arg)
-            linear_weight_name = get_tensor_name(
-                self.exported_program, linear_weight_arg
-            )
+            linear_weight = get_param_tensor(self.exported_program, linear.args[1])
+            linear_weight_name = get_tensor_name(self.exported_program, linear.args[1])
             assert linear_weight is not None
 
-            linear_bias = get_param_tensor(self.exported_program, linear_bias_arg)
-            linear_bias_name = get_tensor_name(self.exported_program, linear_bias_arg)
+            linear_bias = get_param_tensor(self.exported_program, linear.args[2])
+            linear_bias_name = get_tensor_name(self.exported_program, linear.args[2])
 
             # Get the parameters from the batchnorm op
             assert (
@@ -112,12 +95,6 @@ def call(self, graph_module: torch.fx.GraphModule):
                 bn_weight,
                 bn_bias,
             )
-
-            if linear.target == exir_ops.edge.aten.addmm.default:
-                # fuse_linear_bn_weights returns weight [out, in];
-                # permute_copy node was removed, so weight must be transposed to [in, out] for addmm
-                fused_weight = fused_weight.t()
-
             fused_weight_name = (linear_weight_name + "_fused_bn").replace(".", "_")
             if linear_bias_name == "":
                 fused_bias_name = (linear_weight_name + "_bias_fused_bn").replace(
@@ -130,7 +107,7 @@ def call(self, graph_module: torch.fx.GraphModule):
             # with the fused weight and bias params, and replacing all the users
             # of getitem(batchnorm) with the linear op.
 
-            with graph.inserting_before(linear_weight_arg):
+            with graph.inserting_before(linear.args[1]):
                 fused_linear_weight_node = create_constant_placeholder(
                     exp_program=self.exported_program,
                     graph=graph_module.graph,
@@ -149,20 +126,11 @@ def call(self, graph_module: torch.fx.GraphModule):
             else:
                 fused_linear_bias_node = None
 
-            if linear.target == exir_ops.edge.aten.addmm.default:
-                # addmm.args = (bias, input, weight)
-                linear.args = (
-                    fused_linear_bias_node,
-                    linear_input_arg,
-                    fused_linear_weight_node,
-                )
-            else:
-                # linear.args = (input, weight, bias)
-                linear.args = (
-                    linear_input_arg,
-                    fused_linear_weight_node,
-                    fused_linear_bias_node,
-                )
+            linear.args = (
+                linear.args[0],
+                fused_linear_weight_node,
+                fused_linear_bias_node,
+            )
 
             # Remove any use of batchnorm from the graph
             for user in bn.users.copy():
@@ -187,7 +155,7 @@ def call(self, graph_module: torch.fx.GraphModule):
 
     @staticmethod
     def can_fuse(
-        linear_weights: torch.fx.Node,
+        linear: torch.fx.Node,
         bn: torch.fx.Node,
         program: ExportedProgram,
     ) -> bool:
@@ -206,23 +174,11 @@ def can_fuse(
         bn_weights = bn.args[1]
 
         # Check that the weights for linear and batchnorm are both params
-        if not isinstance(linear_weights, torch.fx.Node) or not isinstance(
+        if not isinstance(linear, torch.fx.Node) or not isinstance(
             bn_weights, torch.fx.Node
         ):
             return False
 
-        if [
-            is_param_node(program, node) for node in {linear_weights, bn_weights}
-        ].count(False):
+        if [is_param_node(program, node) for node in {linear, bn_weights}].count(False):
            return False
-
        return True
-
-    @staticmethod
-    def _unwrap_node(node: torch.fx.Node) -> torch.fx.Node:
-        while node.op == "call_function" and node.target in {
-            exir_ops.edge.aten.permute.default,
-            exir_ops.edge.aten.permute_copy.default,
-        }:
-            node = node.args[0]
-        return node
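
For context on the arithmetic this pass relies on: the fused weight and bias come from standard batch-norm folding, and the removed addmm branch only needed a transpose because fuse_linear_bn_weights returns the weight in [out, in] layout while addmm expects [in, out]. Below is a minimal, self-contained sketch of that folding math; the helper name fold_bn_into_linear is illustrative and not part of the pass.

import torch

def fold_bn_into_linear(w, b, bn_rm, bn_rv, bn_eps, bn_w, bn_b):
    # Standard folding: eval-mode bn(Wx + b) = (scale * W) x + scale * (b - rm) + bn_b,
    # where scale = bn_w / sqrt(rv + eps). The weight stays in [out, in] layout.
    scale = bn_w / torch.sqrt(bn_rv + bn_eps)
    return w * scale.unsqueeze(1), (b - bn_rm) * scale + bn_b

lin = torch.nn.Linear(2, 2)
bn = torch.nn.BatchNorm1d(2)
bn(torch.randn(8, 2) * 3 + 1)  # one training-mode pass so the running stats are non-trivial
bn.eval()

x = torch.randn(4, 2)
fused_w, fused_b = fold_bn_into_linear(
    lin.weight, lin.bias, bn.running_mean, bn.running_var, bn.eps, bn.weight, bn.bias
)
# The fused linear reproduces the linear -> batch_norm chain in eval mode.
assert torch.allclose(
    bn(lin(x)), torch.nn.functional.linear(x, fused_w, fused_b), atol=1e-6
)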

backends/xnnpack/test/passes/test_batch_norm_fusion.py

Lines changed: 61 additions & 6 deletions
@@ -11,11 +11,15 @@
 from executorch.backends.xnnpack._passes.fuse_batch_norm_with_conv import (
     FuseBatchNormWithConvPass,
 )
+from executorch.backends.xnnpack._passes.fuse_batch_norm_with_linear import (
+    FuseBatchNormWithLinearPass,
+)
 from executorch.backends.xnnpack.test.tester import RunPasses, Tester
 
 
 class TestBatchNormFusion(unittest.TestCase):
-    PassStage = RunPasses([FuseBatchNormWithConvPass])
+    ConvPassStage = RunPasses([FuseBatchNormWithConvPass])
+    LinearPassStage = RunPasses([FuseBatchNormWithLinearPass])
     bn_name = "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default"
 
     def setUp(self):
@@ -42,7 +46,22 @@ def forward(self, x):
             y = y + y
             return self.bn(y)
 
-    def test_fp32_batch_norm_fusion(self):
+    class ModelLinearBN(torch.nn.Module):
+        def __init__(self, in_features, out_features):
+            super().__init__()
+            op = torch.nn.Linear
+            self.linear = op(in_features, out_features)
+            self.bn = torch.nn.BatchNorm1d(out_features)
+            self.forward(torch.randn(2, 2) * 2 + 2)  # update the BN stats
+
+        def forward(self, x):
+            y = self.linear(x)
+            y = self.bn(y)
+            y = self.linear(y)
+            y = y + y
+            return self.bn(y)
+
+    def test_fp32_conv_batch_norm_fusion(self):
         for transpose in [False, True]:
             (
                 Tester(
@@ -51,12 +70,12 @@ def test_fp32_batch_norm_fusion(self):
                 )
                 .export()
                 .to_edge()
-                .run_passes(self.PassStage)
+                .run_passes(self.ConvPassStage)
                 .check_count({self.bn_name: 1})
                 .run_method_and_compare_outputs()
             )
 
-    def test_q8_batch_norm_fusion(self):
+    def test_q8_conv_batch_norm_fusion(self):
         for transpose in [False, True]:
             (
                 Tester(
@@ -66,12 +85,12 @@ def test_q8_batch_norm_fusion(self):
                 .quantize()
                 .export()
                 .to_edge()
-                .run_passes(self.PassStage)
+                .run_passes(self.ConvPassStage)
                 .check_count({self.bn_name: 1})
                 .run_method_and_compare_outputs()
             )
 
-    def test_fp32_batch_norm_no_fusion_doesnt_partition(self):
+    def test_fp32_conv_batch_norm_no_fusion_doesnt_partition(self):
         """
         We do not currently support standalone batch norms (i.e. batch norms that are
         not fused with a conv). This is planned, but until implemented, this test ensures
@@ -94,3 +113,39 @@ def forward(self, x):
             .partition()
             .check_count({self.bn_name: 1})
         )
+
+    def test_fp32_linear_batch_norm_fusion(self):
+        (
+            Tester(
+                self.ModelLinearBN(2, 2).eval(),
+                (torch.randn(2, 2),),
+            )
+            .export()
+            .to_edge_transform_and_lower()
+            .check_count({self.bn_name: 1})
+            .run_method_and_compare_outputs()
+        )
+
+    # def test_fp32_linear_batch_norm_no_fusion_doesnt_partition(self):
+    #     """
+    #     We do not currently support standalone batch norms (i.e. batch norms that are
+    #     not fused with a linear). This is planned, but until implemented, this test ensures
+    #     that we do not partition the standalone batch norm and then fail to lower.
+    #     """
+    #
+    #     class BN(torch.nn.Module):
+    #         def __init__(self):
+    #             super().__init__()
+    #             self.bn = torch.nn.BatchNorm1d(2)
+    #
+    #         def forward(self, x):
+    #             return self.bn(x)
+    #
+    #     (
+    #         Tester(BN(), (torch.randn(2, 2),))
+    #         .export()
+    #         .to_edge()
+    #         .check_count({self.bn_name: 1})
+    #         .partition()
+    #         .check_count({self.bn_name: 1})
+    #     )
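
One detail worth noting in the new ModelLinearBN fixture is the self.forward(torch.randn(2, 2) * 2 + 2) call in __init__. With freshly initialized running statistics (mean 0, variance 1), an eval-mode BatchNorm1d is nearly the identity, so presumably the warm-up pass exists to make the output comparison actually sensitive to the fusion. A standalone sketch of that effect (not part of the test file):

import torch

bn = torch.nn.BatchNorm1d(2)
x = torch.randn(2, 2) * 2 + 2

bn.eval()
print(torch.allclose(bn(x), x, atol=1e-3))  # True: default running stats make eval BN ~identity

bn.train()
bn(x)          # one training-mode pass updates running_mean / running_var
bn.eval()
print(torch.allclose(bn(x), x, atol=1e-3))  # False: BN now visibly transforms its input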

backends/xnnpack/test/passes/test_batch_norm_linear_fusion.py

Lines changed: 0 additions & 87 deletions
This file was deleted.
