Commit 9bd8064

Merge conv and linear fusion passes into FuseBatchNormPass
1 parent 8003ba3 commit 9bd8064

File tree

6 files changed (+240 / -411 lines)


backends/xnnpack/_passes/__init__.py

Lines changed: 2 additions & 8 deletions
@@ -21,12 +21,7 @@
 )
 from executorch.backends.xnnpack._passes.decompose_cat import DecomposeConcatenate
 from executorch.backends.xnnpack._passes.fuse_activation_pass import FuseActivationPass
-from executorch.backends.xnnpack._passes.fuse_batch_norm_with_conv import (
-    FuseBatchNormWithConvPass,
-)
-from executorch.backends.xnnpack._passes.fuse_batch_norm_with_linear import (
-    FuseBatchNormWithLinearPass,
-)
+from executorch.backends.xnnpack._passes.fuse_batch_norm import FuseBatchNormPass
 from executorch.backends.xnnpack._passes.prelu_reshape_pass import PReLUReshapePass
 from executorch.backends.xnnpack._passes.tag_implicit_q_dq_pass import (
     TagImplicitQDqPass,
@@ -66,8 +61,7 @@ def __init__(
             ConvertToLinearPass,
             ConvertToSDPAPass,
             ConstPropPass,
-            FuseBatchNormWithConvPass,
-            FuseBatchNormWithLinearPass,
+            FuseBatchNormPass,
             FuseActivationPass,
             DecomposeConcatenate,
             RemoveGetItemPass,
backends/xnnpack/_passes/fuse_batch_norm.py

Lines changed: 232 additions & 0 deletions
@@ -0,0 +1,232 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import operator

import torch
from executorch.backends.transforms.utils import (
    create_constant_placeholder,
    delete_constant_placeholder,
)

from executorch.backends.xnnpack._passes.xnnpack_pass import XNNPACKPass

from executorch.backends.xnnpack.utils.utils import (
    get_param_tensor,
    get_tensor_name,
    is_param_node,
)
from executorch.exir import ExportedProgram
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import PassResult
from torch.export.graph_signature import InputKind

from torch.nn.utils.fusion import fuse_conv_bn_weights, fuse_linear_bn_weights


class FuseBatchNormPass(XNNPACKPass):
    """
    BatchNorm can be implemented using 1x1 Depthwise Convolution. However, doing so will increase
    memory usage since we serialize new weights to represent the convolution. In most cases,
    BatchNorm is used after convolution or linear. The 1x1 depthwise convolution can then be fused
    with the previous convolution. For linear cases, BatchNorm can be folded into the previous linear layer.
    """

    def call(self, graph_module: torch.fx.GraphModule):
        graph = graph_module.graph
        constant_placeholders_to_delete = set()
        for node in graph.nodes:
            # We want to discover a chain of conv -> batch_norm or linear -> batch_norm.
            # Only proceed if the current node is a conv or linear node, and has a single
            # user/successor.
            is_conv = node.target == exir_ops.edge.aten.convolution.default
            is_linear = node.target == exir_ops.edge.aten.linear.default

            if not (is_conv or is_linear):
                continue
            if len(node.users) != 1:
                continue

            # Conv or linear op to fuse.
            target_op = node

            # The single user of the op must be batch_norm. If not, bail.
            bn = list(target_op.users.keys())[0]
            if (
                bn.target != exir_ops.edge.aten.native_batch_norm.default
                and bn.target
                != exir_ops.edge.aten._native_batch_norm_legit_no_training.default
            ):
                continue

            if not self.can_fuse(target_op, bn, self.exported_program):
                continue

            self._fuse_ops(
                graph_module,
                graph,
                target_op,
                bn,
                is_conv,
                constant_placeholders_to_delete,
            )

        if len(constant_placeholders_to_delete) > 0:
            graph_module.graph.eliminate_dead_code()
            for node in constant_placeholders_to_delete:
                if (node is not None) and (len(node.users) == 0):
                    delete_constant_placeholder(self.exported_program, node)

        graph_module.recompile()
        # To regenerate metadata and shape information, retrace the module.
        graph_module = super().call(graph_module).graph_module

        return PassResult(graph_module, True)

    @staticmethod
    def can_fuse(
        target_op: torch.fx.Node, bn: torch.fx.Node, program: ExportedProgram
    ) -> bool:
        """
        Determine whether a batchnorm node can be fused with a preceding conv or linear node.
        """

        # All the users of the batchnorm node must be getitem ops. batchnorm
        # returns a 3-element tuple. Each user must only access the first
        # element of the tuple.
        if [
            (user.target == operator.getitem and user.args[1] == 0) for user in bn.users
        ].count(False):
            return False

        target_op_weights = target_op.args[1]
        bn_weights = bn.args[1]

        # Check that the weights for conv or linear and batchnorm are both params.
        if not isinstance(target_op_weights, torch.fx.Node) or not isinstance(
            bn_weights, torch.fx.Node
        ):
            return False

        if [
            is_param_node(program, node) for node in {target_op_weights, bn_weights}
        ].count(False):
            return False

        return True

    def _fuse_ops(
        self,
        graph_module: torch.fx.GraphModule,
        graph: torch.fx.Graph,
        target_op: torch.fx.Node,
        bn: torch.fx.Node,
        is_conv: bool,
        constant_placeholders_to_delete: set,
    ) -> None:
        """
        Fuse a BatchNorm into the preceding conv or linear op.
        Update the fused op's weight and bias, rewire users of the BatchNorm's output, and remove the BatchNorm node.
        """

        if is_conv:
            assert len(target_op.args) == 9
        else:  # Linear path: (input, weight, bias).
            assert len(target_op.args) == 3

        # Get the weight and bias parameters from the conv or linear op.
        target_op_weight = get_param_tensor(self.exported_program, target_op.args[1])
        target_op_weight_name = get_tensor_name(
            self.exported_program, target_op.args[1]
        )
        assert target_op_weight is not None

        target_op_bias = get_param_tensor(self.exported_program, target_op.args[2])
        target_op_bias_name = get_tensor_name(self.exported_program, target_op.args[2])

        # Get the parameters from the batchnorm op.
        assert (
            bn.target == exir_ops.edge.aten.native_batch_norm.default
            and len(bn.args) == 8
        ) or (
            bn.target == exir_ops.edge.aten._native_batch_norm_legit_no_training.default
            and len(bn.args) == 7
        )
        bn_weight = get_param_tensor(self.exported_program, bn.args[1])
        bn_bias = get_param_tensor(self.exported_program, bn.args[2])

        running_mean = get_param_tensor(self.exported_program, bn.args[3])
        assert running_mean is not None

        running_var = get_param_tensor(self.exported_program, bn.args[4])
        assert running_var is not None

        # args[7] for native_batch_norm, but args[6] for
        # _native_batch_norm_legit_no_training (which doesn't have training
        # as an arg).
        eps = bn.args[-1]

        # Compute the updated weight and bias after fusing the conv or linear op with the batchnorm op.
        fuse_args = (
            target_op_weight,
            target_op_bias,
            running_mean,
            running_var,
            eps,
            bn_weight,
            bn_bias,
        )

        if is_conv:
            is_transpose = target_op.args[6]
            fused_weight, fused_bias = fuse_conv_bn_weights(*fuse_args, is_transpose)
        else:  # Linear path.
            fused_weight, fused_bias = fuse_linear_bn_weights(*fuse_args)

        fused_weight_name = (target_op_weight_name + "_fused_bn").replace(".", "_")
        if target_op_bias_name == "":
            fused_bias_name = (target_op_weight_name + "_bias_fused_bn").replace(
                ".", "_"
            )
        else:
            fused_bias_name = (target_op_bias_name + "_fused_bn").replace(".", "_")

        # Modify the graph by updating the weight and bias of the conv or linear op
        # with the fused weight and bias params, and replacing all the users
        # of getitem(batchnorm) with the conv or linear op.
        with graph.inserting_before(target_op.args[1]):
            fused_op_weight_node = create_constant_placeholder(
                exp_program=self.exported_program,
                graph=graph_module.graph,
                kind=InputKind.PARAMETER,
                name=fused_weight_name,
                data=fused_weight,
            )
            if fused_bias is not None:
                fused_op_bias_node = create_constant_placeholder(
                    exp_program=self.exported_program,
                    graph=graph_module.graph,
                    kind=InputKind.PARAMETER,
                    name=fused_bias_name,
                    data=fused_bias,
                )
            else:
                fused_op_bias_node = None

        # Replace weight and bias with the fused batchnorm values.
        args = list(target_op.args)
        args[1] = fused_op_weight_node
        args[2] = fused_op_bias_node
        target_op.args = tuple(args)

        # Remove any use of batchnorm from the graph.
        for user in bn.users.copy():
            assert user.target == operator.getitem
            user.replace_all_uses_with(target_op)
            graph.erase_node(user)

        graph.erase_node(bn)
        constant_placeholders_to_delete.update(target_op.args[1:3] + bn.args[1:5])
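
For context only (not part of the commit): the fused weight and bias come from torch.nn.utils.fusion, which folds the BatchNorm affine transform and running statistics into the preceding op's parameters. The standalone sketch below, with arbitrary toy shapes, illustrates the equivalence the pass relies on for the linear path; the conv path uses fuse_conv_bn_weights the same way.

# Standalone sketch (toy shapes chosen for illustration): BatchNorm applied after
# a linear layer equals a single linear layer with rescaled weight and bias.
import torch
from torch.nn.utils.fusion import fuse_linear_bn_weights

linear = torch.nn.Linear(8, 4)
bn = torch.nn.BatchNorm1d(4).eval()   # eval mode: uses running statistics
bn.running_mean = torch.randn(4)      # give the running stats non-trivial values
bn.running_var = torch.rand(4) + 0.5

# Same argument order the pass builds in fuse_args:
# weight, bias, running_mean, running_var, eps, bn_weight, bn_bias.
fused_weight, fused_bias = fuse_linear_bn_weights(
    linear.weight, linear.bias,
    bn.running_mean, bn.running_var, bn.eps,
    bn.weight, bn.bias,
)

x = torch.randn(2, 8)
reference = bn(linear(x))
fused = torch.nn.functional.linear(x, fused_weight, fused_bias)
assert torch.allclose(reference, fused, atol=1e-5)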
