Skip to content

Commit ee7e95e

Browse files
authored
Arm backend: Support quantized cond and while (#15849)
Arm backend: Support quantized while_loop - Add annotation logic. - Extend cond handling in q-dq folding to while - Extend InsertCondRescale pass to handle while. ---------------------------------------------------- Arm backend: Add initial while_loop support. - Refactor CondSupported to also test while, move to its own file and split into one check for submodule nodes, and one for ops. - Add node visitor - Add tests ----------------------------------------------------- Arm backend: Initial quantization support for conditional The standard prepare/convert_pt2 does not seem to support quantization out of the box. Instead, a quantization call is introduced in the TOSAQuantizer, that does the necessary steps to get correct quantization on submodules. A custom Quantize step is needed in the ArmTester to make this work in testing. Additionally, getting correct quantization parameters needs some delicate handling. The model is calibrated twice, once for each code path. Because of this, the observers outside the if/else submodules see different data than the observers inside the submodules. Rescales need to be inserted to handle this. To get a correctly traced graph at all times, we 1. Fold the outermost quant ops in the submodules at the same time as the cond is folded. Add qparam meta to folded nodes inside submodule. 2. Use this meta in the InsertCondRescale pass to insert a tosa.RESCALE to handle the different qparams. 3. After this, the submodule's q-dq nodes can be folded normally. Signed-off-by: Erik Lundell <[email protected]>
1 parent 65d4b94 commit ee7e95e

22 files changed

+1019
-168
lines changed

backends/arm/_passes/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,11 @@
8888
from .insert_int32_casts_after_int64_placeholders import ( # noqa
8989
InsertInt32CastsAfterInt64PlaceholdersPass,
9090
)
91-
from .insert_rescales_pass import InsertRescaleInt32Pass, InsertRescalePass # noqa
91+
from .insert_rescales_pass import ( # noqa
92+
InsertControlFlowRescalesPass,
93+
InsertRescaleInt32Pass,
94+
InsertRescalePass,
95+
)
9296
from .insert_table_ops import InsertTableOpsPass # noqa
9397
from .match_arg_dtype_pass import MatchArgDtypePass # noqa
9498
from .match_arg_ranks_pass import MatchArgRanksPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
FuseEqualPlaceholdersPass,
8686
FuseQuantizedActivationPass,
8787
FuseViewCopyTransformPass,
88+
InsertControlFlowRescalesPass,
8889
InsertInt32CastsAfterInt64PlaceholdersPass,
8990
InsertRescaleInt32Pass,
9091
InsertRescalePass,
@@ -195,6 +196,7 @@ def _tosa_pipeline(
195196
# Ticket: MLETORCH-1539
196197
DecomposeLinearPass(),
197198
InsertRescaleInt32Pass(),
199+
InsertControlFlowRescalesPass(),
198200
]
199201
)
200202

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from typing import cast, Optional, Set, Type
1111

12+
import torch
1213
from executorch.backends.arm._passes import ArmPass
1314
from executorch.backends.arm._passes.arm_pass_utils import (
1415
get_param_tensor,
@@ -152,6 +153,83 @@ def fold_and_annotate_arg(
152153
if len(n.users) == 0:
153154
graph_module.graph.erase_node(n)
154155

156+
def _handle_control_flow_node(self, node: Node, graph_module: GraphModule):
157+
"""Fold outmost quant nodes inside submodule.
158+
placeholders => qs => dqs => ... => qs => dqs => output
159+
becomes
160+
placeholders => dqs => ... => qs => output,
161+
With output_qparams meta in the placeholders, and input_qparams meta in the output node.
162+
"""
163+
match node.target:
164+
case torch.ops.higher_order.cond:
165+
submodule_nodes = cast(list[Node], node.args[1:3])
166+
args = cast(list[Node], node.args[-1])
167+
case torch.ops.higher_order.while_loop:
168+
submodule_nodes = cast(list[Node], node.args[0:2])
169+
args = cast(list[Node], node.args[-2])
170+
case _:
171+
raise ValueError(f"Unhandled target {node.target}")
172+
submodules = (
173+
graph_module.get_submodule(str(submodule_node.target))
174+
for submodule_node in submodule_nodes
175+
)
176+
for submodule in submodules:
177+
submodule = cast(GraphModule, submodule)
178+
output_node = submodule.graph.output_node()
179+
output_node.meta["input_qparams"] = {}
180+
nodes_to_remove = []
181+
arg_id = 0
182+
for submodule_node in submodule.graph.nodes:
183+
# Remove initial q nodes and ending dq nodes in the module.
184+
submodule_node = cast(Node, submodule_node)
185+
if (
186+
submodule_node.target in Q_OPS
187+
and list(submodule_node.all_input_nodes)[0].op == "placeholder"
188+
):
189+
input_node = cast(Node, submodule_node.args[0])
190+
input_node.meta["val"] = submodule_node.meta["val"]
191+
quant_args = QuantArgs.from_operator(
192+
submodule_node.target, submodule_node.args
193+
)
194+
input_node.meta["output_qparams"] = {0: quant_args}
195+
196+
submodule_node.replace_all_uses_with(input_node)
197+
nodes_to_remove.append(submodule_node)
198+
if submodule_node.target in DQ_OPS:
199+
has_non_output_user = False
200+
for user in copy.copy(submodule_node.users):
201+
if user.op != "output":
202+
has_non_output_user = True
203+
else:
204+
input_node = cast(Node, submodule_node.args[0])
205+
submodule_node.replace_all_uses_with(input_node)
206+
arg_index = cast(list[Node], output_node.args[0]).index(
207+
input_node
208+
)
209+
quant_args = QuantArgs.from_operator(
210+
submodule_node.target, submodule_node.args
211+
)
212+
output_node.meta["input_qparams"][arg_index] = quant_args
213+
214+
# Remove dq node if it only has the output node as its user.
215+
if not has_non_output_user:
216+
nodes_to_remove.append(submodule_node)
217+
# Placeholders without users won't be retraced with correct dtype, do it manually.
218+
# Control flow node input is matched to placeholder nodes in the submodule by index.
219+
# This means it will break if another pass inserts a placeholder before this pass.
220+
if submodule_node.op == "placeholder":
221+
if len(submodule_node.users) == 0:
222+
submodule_node.meta["val"] = args[arg_id].meta["val"]
223+
arg_id += 1
224+
if arg_id > len(args):
225+
raise RuntimeError(
226+
"Submodule had more placeholders than calling node had inputs."
227+
" This is probably due to a placeholder being inserted in a pass."
228+
)
229+
for node_to_remove in nodes_to_remove:
230+
submodule.graph.erase_node(node_to_remove)
231+
return
232+
155233
def call(self, graph_module: GraphModule) -> PassResult: # noqa: C901
156234

157235
# Loop over the graph nodes and find any node in the 'targeted_ops' list.
@@ -181,8 +259,8 @@ def call(self, graph_module: GraphModule) -> PassResult: # noqa: C901
181259
n.meta["input_qparams"] = {}
182260
n.meta["output_qparams"] = {}
183261
for i, arg in enumerate(n.args):
184-
if isinstance(arg, list):
185-
self.fold_and_annotate_arg(graph_module, n, arg, i)
262+
if isinstance(arg, (list, tuple)):
263+
self.fold_and_annotate_arg(graph_module, n, arg, i) # type: ignore
186264

187265
elif isinstance(arg, Node):
188266
self.fold_and_annotate_arg(graph_module, n, [arg], i)
@@ -211,6 +289,12 @@ def call(self, graph_module: GraphModule) -> PassResult: # noqa: C901
211289
output_dtype = output_qparams[0].dtype
212290
set_node_arg(n, "dtype", output_dtype)
213291

292+
if n.target in (
293+
torch.ops.higher_order.cond,
294+
torch.ops.higher_order.while_loop,
295+
):
296+
self._handle_control_flow_node(n, graph_module)
297+
214298
# retrace the graph to update the fake tensor types
215299
graph_module = super().call(graph_module).graph_module
216300

backends/arm/_passes/insert_rescales_pass.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,3 +369,216 @@ def call(self, graph_module: GraphModule) -> PassResult:
369369
graph_module.recompile()
370370

371371
return PassResult(graph_module, modified)
372+
373+
374+
class InsertControlFlowRescalesPass(ArmPass):
375+
"""The quantization parameters for tensors going into and coming out of a submodule are not guaranteed to
376+
match the quantization parameters for the corresponding tensors inside the submodule. For example, cond has
377+
different annotation on input and output, while the entire graph inside the submodule could be using shared
378+
annotation. This pass solves this by inserting rescales in the beginning and end of the submodule
379+
that transform the tensor from one set of quantization parameters to another.
380+
381+
The pass is run by the graph_module containing the control flow operator, but requires that the affected nodes
382+
inside the submodule have been q-dq folded and have input/output_qparams meta.
383+
"""
384+
385+
_passes_required_after: Set[Type[ExportPass]] = set()
386+
387+
def _get_input_nodes(self, graph_module: GraphModule):
388+
return [node for node in graph_module.graph.nodes if node.op == "placeholder"]
389+
390+
def _insert_rescale(
391+
self,
392+
in_qparams: QuantArgs,
393+
out_qparams: QuantArgs,
394+
from_node: Node,
395+
graph_module: GraphModule,
396+
):
397+
"""Insert a rescale into the graph, inheriting meta from `from_node`.
398+
The node is not connected to anything, that is up to the user."""
399+
400+
new_scales = [
401+
in_qparams.get_scale_per_tensor() / out_qparams.get_scale_per_tensor()
402+
]
403+
404+
rescale_node = create_node(
405+
graph_module.graph,
406+
exir_ops.backend.tosa.RESCALE.default,
407+
(
408+
None,
409+
out_qparams.dtype,
410+
new_scales,
411+
in_qparams.get_zp_per_tensor(), # Old zero point
412+
out_qparams.get_zp_per_tensor(), # New zero point
413+
),
414+
from_node=from_node,
415+
)
416+
return rescale_node
417+
418+
def _rescale_submodule_inputs(
419+
self, submodule: GraphModule, input_qparams_map: Dict[int, QuantArgs]
420+
) -> bool:
421+
"""Insert rescales at the inputs of `submodule` to match the qparams outside the submodule.
422+
Matching the correct qparams gets a bit tricky:
423+
Containing module: | submodule:
424+
ops => cond | => placeholders => ...
425+
426+
The dq->q qparam pair we want to convert to a rescale is:
427+
(input qparams of op, output qparams of placeholder)
428+
And the rescale is inserted after the placeholder.
429+
430+
Args:
431+
submodule: GraphModule: the GraphModule in which to rescale the inputs.
432+
input_qparams_map: A map of input indexes mapping to QuantArgs. Not guaranteed to contain a mapping
433+
for every submodule input.
434+
Returns:
435+
True if at least one rescale was inserted, False otherwise.
436+
"""
437+
438+
modified = False
439+
input_nodes = self._get_input_nodes(submodule)
440+
for qargs_index in input_qparams_map:
441+
input_node = input_nodes[qargs_index]
442+
if len(input_node.users) == 0:
443+
continue
444+
if len(out_qparams_map := input_node.meta.get("output_qparams", {})) != 1:
445+
raise ValueError(
446+
f"Expected submodule input {input_node} to have exactly one output qparam, got {out_qparams_map}"
447+
)
448+
in_qparams = input_qparams_map[qargs_index]
449+
out_qparams = cast(QuantArgs, out_qparams_map[0])
450+
451+
# Remove qparam meta to not confuse folding pass.
452+
del input_node.meta["output_qparams"]
453+
if in_qparams == out_qparams:
454+
continue
455+
with submodule.graph.inserting_after(input_node):
456+
modified = True
457+
rescale_node = self._insert_rescale(
458+
in_qparams, out_qparams, input_node, submodule
459+
)
460+
input_node.replace_all_uses_with(replace_with=rescale_node)
461+
rescale_node.update_arg(0, input_node)
462+
return modified
463+
464+
def _rescale_submodule_outputs(
465+
self, submodule: GraphModule, output_qparams_map: Dict[int, QuantArgs]
466+
) -> bool:
467+
"""Insert rescales at the outputs of `submodule` to match the qparams outside the submodule.
468+
Matching the correct qparams gets a bit tricky:
469+
Submodule: | Containing module:
470+
output_nodes => output |=> getitems => ...
471+
472+
The dq->q qparam pair we want to convert to a rescale is:
473+
(input qparam of output_node, output qparam of getitem)
474+
And the rescale is inserted between op and output. Note that the output qparam of op is called input_qargs,
475+
since it is the input to the dq-q pair.
476+
477+
Args:
478+
submodule: GraphModule: the GraphModule in which to rescale the outputs.
479+
output_qparams_map: A map of output indexes mapping to QuantArgs. Not guaranteed to contain a mapping
480+
for every submodule output.
481+
Returns:
482+
True if at least one rescale was inserted, False otherwise.
483+
"""
484+
485+
modified = False
486+
output_node = submodule.graph.output_node()
487+
output_args = list(cast(tuple[Node], output_node.args[0]))
488+
input_qparams_map = cast(
489+
dict[int, QuantArgs], output_node.meta["input_qparams"]
490+
)
491+
for qargs_index in output_qparams_map:
492+
output_arg_node = output_args[qargs_index]
493+
in_qparams = input_qparams_map[qargs_index]
494+
out_qparams = output_qparams_map[qargs_index]
495+
if in_qparams == out_qparams:
496+
continue
497+
with submodule.graph.inserting_before(output_node):
498+
modified = True
499+
rescale_node = self._insert_rescale(
500+
in_qparams, out_qparams, output_arg_node, submodule
501+
)
502+
output_args[qargs_index] = rescale_node
503+
rescale_node.update_arg(0, output_arg_node)
504+
output_node.update_arg(0, tuple(output_args))
505+
# Remove qparam meta to not confuse folding pass.
506+
del output_node.meta["input_qparams"]
507+
return modified
508+
509+
def _get_input_qparams_map(self, node: Node, idx: int):
510+
input_qparams_meta = cast(
511+
dict[int, QuantArgs], node.meta.get("input_qparams", None)
512+
)
513+
if input_qparams_meta:
514+
input_qparams = cast(QuantArgs, input_qparams_meta.get(idx, None))
515+
if not input_qparams:
516+
raise ValueError(
517+
f"Expected entry with key {idx} in input_qparams meta, got {input_qparams_meta}"
518+
)
519+
num_inputs = len(cast(list, node.args[idx]))
520+
521+
# Currently, infra only supports one set of qparams for a list of inputs
522+
# Map all inputs to the same qparams.
523+
input_qparams_map = {i: input_qparams for i in range(num_inputs)}
524+
return input_qparams_map
525+
return None
526+
527+
def _get_output_qparams_map(self, node: Node):
528+
output_qparams_map: dict[int, QuantArgs] = {}
529+
for getitem_node in node.users:
530+
idx = cast(int, getitem_node.args[1])
531+
qparam = getitem_node.meta.get("output_qparams", None)
532+
if qparam:
533+
output_qparams_map[idx] = cast(QuantArgs, qparam[0])
534+
return output_qparams_map
535+
536+
def _rescale_cond_submodules(self, node: Node, graph_module: GraphModule) -> bool:
537+
modified = False
538+
if_graph: GraphModule = cast(GraphModule, graph_module.get_submodule(node.args[1].target)) # type: ignore
539+
else_graph: GraphModule = cast(GraphModule, graph_module.get_submodule(node.args[2].target)) # type: ignore
540+
input_qparams_map = self._get_input_qparams_map(node, 3)
541+
if input_qparams_map:
542+
modified |= self._rescale_submodule_inputs(if_graph, input_qparams_map)
543+
modified |= self._rescale_submodule_inputs(else_graph, input_qparams_map)
544+
545+
output_qparams_map = self._get_output_qparams_map(node)
546+
if output_qparams_map:
547+
modified |= self._rescale_submodule_outputs(if_graph, output_qparams_map)
548+
modified |= self._rescale_submodule_outputs(else_graph, output_qparams_map)
549+
return modified
550+
551+
def _rescale_while_submodules(self, node: Node, graph_module: GraphModule):
552+
modified = False
553+
cond_graph: GraphModule = cast(GraphModule, graph_module.get_submodule(node.args[0].target)) # type: ignore
554+
body_graph: GraphModule = cast(GraphModule, graph_module.get_submodule(node.args[1].target)) # type: ignore
555+
556+
input_qparams_map = self._get_input_qparams_map(node, 2)
557+
if input_qparams_map:
558+
modified |= self._rescale_submodule_inputs(cond_graph, input_qparams_map)
559+
modified |= self._rescale_submodule_inputs(body_graph, input_qparams_map)
560+
561+
output_qparams_map = self._get_output_qparams_map(node)
562+
if output_qparams_map:
563+
modified |= self._rescale_submodule_outputs(body_graph, output_qparams_map)
564+
return modified
565+
566+
def call(self, graph_module: GraphModule) -> PassResult:
567+
modified = False
568+
569+
for node in list(graph_module.graph.nodes):
570+
node = cast(Node, node)
571+
if node.op != "call_function":
572+
continue
573+
574+
if node.target == torch.ops.higher_order.cond:
575+
modified = self._rescale_cond_submodules(node, graph_module)
576+
if node.target == torch.ops.higher_order.while_loop:
577+
modified = self._rescale_while_submodules(node, graph_module)
578+
579+
if modified:
580+
# Retrace the graph to update the fake tensor types
581+
graph_module = super().call(graph_module).graph_module
582+
graph_module.recompile()
583+
584+
return PassResult(graph_module, modified)

backends/arm/operator_support/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from . import ( # noqa
88
clone_dim_order_support,
9+
control_flow_support,
910
convolution_support,
1011
embedding_support,
1112
ethos_u55_support,

0 commit comments

Comments
 (0)