Commit b0597f6

Update on "[BE] Clean pte_data_map"
- Add maybe_unused to size test
- Clean up and try to reduce some binary size from pte_data_map.

Differential Revision: [D77904843](https://our.internmc.facebook.com/intern/diff/D77904843/)

[ghstack-poisoned]
2 parents: 6e6f81f + b11c858

File tree: 86 files changed, +1520 / -934 lines changed

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-9b498d3bb28b8e3411ce464dd2755c5b96d92c8f
+7cda4017ddda554752e89069ae205be5e8388f59

.ci/scripts/check_c10_sync.sh

Lines changed: 1 addition & 1 deletion
@@ -12,4 +12,4 @@ pushd pytorch
 git checkout "$pytorch_pin"
 popd
 "$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/c10 pytorch/c10
-"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/standalone pytorch/torch/standalone
+"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/headeronly pytorch/torch/headeronly

.github/workflows/lint.yml

Lines changed: 6 additions & 1 deletion
@@ -83,8 +83,13 @@ jobs:
       script: |
         FILES_NEEDS_FORMAT=$(/opt/google-java-format -n \
           extension/android/executorch_android/src/main/java/org/pytorch/executorch/*.java \
+          extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/*.java \
+          extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/*.java \
+          extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/*.java \
           examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java \
-          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java)
+          examples/demo-apps/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/*.java \
+          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java \
+          extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/*.java)
         if [ -n "$FILES_NEEDS_FORMAT" ]; then
           echo "Warning: The following files need formatting. Please use google-java-format."
           echo "Use a binary from https://github.com/google/google-java-format/releases/"
Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, List, Optional, Tuple
+
+import executorch
+import executorch.backends.test.harness.stages as BaseStages
+
+import torch
+from executorch.backends.apple.coreml.partition import CoreMLPartitioner
+from executorch.backends.test.harness import Tester as TesterBase
+from executorch.backends.test.harness.stages import StageType
+from executorch.exir import EdgeCompileConfig
+from executorch.exir.backend.partitioner import Partitioner
+
+
+class Partition(BaseStages.Partition):
+    def __init__(self, partitioner: Optional[Partitioner] = None):
+        super().__init__(
+            partitioner=partitioner or CoreMLPartitioner,
+        )
+
+
+class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
+    def __init__(
+        self,
+        partitioners: Optional[List[Partitioner]] = None,
+        edge_compile_config: Optional[EdgeCompileConfig] = None,
+    ):
+        super().__init__(
+            default_partitioner_cls=CoreMLPartitioner,
+            partitioners=partitioners,
+            edge_compile_config=edge_compile_config,
+        )
+
+
+class CoreMLTester(TesterBase):
+    def __init__(
+        self,
+        module: torch.nn.Module,
+        example_inputs: Tuple[torch.Tensor],
+        dynamic_shapes: Optional[Tuple[Any]] = None,
+    ):
+        # Specialize the harness stages for the Core ML backend.
+        stage_classes = (
+            executorch.backends.test.harness.Tester.default_stage_classes()
+            | {
+                StageType.PARTITION: Partition,
+                StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
+            }
+        )
+
+        super().__init__(
+            module=module,
+            stage_classes=stage_classes,
+            example_inputs=example_inputs,
+            dynamic_shapes=dynamic_shapes,
+        )
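As a rough usage sketch for the new Core ML tester (not part of this commit): it assumes the harness TesterBase exposes the same fluent stage methods as the existing XNNPACK tester (export, to_edge_transform_and_lower, to_executorch, run_method_and_compare_outputs) and that the new file lands at a path like backends/apple/coreml/test/tester.py; both are assumptions, not shown in this diff.

import torch

# Hypothetical import path; the diff does not show where this file lives.
from executorch.backends.apple.coreml.test.tester import CoreMLTester


class AddOne(torch.nn.Module):
    def forward(self, x):
        return x + 1.0


# Drive the usual harness stages: export, partition/lower to the Core ML
# delegate via the specialized stages above, then compare against eager mode.
(
    CoreMLTester(AddOne(), (torch.randn(1, 4),))
    .export()
    .to_edge_transform_and_lower()
    .to_executorch()
    .run_method_and_compare_outputs()
)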

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@ python_library(
     deps = [
         "//executorch/backends/arm:tosa_quant_utils",
         "//executorch/backends/arm:tosa_utils",
+        "//executorch/backends/arm/tosa/dialect:lib",
         "//executorch/backends/transforms:fuse_view_copy",
         "//executorch/backends/transforms:remove_getitem_op",
         "//executorch/backends/transforms:replace_scalar_with_tensor",

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@
 from .decompose_sqrt_pass import DecomposeSqrtPass  # noqa
 from .decompose_sum_pass import DecomposeSumPass  # noqa
 from .decompose_var_pass import DecomposeVarPass  # noqa
+from .decorate_fp32_to_int32_casting_pass import DecorateFp32toInt32CastingPass  # noqa
 from .fold_qdq_with_annotated_qparams_pass import (  # noqa
     FoldAndAnnotateQParamsPass,
     QuantizeOperatorArguments,

backends/arm/_passes/arm_pass_manager.py

Lines changed: 4 additions & 2 deletions
@@ -6,8 +6,6 @@
 # LICENSE file in the root directory of this source tree.

 # pyre-unsafe
-
-import executorch.backends.arm.tosa.dialect  # noqa: unused
 from executorch.backends.arm._passes import (
     AddBiasPass,
     AnnotateChannelsLastDimOrder,
@@ -56,6 +54,7 @@
     DecomposeSqrtPass,
     DecomposeSumPass,
     DecomposeVarPass,
+    DecorateFp32toInt32CastingPass,
     FoldAndAnnotateQParamsPass,
     FuseBatchnorm2DPass,
     FuseConstantArgsPass,
@@ -200,6 +199,9 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(MatchArgRanksPass(exported_program))
         self.add_pass(DecomposeAdaptiveAvgPool2dPass())
         self.add_pass(DecomposeAvgPool2d())
+        self.add_pass(
+            DecorateFp32toInt32CastingPass()
+        )  # Require that no new fp32->int32 is introduced after this pass
         self.add_pass(ComputeConstantOpsAOT(exported_program))

         self.add_pass(DecomposeGroupedConv())
Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+def _get_decorated_ops(op):
+    if op in DecorateFp32toInt32CastingPass.targets:
+        return (
+            exir_ops.edge.aten.full.default,
+            exir_ops.edge.aten.ge.Tensor,
+            exir_ops.edge.aten.floor.default,
+            exir_ops.edge.aten.ceil.default,
+            exir_ops.edge.aten.where.self,
+        )
+    else:
+        raise RuntimeError(f"Can't get decorated ops for op {op}")
+
+
+class DecorateFp32toInt32CastingPass(ArmPass):
+    """
+    To lower PyTorch fp32 -> int32 casting to TOSA,
+    we need to transform the value with Ceil, Floor, and Where.
+    Before:
+        output = to_copy(x, dtype=torch.int32)
+    After:
+        %zero = full((1,), 0.0, dtype=torch.float32)
+        is_non_negative = x >= %zero
+        floor_x = floor(x)
+        ceil_x = ceil(x)
+        decorated_x = where(is_non_negative, floor_x, ceil_x)
+        output = to_copy(decorated_x, dtype=torch.int32)
+    """
+
+    targets = [
+        exir_ops.edge.aten._to_copy.default,
+        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
+    ]
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in self.targets:
+            return super().call_operator(op, args, kwargs, meta)
+
+        input = get_node_arg(args, 0)
+        input_dtype = input.node.meta["val"].dtype
+        output_dtype = meta["val"].dtype
+
+        if not (input_dtype == torch.float32 and output_dtype == torch.int32):
+            return super().call_operator(op, args, kwargs, meta)
+
+        op_full, op_ge, op_floor, op_ceil, op_where = _get_decorated_ops(op)
+
+        zero = super().call_operator(
+            op_full,
+            args=((1,) * len(meta["val"].size()), 0.0),
+            kwargs={"dtype": torch.float32},
+            meta=meta,
+            updated=True,
+        )
+
+        is_non_negative = super().call_operator(
+            op_ge, (input, zero), {}, meta, updated=True
+        )
+        floor_x = super().call_operator(op_floor, (input,), {}, meta, updated=True)
+        ceil_x = super().call_operator(op_ceil, (input,), {}, meta, updated=True)
+        decorated_x = super().call_operator(
+            op_where, (is_non_negative, floor_x, ceil_x), {}, meta, updated=True
+        )
+
+        return super().call_operator(op, (decorated_x,), kwargs, meta, updated=True)
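For intuition about the transform in the pass docstring above: an fp32 -> int32 cast truncates toward zero, which is floor(x) for non-negative x and ceil(x) for negative x, so the decorated value casts to the same integers as the original. A minimal eager-mode check of that equivalence (illustrative only, not code from this commit):

import torch

x = torch.tensor([-2.7, -0.5, 0.0, 0.5, 2.7], dtype=torch.float32)

# The decorated form the pass emits: floor(x) where x >= 0, else ceil(x).
decorated = torch.where(x >= torch.zeros(1), torch.floor(x), torch.ceil(x))

# A plain cast truncates toward zero, so both paths produce the same int32 values.
assert torch.equal(decorated.to(torch.int32), x.to(torch.int32))
print(decorated.to(torch.int32))  # tensor([-2, 0, 0, 0, 2], dtype=torch.int32)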
Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+from executorch.backends.arm.test import common
+
+from executorch.backends.arm.test.tester.test_pipeline import (
+    OpNotSupportedPipeline,
+    TosaPipelineMI,
+)
+
+input_t1 = Tuple[torch.Tensor]  # Input x
+
+
+class FP32ToINT32Casting(torch.nn.Module):
+    def __init__(self, target_dtype):
+        super().__init__()
+        self.target_dtype = target_dtype
+
+    def forward(self, x: torch.Tensor):
+        return x.to(self.target_dtype)
+
+
+test_data_fp32_input = {
+    "fp32_input_rank1": lambda: (
+        torch.rand((4), dtype=torch.float32),
+        torch.int32,
+    ),
+    "fp32_input_rank2": lambda: (
+        torch.rand((3, 4), dtype=torch.float32),
+        torch.int32,
+    ),
+    "fp32_input_rank3": lambda: (
+        torch.rand((2, 3, 4), dtype=torch.float32),
+        torch.int32,
+    ),
+    "fp32_input_rank4": lambda: (
+        torch.rand((1, 2, 3, 4), dtype=torch.float32),
+        torch.int32,
+    ),
+}
+
+
+@common.parametrize("test_data", test_data_fp32_input)
+def test_decorate_fp32_to_int32_casting_tosa_MI(test_data: Tuple):
+    test_tensor, target_dtype = test_data()
+    module = FP32ToINT32Casting(target_dtype)
+
+    pipeline = TosaPipelineMI[input_t1](
+        module,
+        (test_tensor,),
+        aten_op=[],
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_fp32_input)
+def test_decorate_fp32_to_int32_casting_tosa_BI(test_data: Tuple):
+    """
+    Casting operations involving floating-point dtypes are rejected in the BI/INT profile.
+    Therefore, the DecorateFp32toInt32CastingPass is not required in this profile.
+    Add a BI test to ensure that such casting is rejected as expected.
+    """
+    test_tensor, target_dtype = test_data()
+    module = FP32ToINT32Casting(target_dtype)
+
+    pipeline = OpNotSupportedPipeline[input_t1](
+        module,
+        (test_tensor,),
+        {
+            "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default": 1
+        },
+        quantize=True,
+    )
+    pipeline.run()

backends/arm/tosa/dialect/TARGETS

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+
+python_library(
+    name = "lib",
+    srcs = glob(["*.py"]),
+)
