Commit b0597f6

Update on "[BE] Clean pte_data_map"
- Add maybe_unused to size test
- Clean up and try to reduce some binary size from pte_data_map.

Differential Revision: [D77904843](https://our.internmc.facebook.com/intern/diff/D77904843/)

[ghstack-poisoned]
2 parents: 6e6f81f + b11c858

File tree: 86 files changed, +1520 / -934 lines changed

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-9b498d3bb28b8e3411ce464dd2755c5b96d92c8f
+7cda4017ddda554752e89069ae205be5e8388f59

.ci/scripts/check_c10_sync.sh

Lines changed: 1 addition & 1 deletion
@@ -12,4 +12,4 @@ pushd pytorch
 git checkout "$pytorch_pin"
 popd
 "$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/c10 pytorch/c10
-"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/standalone pytorch/torch/standalone
+"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/headeronly pytorch/torch/headeronly

.github/workflows/lint.yml

Lines changed: 6 additions & 1 deletion
@@ -83,8 +83,13 @@ jobs:
       script: |
         FILES_NEEDS_FORMAT=$(/opt/google-java-format -n \
           extension/android/executorch_android/src/main/java/org/pytorch/executorch/*.java \
+          extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/*.java \
+          extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/*.java \
+          extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/*.java \
           examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java \
-          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java)
+          examples/demo-apps/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/*.java \
+          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java \
+          extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/*.java)
         if [ -n "$FILES_NEEDS_FORMAT" ]; then
           echo "Warning: The following files need formatting. Please use google-java-format."
           echo "Use a binary from https://github.com/google/google-java-format/releases/"
Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, List, Optional, Tuple
+
+import executorch
+import executorch.backends.test.harness.stages as BaseStages
+
+import torch
+from executorch.backends.apple.coreml.partition import CoreMLPartitioner
+from executorch.backends.test.harness import Tester as TesterBase
+from executorch.backends.test.harness.stages import StageType
+from executorch.exir import EdgeCompileConfig
+from executorch.exir.backend.partitioner import Partitioner
+
+
+class Partition(BaseStages.Partition):
+    def __init__(self, partitioner: Optional[Partitioner] = None):
+        super().__init__(
+            partitioner=partitioner or CoreMLPartitioner,
+        )
+
+
+class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
+    def __init__(
+        self,
+        partitioners: Optional[List[Partitioner]] = None,
+        edge_compile_config: Optional[EdgeCompileConfig] = None,
+    ):
+        super().__init__(
+            default_partitioner_cls=CoreMLPartitioner,
+            partitioners=partitioners,
+            edge_compile_config=edge_compile_config,
+        )
+
+
+class CoreMLTester(TesterBase):
+    def __init__(
+        self,
+        module: torch.nn.Module,
+        example_inputs: Tuple[torch.Tensor],
+        dynamic_shapes: Optional[Tuple[Any]] = None,
+    ):
+        # Specialize the harness stages for the Core ML backend.
+        stage_classes = (
+            executorch.backends.test.harness.Tester.default_stage_classes()
+            | {
+                StageType.PARTITION: Partition,
+                StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
+            }
+        )
+
+        super().__init__(
+            module=module,
+            stage_classes=stage_classes,
+            example_inputs=example_inputs,
+            dynamic_shapes=dynamic_shapes,
+        )
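As a rough usage sketch for the new Core ML tester (not part of this commit): it assumes the harness TesterBase exposes the same fluent stage methods as the existing XNNPACK tester (export, to_edge_transform_and_lower, to_executorch, run_method_and_compare_outputs) and that the new file lands at a path like backends/apple/coreml/test/tester.py; both are assumptions, not shown in this diff.

import torch

# Hypothetical import path; the diff does not show where this file lives.
from executorch.backends.apple.coreml.test.tester import CoreMLTester


class AddOne(torch.nn.Module):
    def forward(self, x):
        return x + 1.0


# Drive the usual harness stages: export, partition/lower to the Core ML
# delegate via the specialized stages above, then compare against eager mode.
(
    CoreMLTester(AddOne(), (torch.randn(1, 4),))
    .export()
    .to_edge_transform_and_lower()
    .to_executorch()
    .run_method_and_compare_outputs()
)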

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@ python_library(
     deps = [
         "//executorch/backends/arm:tosa_quant_utils",
         "//executorch/backends/arm:tosa_utils",
+        "//executorch/backends/arm/tosa/dialect:lib",
         "//executorch/backends/transforms:fuse_view_copy",
         "//executorch/backends/transforms:remove_getitem_op",
         "//executorch/backends/transforms:replace_scalar_with_tensor",

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -51,6 +51,7 @@
 from .decompose_sqrt_pass import DecomposeSqrtPass  # noqa
 from .decompose_sum_pass import DecomposeSumPass  # noqa
 from .decompose_var_pass import DecomposeVarPass  # noqa
+from .decorate_fp32_to_int32_casting_pass import DecorateFp32toInt32CastingPass  # noqa
 from .fold_qdq_with_annotated_qparams_pass import (  # noqa
     FoldAndAnnotateQParamsPass,
     QuantizeOperatorArguments,

backends/arm/_passes/arm_pass_manager.py

Lines changed: 4 additions & 2 deletions
@@ -6,8 +6,6 @@
 # LICENSE file in the root directory of this source tree.

 # pyre-unsafe
-
-import executorch.backends.arm.tosa.dialect  # noqa: unused
 from executorch.backends.arm._passes import (
     AddBiasPass,
     AnnotateChannelsLastDimOrder,
@@ -56,6 +54,7 @@
     DecomposeSqrtPass,
     DecomposeSumPass,
     DecomposeVarPass,
+    DecorateFp32toInt32CastingPass,
     FoldAndAnnotateQParamsPass,
     FuseBatchnorm2DPass,
     FuseConstantArgsPass,
@@ -200,6 +199,9 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(MatchArgRanksPass(exported_program))
         self.add_pass(DecomposeAdaptiveAvgPool2dPass())
         self.add_pass(DecomposeAvgPool2d())
+        self.add_pass(
+            DecorateFp32toInt32CastingPass()
+        )  # Require that no new fp32->int32 is introduced after this pass
         self.add_pass(ComputeConstantOpsAOT(exported_program))

         self.add_pass(DecomposeGroupedConv())
Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+def _get_decorated_ops(op):
+    if op in DecorateFp32toInt32CastingPass.targets:
+        return (
+            exir_ops.edge.aten.full.default,
+            exir_ops.edge.aten.ge.Tensor,
+            exir_ops.edge.aten.floor.default,
+            exir_ops.edge.aten.ceil.default,
+            exir_ops.edge.aten.where.self,
+        )
+    else:
+        raise RuntimeError(f"Can't get decorated ops for op {op}")
+
+
+class DecorateFp32toInt32CastingPass(ArmPass):
+    """
+    To lower PyTorch fp32 -> int32 casting to TOSA,
+    we need to transform the value with Ceil, Floor, and Where.
+    Before:
+        output = to_copy(x, dtype=torch.int32)
+    After:
+        %zero = full((1,), 0.0, dtype=torch.float32)
+        is_non_negative = x >= %zero
+        floor_x = floor(x)
+        ceil_x = ceil(x)
+        decorated_x = where(is_non_negative, floor_x, ceil_x)
+        output = to_copy(decorated_x, dtype=torch.int32)
+    """
+
+    targets = [
+        exir_ops.edge.aten._to_copy.default,
+        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
+    ]
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in self.targets:
+            return super().call_operator(op, args, kwargs, meta)
+
+        input = get_node_arg(args, 0)
+        input_dtype = input.node.meta["val"].dtype
+        output_dtype = meta["val"].dtype
+
+        if not (input_dtype == torch.float32 and output_dtype == torch.int32):
+            return super().call_operator(op, args, kwargs, meta)
+
+        op_full, op_ge, op_floor, op_ceil, op_where = _get_decorated_ops(op)
+
+        zero = super().call_operator(
+            op_full,
+            args=((1,) * len(meta["val"].size()), 0.0),
+            kwargs={"dtype": torch.float32},
+            meta=meta,
+            updated=True,
+        )
+
+        is_non_negative = super().call_operator(
+            op_ge, (input, zero), {}, meta, updated=True
+        )
+        floor_x = super().call_operator(op_floor, (input,), {}, meta, updated=True)
+        ceil_x = super().call_operator(op_ceil, (input,), {}, meta, updated=True)
+        decorated_x = super().call_operator(
+            op_where, (is_non_negative, floor_x, ceil_x), {}, meta, updated=True
+        )
+
+        return super().call_operator(op, (decorated_x,), kwargs, meta, updated=True)
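For intuition about the transform in the pass docstring above: an fp32 -> int32 cast truncates toward zero, which is floor(x) for non-negative x and ceil(x) for negative x, so the decorated value casts to the same integers as the original. A minimal eager-mode check of that equivalence (illustrative only, not code from this commit):

import torch

x = torch.tensor([-2.7, -0.5, 0.0, 0.5, 2.7], dtype=torch.float32)

# The decorated form the pass emits: floor(x) where x >= 0, else ceil(x).
decorated = torch.where(x >= torch.zeros(1), torch.floor(x), torch.ceil(x))

# A plain cast truncates toward zero, so both paths produce the same int32 values.
assert torch.equal(decorated.to(torch.int32), x.to(torch.int32))
print(decorated.to(torch.int32))  # tensor([-2, 0, 0, 0, 2], dtype=torch.int32)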
Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+from executorch.backends.arm.test import common
+
+from executorch.backends.arm.test.tester.test_pipeline import (
+    OpNotSupportedPipeline,
+    TosaPipelineMI,
+)
+
+input_t1 = Tuple[torch.Tensor]  # Input x
+
+
+class FP32ToINT32Casting(torch.nn.Module):
+    def __init__(self, target_dtype):
+        super().__init__()
+        self.target_dtype = target_dtype
+
+    def forward(self, x: torch.Tensor):
+        return x.to(self.target_dtype)
+
+
+test_data_fp32_input = {
+    "fp32_input_rank1": lambda: (
+        torch.rand((4), dtype=torch.float32),
+        torch.int32,
+    ),
+    "fp32_input_rank2": lambda: (
+        torch.rand((3, 4), dtype=torch.float32),
+        torch.int32,
+    ),
+    "fp32_input_rank3": lambda: (
+        torch.rand((2, 3, 4), dtype=torch.float32),
+        torch.int32,
+    ),
+    "fp32_input_rank4": lambda: (
+        torch.rand((1, 2, 3, 4), dtype=torch.float32),
+        torch.int32,
+    ),
+}
+
+
+@common.parametrize("test_data", test_data_fp32_input)
+def test_decorate_fp32_to_int32_casting_tosa_MI(test_data: Tuple):
+    test_tensor, target_dtype = test_data()
+    module = FP32ToINT32Casting(target_dtype)
+
+    pipeline = TosaPipelineMI[input_t1](
+        module,
+        (test_tensor,),
+        aten_op=[],
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_fp32_input)
+def test_decorate_fp32_to_int32_casting_tosa_BI(test_data: Tuple):
+    """
+    Casting operations involving floating-point dtypes are rejected in the BI/INT profile.
+    Therefore, the DecorateFp32toInt32CastingPass is not required in this profile.
+    Add a BI test to ensure that such casting is rejected as expected.
+    """
+    test_tensor, target_dtype = test_data()
+    module = FP32ToINT32Casting(target_dtype)
+
+    pipeline = OpNotSupportedPipeline[input_t1](
+        module,
+        (test_tensor,),
+        {
+            "executorch_exir_dialects_edge__ops_dim_order_ops__to_dim_order_copy_default": 1
+        },
+        quantize=True,
+    )
+    pipeline.run()

backends/arm/tosa/dialect/TARGETS

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+
+python_library(
+    name = "lib",
+    srcs = glob(["*.py"]),
+)
