
Commit d35cbda

Merge branch 'main' into remove-xfails
2 parents 9afe3ca + fedaa2d commit d35cbda

File tree

61 files changed, +3192 -1402 lines changed


.gitignore

Lines changed: 0 additions & 1 deletion
@@ -62,7 +62,6 @@ xcuserdata/
 /include/
 /share/
 /version.py
-*.csv
 *_etdump

 # Android

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ executorch
 │ ├── <a href="backends/qualcomm">qualcomm</a> - Qualcomm-specific backends. See <a href="docs/source/backends-qualcomm.md">doc</a>.
 │ ├── <a href="backends/transforms">transforms</a> - Transformations for backend optimization.
 │ ├── <a href="backends/vulkan">vulkan</a> - Vulkan backend for cross-platform GPU support. See <a href="docs/source/backends-vulkan.md">doc</a>.
-│ └── <a href="backends/xnnpack">xnnpack</a> - XNNPACK backend for optimized neural network operations. See <a href="docs/source/backends-xnnpack.md">doc</a>.
+│ └── <a href="backends/xnnpack">xnnpack</a> - XNNPACK backend for optimized neural network operations. See <a href="docs/source/backends/xnnpack/xnnpack-overview.md">doc</a>.
 ├── <a href="codegen">codegen</a> - Tooling to autogenerate bindings between kernels and the runtime.
 ├── <a href="configurations">configurations</a> - Configuration files.
 ├── <a href="devtools">devtools</a> - Model profiling, debugging, and inspection. Please refer to the <a href="docs/source/devtools-overview.md">tools documentation</a> for more information.

README-wheel.md

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ The `executorch` pip package is in beta.
 The prebuilt `executorch.runtime` module included in this package provides a way
 to run ExecuTorch `.pte` files, with some restrictions:
 * Only [core ATen operators](docs/source/ir-ops-set-definition.md) are linked into the prebuilt module
-* Only the [XNNPACK backend delegate](docs/source/backends-xnnpack.md) is linked into the prebuilt module.
+* Only the [XNNPACK backend delegate](docs/source/backends/xnnpack/xnnpack-overview.md) is linked into the prebuilt module.
 * \[macOS only] [Core ML](docs/source/backends/coreml/coreml-overview.md) and [MPS](docs/source/backends/mps/mps-overview.md) backend
   are also linked into the prebuilt module.
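As a usage sketch of the prebuilt module described above (assuming the documented `executorch.runtime` Python API; "model.pte" and the input shape are hypothetical placeholders for a model exported ahead of time):

import torch
from executorch.runtime import Runtime

# Load the prebuilt runtime and a serialized ExecuTorch program.
runtime = Runtime.get()
program = runtime.load_program("model.pte")  # hypothetical exported file
method = program.load_method("forward")

# Execute with an example input; only core ATen ops and the linked
# delegates (XNNPACK, plus Core ML/MPS on macOS) are available.
outputs = method.execute([torch.randn(1, 3, 224, 224)])
print(outputs[0])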

backends/arm/test/models/test_nn_modules.py

Lines changed: 77 additions & 18 deletions
@@ -17,32 +17,91 @@
 - Transformer
 """

+from typing import Callable
+
 import torch
 from executorch.backends.arm.test.common import parametrize
 from executorch.backends.arm.test.tester.test_pipeline import (
     TosaPipelineFP,
     TosaPipelineINT,
 )

+
+def make_module_wrapper(
+    name: str, module_factory: Callable[[], torch.nn.Module]
+) -> torch.nn.Module:
+    class ModuleWrapper(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self._module = module_factory()
+
+        def forward(self, *args, **kwargs):
+            return self._module(*args, **kwargs)
+
+    ModuleWrapper.__name__ = name
+    ModuleWrapper.__qualname__ = name
+    return ModuleWrapper()
+
+
 example_input = torch.rand(1, 6, 16, 16)

 module_tests = [
-    (torch.nn.Embedding(10, 10), (torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]]),)),
-    (torch.nn.LeakyReLU(), (example_input,)),
-    (torch.nn.BatchNorm1d(16), (torch.rand(6, 16, 16),)),
-    (torch.nn.AdaptiveAvgPool2d((12, 12)), (example_input,)),
-    (torch.nn.ConvTranspose2d(6, 3, 2), (example_input,)),
-    (torch.nn.GRU(10, 20, 2), (torch.randn(5, 3, 10), torch.randn(2, 3, 20))),
-    (torch.nn.GroupNorm(2, 6), (example_input,)),
-    (torch.nn.InstanceNorm2d(16), (example_input,)),
-    (torch.nn.PReLU(), (example_input,)),
     (
-        torch.nn.Transformer(
-            d_model=64,
-            nhead=1,
-            num_encoder_layers=1,
-            num_decoder_layers=1,
-            dtype=torch.float32,
+        make_module_wrapper(
+            "EmbeddingModule",
+            lambda: torch.nn.Embedding(10, 10),
+        ),
+        (torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]]),),
+    ),
+    (
+        make_module_wrapper("LeakyReLUModule", torch.nn.LeakyReLU),
+        (example_input,),
+    ),
+    (
+        make_module_wrapper("BatchNorm1dModule", lambda: torch.nn.BatchNorm1d(16)),
+        (torch.rand(6, 16, 16),),
+    ),
+    (
+        make_module_wrapper(
+            "AdaptiveAvgPool2dModule",
+            lambda: torch.nn.AdaptiveAvgPool2d((12, 12)),
+        ),
+        (example_input,),
+    ),
+    (
+        make_module_wrapper(
+            "ConvTranspose2dModule", lambda: torch.nn.ConvTranspose2d(6, 3, 2)
+        ),
+        (example_input,),
+    ),
+    (
+        make_module_wrapper("GRUModule", lambda: torch.nn.GRU(10, 20, 2)),
+        (torch.randn(5, 3, 10), torch.randn(2, 3, 20)),
+    ),
+    (
+        make_module_wrapper("GroupNormModule", lambda: torch.nn.GroupNorm(2, 6)),
+        (example_input,),
+    ),
+    (
+        make_module_wrapper(
+            "InstanceNorm2dModule", lambda: torch.nn.InstanceNorm2d(16)
+        ),
+        (example_input,),
+    ),
+    (
+        make_module_wrapper("PReLUModule", torch.nn.PReLU),
+        (example_input,),
+    ),
+    (
+        make_module_wrapper(
+            "TransformerModule",
+            lambda: torch.nn.Transformer(
+                d_model=64,
+                nhead=1,
+                num_encoder_layers=1,
+                num_decoder_layers=1,
+                dtype=torch.float32,
+            ),
         ),
         (torch.rand((10, 32, 64)), torch.rand((20, 32, 64))),
     ),
@@ -78,9 +137,9 @@ def test_nn_Modules_FP(test_data):
     "test_data",
     test_parameters,
     xfails={
-        "GRU": "RuntimeError: Node aten_linear_default with op <EdgeOpOverload: aten.linear[...]> was not decomposed or delegated.",
-        "PReLU": "RuntimeError: mul(): functions with out=... arguments don't support automatic differentiation, but one of the arguments requires grad.",
-        "Transformer": "AssertionError: Output 0 does not match reference output.",
+        "GRUModule": "RuntimeError: Node aten_linear_default with op <EdgeOpOverload: aten.linear[...]> was not decomposed or delegated.",
+        "PReLUModule": "RuntimeError: mul(): functions with out=... arguments don't support automatic differentiation, but one of the arguments requires grad.",
+        "TransformerModule": "AssertionError: Output 0 does not match reference output.",
     },
 )
 def test_nn_Modules_INT(test_data):
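Why the wrapper: the xfails keys above match on each module's type name (they change from "GRU" to "GRUModule" in lockstep with the wrapping), so giving every case a uniquely named torch.nn.Module keeps those keys stable and distinct. A minimal usage sketch, assuming only torch and the make_module_wrapper defined in this diff:

import torch
from executorch.backends.arm.test.models.test_nn_modules import make_module_wrapper

m = make_module_wrapper("GRUModule", lambda: torch.nn.GRU(10, 20, 2))
assert type(m).__name__ == "GRUModule"  # the name the xfails dictionary keys on

# The wrapper forwards positional and keyword args to the wrapped module.
output, hidden = m(torch.randn(5, 3, 10), torch.randn(2, 3, 20))
print(output.shape)  # torch.Size([5, 3, 20])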

backends/cadence/aot/ops_registrations.py

Lines changed: 6 additions & 10 deletions
@@ -53,17 +53,10 @@ def _validate_ref_impl_exists() -> None:
 # 1. be removed
 # 2. have a reference implementation added to ref_implementations.py
 _WARN_ONLY = {
-    "cadence::quantized_w8a32_linear",
-    "cadence::quantized_add",  # We should only support per_tensor variant, should remove
     "cadence::_softmax_f32_f32",
-    "cadence::requantize",  # We should only support per_tensor variant, should remove
     "cadence::quantized_softmax.per_tensor",
-    "cadence::quantized_conv2d_nchw",  # We should only support per_tensor variant, should remove
-    "cadence::quantized_relu",  # We should only support per_tensor variant, should remove
-    "cadence::quantized_conv2d_nhwc",  # We should only support per_tensor variant, should remove
     "cadence::quantized_softmax",
     "cadence::quantized_w8a32_gru",
-    "cadence::quantized_layer_norm",  # We should only support per_tensor variant, should remove
 }

 ref_impls = get_registered_ref_implementations()
@@ -2706,6 +2699,9 @@ def quantized_w8a32_linear_meta(
     # output comes in empty with shape [leading_dims, out_dim]
     src_shape = list(src.shape)
     weight_shape = weight.shape
+    assert (src_shape[-1] % 4) == 0
+    if len(src_shape) >= 2:
+        assert src_shape[-2] == 1
     assert len(weight_shape) == 2
     assert src_shape[-1] == weight_shape[-1]
     src_shape[-1] = weight_shape[0]
@@ -2720,12 +2716,12 @@ def quantized_w8a32_conv_meta(
     bias: torch.Tensor,
     b_scale: float,
 ) -> torch.Tensor:
-    # src comes in shape [batch, in_channel, in_length]
-    # weight comes in shape [out_ch, in_ch, kernel_dim]
+    # src comes in shape [batch, in_length, in_channels]
+    # weight comes in shape [kernel_dim, out_ch, in_ch]
     # output comes in empty with shape [batch, out_ch, in_length - kernel_dim + 1]
     assert len(src.shape) == 3

-    out_channels, in_channels, kernel_size = weight.shape
+    kernel_size, out_channels, in_channels = weight.shape
     assert kernel_size == 3
     assert (out_channels % 4) == 0
     assert (in_channels % 4) == 0
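The meta function only does shape inference, so the layout fix changes which dimension each assert checks. A standalone sketch of the updated logic (a hypothetical mirror function; the real one is quantized_w8a32_conv_meta above):

import torch

def w8a32_conv_out_shape(src: torch.Tensor, weight: torch.Tensor) -> list[int]:
    # src: [batch, in_length, in_channels]; weight: [kernel_dim, out_ch, in_ch]
    assert len(src.shape) == 3
    kernel_size, out_channels, in_channels = weight.shape
    assert kernel_size == 3
    assert (out_channels % 4) == 0
    assert (in_channels % 4) == 0
    batch, in_length, _ = src.shape
    return [batch, out_channels, in_length - kernel_size + 1]

print(w8a32_conv_out_shape(torch.zeros(1, 16, 8), torch.zeros(3, 4, 8)))  # [1, 4, 14]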

backends/cadence/aot/quantizer/fusion_pass.py

Lines changed: 1 addition & 1 deletion
@@ -397,7 +397,7 @@ def get_args_and_kwargs_mixed_w8a32_conv(
     )
     transposed_weights = graph_module.graph.call_function(
         torch.ops.aten.permute.default,
-        (weights_inputs[0], [2, 0, 1]),  # NCL -> NLC
+        (weights_inputs[0], [2, 0, 1]),  # NCL -> LNC
     )

     args = (
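The corrected comment can be verified directly: permute with [2, 0, 1] moves the last axis to the front, so an [out_ch, in_ch, kernel_dim] (NCL) weight becomes [kernel_dim, out_ch, in_ch] (LNC), the layout quantized_w8a32_conv_meta above now expects; the old "NLC" comment described a different ordering. A quick check with plain torch:

import torch

w = torch.zeros(4, 8, 3)         # NCL: [out_ch=4, in_ch=8, kernel_dim=3]
print(w.permute(2, 0, 1).shape)  # torch.Size([3, 4, 8]) -> LNC, not NLC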
