From 51c4a5a8897c57ab7393632982ccbf97c129aac5 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Wed, 7 May 2025 18:24:37 -0700 Subject: [PATCH 001/178] Vulkan tests use executorch_core Differential Revision: D74365586 Pull Request resolved: https://github.com/pytorch/executorch/pull/10765 --- backends/vulkan/test/CMakeLists.txt | 2 +- backends/vulkan/test/op_tests/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/vulkan/test/CMakeLists.txt b/backends/vulkan/test/CMakeLists.txt index 95f0179f367..0b3f22875ad 100644 --- a/backends/vulkan/test/CMakeLists.txt +++ b/backends/vulkan/test/CMakeLists.txt @@ -82,7 +82,7 @@ if(TARGET vulkan_backend) ) target_include_directories(vulkan_compute_api_test PRIVATE ${COMMON_INCLUDES}) target_link_libraries( - vulkan_compute_api_test PRIVATE GTest::gtest_main vulkan_backend executorch + vulkan_compute_api_test PRIVATE GTest::gtest_main vulkan_backend executorch_core test_shaderlib ) target_compile_options(vulkan_compute_api_test PRIVATE ${VULKAN_CXX_FLAGS}) diff --git a/backends/vulkan/test/op_tests/CMakeLists.txt b/backends/vulkan/test/op_tests/CMakeLists.txt index 584719d5c28..a34d3f297f6 100644 --- a/backends/vulkan/test/op_tests/CMakeLists.txt +++ b/backends/vulkan/test/op_tests/CMakeLists.txt @@ -81,7 +81,7 @@ function(vulkan_op_test test_name test_src) ${test_name} PRIVATE GTest::gtest_main vulkan_backend - executorch + executorch_core ${LIB_TORCH} ${LIB_TORCH_CPU} ${LIB_C10} From 486398856a1fa416fa6a37a6e4d1691ba7e04210 Mon Sep 17 00:00:00 2001 From: Eli Amesefe Date: Wed, 7 May 2025 18:29:36 -0700 Subject: [PATCH 002/178] Handle avg_pool2d with padding == 0 as no padding Differential Revision: D74117402 Pull Request resolved: https://github.com/pytorch/executorch/pull/10697 --- .../arm/operator_support/pool_2d_support.py | 5 +++- backends/arm/test/ops/test_avg_pool2d.py | 25 ++++++++++++++----- backends/arm/test/targets.bzl | 1 + 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/backends/arm/operator_support/pool_2d_support.py b/backends/arm/operator_support/pool_2d_support.py index 750fab2730d..f4ada36de80 100644 --- a/backends/arm/operator_support/pool_2d_support.py +++ b/backends/arm/operator_support/pool_2d_support.py @@ -54,8 +54,11 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification): kernel = cast(tuple[int, int], node.args[1]) stride = cast(tuple[int, int], node.args[2]) if len(node.args) > 3: + padding = cast(tuple[int, int], node.args[3]) # Padding case - if not all(1 <= k <= 8 for k in kernel): + if not all(1 <= k <= 8 for k in kernel) and not all( + v == 0 for v in padding + ): self.reporter.report_reject( node, f"Avgpool2d with padding needs kernel dims < 8, got {kernel}" ) diff --git a/backends/arm/test/ops/test_avg_pool2d.py b/backends/arm/test/ops/test_avg_pool2d.py index 2a50ef38834..c48595aec7f 100644 --- a/backends/arm/test/ops/test_avg_pool2d.py +++ b/backends/arm/test/ops/test_avg_pool2d.py @@ -9,9 +9,11 @@ from typing import Tuple +import pytest + import torch -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.test_pipeline import ( EthosU55PipelineBI, @@ -64,15 +66,24 @@ def forward(self, x): @common.parametrize("test_module", test_modules) +@pytest.mark.tosa_ref_model def test_avgpool2d_tosa_MI(test_module): model, input_tensor = test_module - pipeline = TosaPipelineMI[input_t](model, input_tensor, 
aten_op, exir_op) - pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1) - pipeline.run() + pipeline = TosaPipelineMI[input_t]( + model, + input_tensor, + aten_op, + exir_op, + run_on_tosa_ref_model=conftest.is_option_enabled("tosa_ref_model"), + ) + if conftest.is_option_enabled("tosa_ref_model"): + pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1) + pipeline.run() @common.parametrize("test_module", test_modules) +@pytest.mark.tosa_ref_model def test_avgpool2d_tosa_BI(test_module): model, input_tensor = test_module @@ -82,9 +93,11 @@ def test_avgpool2d_tosa_BI(test_module): aten_op, exir_op, symmetric_io_quantization=True, + run_on_tosa_ref_model=conftest.is_option_enabled("tosa_ref_model"), ) - pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1) - pipeline.run() + if conftest.is_option_enabled("tosa_ref_model"): + pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1) + pipeline.run() @common.parametrize("test_module", test_modules) diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl index 832dcb3286c..acb27f13798 100644 --- a/backends/arm/test/targets.bzl +++ b/backends/arm/test/targets.bzl @@ -13,6 +13,7 @@ def define_arm_tests(): # Operators test_files += [ + "ops/test_avg_pool2d.py", "ops/test_linear.py", "ops/test_slice.py", "ops/test_sigmoid.py", From bf5b99a5211c37eb0fdba00b1fbac686e7d72446 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 7 May 2025 18:57:59 -0700 Subject: [PATCH 003/178] Update buck2 to 2025-05-06 (#10742) Notably, pinned prelude version includes https://github.com/facebook/buck2-prelude/commit/958af4f5e064aed9fcbfc51d68a052835671a2ff . Also, we're able to simplify our Buck versioning logic now that Buck has consistent versions across platforms (https://github.com/facebook/buck2/issues/828#issuecomment-2755327893) --- .ci/docker/ci_commit_pins/buck2.txt | 2 +- third-party/prelude | 2 +- tools/cmake/resolve_buck.py | 48 ++++++++--------------------- 3 files changed, 15 insertions(+), 37 deletions(-) diff --git a/.ci/docker/ci_commit_pins/buck2.txt b/.ci/docker/ci_commit_pins/buck2.txt index 1b22c8ffc09..38d6362c8cb 100644 --- a/.ci/docker/ci_commit_pins/buck2.txt +++ b/.ci/docker/ci_commit_pins/buck2.txt @@ -1 +1 @@ -2024-12-16 +2025-05-06 diff --git a/third-party/prelude b/third-party/prelude index 851d3f09c45..48c249f8c7b 160000 --- a/third-party/prelude +++ b/third-party/prelude @@ -1 +1 @@ -Subproject commit 851d3f09c452937fc5adef27e2c50f7f304f1646 +Subproject commit 48c249f8c7b99ff501d6e857754760315072b306 diff --git a/tools/cmake/resolve_buck.py b/tools/cmake/resolve_buck.py index 6da0a81b6de..f9c42a0a3c8 100644 --- a/tools/cmake/resolve_buck.py +++ b/tools/cmake/resolve_buck.py @@ -15,7 +15,7 @@ from dataclasses import dataclass from pathlib import Path -from typing import Sequence, Union +from typing import Union import buck_util import zstd @@ -46,54 +46,34 @@ def _buck_version_path() -> Path: @dataclass class BuckInfo: archive_name: str - target_versions: Sequence[str] -# Mapping of os family and architecture to buck2 binary versions. The -# target version is the hash given by running 'buck2 --version'. The +# Mapping of os family and architecture to buck2 archive name. The target version is the +# hash given by running 'buck2 --version', which is now consistent across platforms. The # archive name is the archive file name to download, as seen under # https://github.com/facebook/buck2/releases/. 
# -# To add or update versions, download the appropriate version of buck2 -# and run 'buck2 --version'. Add the corresponding entry to the platform -# map below, and if adding new os families or architectures, update the -# platform detection logic in resolve_buck2(). -# -# Some platforms (linux) provide multiple binaries (GNU and MUSL). All -# versions in the list are accepted when validating a user-provided or -# system buck2. +# To update Buck2, download the appropriate version of buck2 for your platform, run +# 'buck2 --version', and update BUCK_TARGET_VERSION. To add a new platform, add the +# corresponding entry to the platform map below, and if adding new os families or +# architectures, update the platform detection logic in resolve_buck2(). +BUCK_TARGET_VERSION = "2025-05-06-201beb86106fecdc84e30260b0f1abb5bf576988" + BUCK_PLATFORM_MAP = { ("linux", "x86_64"): BuckInfo( archive_name="buck2-x86_64-unknown-linux-musl.zst", - target_versions=[ - # MUSL - "edae27cfca00053d9c5f7c7be81b6b0d7d07573a50be374ce53a9d8692afa5fc", - # GNU - "10334cb20cb7c321", - ], ), ("linux", "aarch64"): BuckInfo( archive_name="buck2-aarch64-unknown-linux-gnu.zst", - target_versions=[ - # MUSL - "5d7af382acbe0dde70f0e9b0a0bc36deea906077ec1ffe80d3fa280490109051", - # GNU - "08d4382de22fab275978abc7c27c001d7823eb2f", - ], ), ("darwin", "aarch64"): BuckInfo( archive_name="buck2-aarch64-apple-darwin.zst", - target_versions=["f3b7a37732803ed090cd8a37f00cc000"], ), ("darwin", "x86_64"): BuckInfo( archive_name="buck2-x86_64-apple-darwin.zst", - target_versions=["9c9a583658d43e82b41f3fc9d369a9b0"], ), ("windows", "x86_64"): BuckInfo( archive_name="buck2-x86_64-pc-windows-msvc.exe.zst", - target_versions=[ - "c7d378f3f307e9590f0b29a5f7f1b21b8e784f4e4bd30a0160b2a69df50d2ee0" - ], ), } @@ -160,13 +140,13 @@ def resolve_buck2(args: argparse.Namespace) -> Union[str, int]: # If we have an explicit buck2 arg, check the version and fail if # there is a mismatch. ver = buck_util.get_buck2_version(args.buck2) - if ver in buck_info.target_versions: + if ver == BUCK_TARGET_VERSION: return args.buck2 else: print( f'The provided buck2 binary "{args.buck2}" reports version ' f'"{ver}", but ExecuTorch needs version ' - f'"{buck_info.target_versions[0]}". Ensure that the correct buck2' + f'"{BUCK_TARGET_VERSION}". Ensure that the correct buck2' " version is installed or avoid explicitly passing the BUCK2 " "version to automatically download the correct version.", file=sys.stderr, @@ -181,7 +161,7 @@ def resolve_buck2(args: argparse.Namespace) -> Union[str, int]: # Look for system buck2 and check version. Note that this can return # None. ver = buck_util.get_buck2_version("buck2") - if ver in buck_info.target_versions: + if ver == BUCK_TARGET_VERSION: # Use system buck2. return "buck2" else: @@ -190,9 +170,7 @@ def resolve_buck2(args: argparse.Namespace) -> Union[str, int]: os.makedirs(cache_dir, exist_ok=True) buck2_local_path = ( - (cache_dir / f"buck2-{buck_info.target_versions[0]}") - .absolute() - .as_posix() + (cache_dir / f"buck2-{BUCK_TARGET_VERSION}").absolute().as_posix() ) # Check for a previously cached buck2 binary. 
The filename includes From bb7e50f095533a88437c7aa457e204c1bf752544 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Wed, 7 May 2025 19:23:49 -0700 Subject: [PATCH 004/178] Tests use executorch_core Differential Revision: D74369346 Pull Request resolved: https://github.com/pytorch/executorch/pull/10764 --- examples/selective_build/CMakeLists.txt | 9 ++++----- runtime/kernel/test/CMakeLists.txt | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/examples/selective_build/CMakeLists.txt b/examples/selective_build/CMakeLists.txt index db570bb98c3..fc059c2cc68 100644 --- a/examples/selective_build/CMakeLists.txt +++ b/examples/selective_build/CMakeLists.txt @@ -43,7 +43,7 @@ find_package( gflags REQUIRED PATHS ${CMAKE_CURRENT_BINARY_DIR}/../../third-party ) -target_include_directories(executorch INTERFACE ${_common_include_directories}) +target_include_directories(executorch_core INTERFACE ${_common_include_directories}) # ------------------------------ OPTIONS BEGIN ------------------------------- @@ -91,7 +91,7 @@ if(EXECUTORCH_SELECT_OPS_YAML) # custom_kernels: C++ kernel implementations of custom ops # add_library(custom_kernels ${kernel_sources}) - target_link_libraries(custom_kernels PRIVATE executorch) + target_link_libraries(custom_kernels PRIVATE executorch_core) target_compile_options(custom_kernels PUBLIC ${_common_compile_options}) list(APPEND _kernel_lib custom_kernels) @@ -117,7 +117,7 @@ generate_bindings_for_kernels( ) gen_operators_lib( - LIB_NAME "select_build_lib" KERNEL_LIBS ${_kernel_lib} DEPS executorch + LIB_NAME "select_build_lib" KERNEL_LIBS ${_kernel_lib} DEPS executorch_core ) list(TRANSFORM _executor_runner__srcs PREPEND "${EXECUTORCH_ROOT}/") @@ -131,10 +131,9 @@ if(CMAKE_BUILD_TYPE EQUAL "Release") target_link_options(selective_build_test PRIVATE "LINKER:--gc-sections") endif() target_link_libraries( - selective_build_test PRIVATE executorch gflags select_build_lib + selective_build_test PRIVATE executorch_core gflags select_build_lib ) target_link_options_shared_lib(select_build_lib) -target_link_options_shared_lib(executorch) target_compile_options(selective_build_test PUBLIC ${_common_compile_options}) # Print all summary diff --git a/runtime/kernel/test/CMakeLists.txt b/runtime/kernel/test/CMakeLists.txt index 9ff47fbefd5..5a9c4f0febf 100644 --- a/runtime/kernel/test/CMakeLists.txt +++ b/runtime/kernel/test/CMakeLists.txt @@ -20,7 +20,7 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Test.cmake) add_executable(operator_registry_test operator_registry_test.cpp) target_link_libraries( - operator_registry_test GTest::gtest GTest::gtest_main GTest::gmock executorch + operator_registry_test GTest::gtest GTest::gtest_main GTest::gmock executorch_core ) target_include_directories(operator_registry_test PRIVATE ${EXECUTORCH_ROOT}/..) add_test(operator_registry_test operator_registry_test) @@ -28,7 +28,7 @@ add_test(operator_registry_test operator_registry_test) add_executable(kernel_runtime_context_test kernel_runtime_context_test.cpp) target_link_libraries( kernel_runtime_context_test GTest::gtest GTest::gtest_main GTest::gmock - executorch + executorch_core ) target_include_directories( kernel_runtime_context_test PRIVATE ${EXECUTORCH_ROOT}/.. 
@@ -47,7 +47,7 @@ add_executable( ) target_link_libraries( operator_registry_max_kernel_num_test GTest::gtest GTest::gtest_main - GTest::gmock executorch + GTest::gmock executorch_core ) target_compile_definitions( operator_registry_max_kernel_num_test PRIVATE "-DMAX_KERNEL_NUM=1" From b1d00e2a46b17864545b250dbfd17de15c11c9e9 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Thu, 8 May 2025 02:39:22 -0400 Subject: [PATCH 005/178] [ET-VK] Introduce generic export pass for fusing Q/DQ nodes (#10771) ## Context When quantizing models with the PT2E quantization flow, quantize/dequantize nodes will be inserted into the graph. However, these quantize/dequantize nodes must be fused with operators such as `aten.linear.default` to produce nodes corresponding to quantized operators (e.g. `weight_int8pack_mm`) in order for quantized operator implementations to be called at runtime. Currently, the op fusion is done by the `fuse_dequant_linear.py` pass, however, this only handles one specific fusion pattern to generate a `weight_int8pack_mm` operator. As more quantized operators are to be supported in ET-VK via the PT2E quantization flow, a more generic fusion pass is needed that can handle a variety of fusion patterns. ## Changes Introduce the `FuseQuantizedOpsTransform()` pass. I elected to introduce a new pass under the `backends/vulkan/_passes` directory, as opposed to modifying the existing pass because I anticipate the majority of the fusion patterns to be specific to ET-VK. Remove the existing `FuseDequantLinearPass()` Switch to using the `FuseQuantizedOpsTransform` pass instead of the old `FuseDequantLinear` pass. Add `test_vulkan_passes` Python test to test export passes. Some small refactors to `test_vulkan_delegate` Python test to improve code organizations. Differential Revision: [D73794042](https://our.internmc.facebook.com/intern/diff/D73794042/) --- backends/transforms/fuse_dequant_linear.py | 77 ------ backends/transforms/targets.bzl | 15 -- backends/vulkan/_passes/TARGETS | 19 ++ backends/vulkan/_passes/__init__.py | 4 + backends/vulkan/_passes/fuse_quantized_ops.py | 229 ++++++++++++++++++ backends/vulkan/custom_ops_lib.py | 47 ++++ backends/vulkan/quantizer/vulkan_quantizer.py | 63 ++--- backends/vulkan/targets.bzl | 2 +- backends/vulkan/test/TARGETS | 13 + backends/vulkan/test/test_vulkan_delegate.py | 213 ++++++++++------ backends/vulkan/test/test_vulkan_passes.py | 151 ++++++++++++ backends/vulkan/utils.py | 69 ++++++ backends/vulkan/vulkan_preprocess.py | 4 +- extension/llm/export/quantizer_lib.py | 8 +- 14 files changed, 712 insertions(+), 202 deletions(-) delete mode 100644 backends/transforms/fuse_dequant_linear.py create mode 100644 backends/vulkan/_passes/fuse_quantized_ops.py create mode 100644 backends/vulkan/test/test_vulkan_passes.py diff --git a/backends/transforms/fuse_dequant_linear.py b/backends/transforms/fuse_dequant_linear.py deleted file mode 100644 index 235715ac74f..00000000000 --- a/backends/transforms/fuse_dequant_linear.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -# pyre-strict - -import torch - -from executorch.exir.dialects._ops import ops as exir_ops -from executorch.exir.pass_base import ExportPass, PassResult - - -class FuseDequantLinearPass(ExportPass): - """ - Fuses weight dequantize_per_channel nodes with linear nodes into - weight_int8pack_mm nodes, for 8-bit weight-only quantization. - - Replaces dq(weight) -> linear(activation, dq) with weight_int8pack_mm - Replaces dq(weight) -> linear(activation, dq, bias) with weight_int8pack_mm -> add - """ - - def fuse_dequant_with_linear( - self, - graph_module: torch.fx.GraphModule, - dequant_node: torch.fx.Node, - linear_node: torch.fx.Node, - ) -> None: - activations = linear_node.args[0] - bias = None - if len(linear_node.args) > 2: - bias = linear_node.args[2] - quant_weight = dequant_node.args[0] - scale = dequant_node.args[1] - - with graph_module.graph.inserting_before(linear_node): - weight_int8pack_mm_node = graph_module.graph.create_node( - "call_function", - exir_ops.edge.aten._weight_int8pack_mm.default, - (activations, quant_weight, scale), - ) - if bias: - add_node = graph_module.graph.create_node( - "call_function", - exir_ops.edge.aten.add.Tensor, - (weight_int8pack_mm_node, bias), - ) - linear_node.replace_all_uses_with(add_node) - else: - linear_node.replace_all_uses_with(weight_int8pack_mm_node) - graph_module.graph.erase_node(linear_node) - graph_module.graph.erase_node(dequant_node) - - def is_node_target( - self, node: torch.fx.Node, target: torch._ops.OperatorBase - ) -> bool: - return node.op == "call_function" and node.target == target - - def call(self, graph_module: torch.fx.GraphModule) -> PassResult: - for node in graph_module.graph.nodes: - if self.is_node_target(node, exir_ops.edge.aten.linear.default): - weight_node = node.args[1] - if self.is_node_target( - weight_node, - exir_ops.edge.quantized_decomposed.dequantize_per_channel.default, - ): - # only fuse if weight tensor is int8 packed - quant_weight = weight_node.args[0] - if quant_weight.meta["val"].dtype != torch.int8: - continue - self.fuse_dequant_with_linear(graph_module, weight_node, node) - - graph_module.recompile() - graph_module = super().call(graph_module).graph_module - - return PassResult(graph_module, True) diff --git a/backends/transforms/targets.bzl b/backends/transforms/targets.bzl index 66ff9111f52..71980195962 100644 --- a/backends/transforms/targets.bzl +++ b/backends/transforms/targets.bzl @@ -77,21 +77,6 @@ def define_common_targets(): ], ) - runtime.python_library( - name = "fuse_dequant_linear", - srcs = ["fuse_dequant_linear.py"], - visibility = [ - "//executorch/backends/...", - ], - deps = [ - ":utils", - "//caffe2:torch", - "//executorch/exir:pass_base", - "//executorch/exir:sym_util", - "//executorch/exir/dialects:lib", - ], - ) - runtime.python_library( name = "view_copy_to_squeeze_unsqueeze", srcs = ["view_copy_to_squeeze_unsqueeze.py"], diff --git a/backends/vulkan/_passes/TARGETS b/backends/vulkan/_passes/TARGETS index 5478ad0eab6..cfe20892994 100644 --- a/backends/vulkan/_passes/TARGETS +++ b/backends/vulkan/_passes/TARGETS @@ -3,6 +3,23 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") oncall("executorch") +runtime.python_library( + name = "fuse_quantized_ops", + srcs = ["fuse_quantized_ops.py"], + visibility = [ + "//executorch/backends/...", + ], + deps = [ + "//caffe2:torch", + "//executorch/backends/transforms:utils", + "//executorch/backends/vulkan:custom_ops_lib", + "//executorch/backends/vulkan:utils_lib", + "//executorch/exir:pass_base", + 
"//executorch/exir:sym_util", + "//executorch/exir/dialects:lib", + ], +) + runtime.python_library( name = "insert_prepack_nodes", srcs = ["insert_prepack_nodes.py"], @@ -13,6 +30,7 @@ runtime.python_library( "//caffe2:torch", "//executorch/exir:pass_base", "//executorch/backends/vulkan:utils_lib", + "//executorch/backends/vulkan:op_registry", ], ) @@ -110,6 +128,7 @@ runtime.python_library( "//executorch/examples/...", ], deps = [ + ":fuse_quantized_ops", ":insert_prepack_nodes", ":int4_weight_only_quantizer", ":remove_asserts", diff --git a/backends/vulkan/_passes/__init__.py b/backends/vulkan/_passes/__init__.py index 220afa6a35c..7ff93a6ee38 100644 --- a/backends/vulkan/_passes/__init__.py +++ b/backends/vulkan/_passes/__init__.py @@ -6,6 +6,9 @@ # pyre-strict +from executorch.backends.vulkan._passes.fuse_quantized_ops import ( + FuseQuantizedOpsTransform, +) from executorch.backends.vulkan._passes.insert_prepack_nodes import insert_prepack_nodes from executorch.backends.vulkan._passes.int4_weight_only_quantizer import ( VkInt4WeightOnlyQuantizer, @@ -26,6 +29,7 @@ from executorch.backends.vulkan._passes.tag_memory_meta_pass import TagMemoryMetaPass __all__ = [ + "FuseQuantizedOpsTransform", "insert_prepack_nodes", "VkInt4WeightOnlyQuantizer", "remove_asserts", diff --git a/backends/vulkan/_passes/fuse_quantized_ops.py b/backends/vulkan/_passes/fuse_quantized_ops.py new file mode 100644 index 00000000000..d510e1d4342 --- /dev/null +++ b/backends/vulkan/_passes/fuse_quantized_ops.py @@ -0,0 +1,229 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +from typing import Optional, Tuple + +import executorch.backends.vulkan.utils as utils +import torch + +import torch.nn.functional as F + +from executorch.backends.transforms.utils import get_param_tensor, is_param_node +from executorch.exir import ExportedProgram +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.pass_base import ExportPass, PassResult + +################# +## linear_qcnw ## +################# + + +def matches_linear_qcnw_pattern( # noqa: C901 + program: ExportedProgram, node: torch.fx.Node +) -> Optional[Tuple[torch.qscheme, int]]: + """ + Checks if the nodes surrounding a linear node matches the pattern for weight only + quantized linear, where the weight is quantized channelswise to n bits. + + If the graph pattern matches, then return a tuple of (quantization_method, nbits) + describing the type of quantization used for the weights. Otherwise, return None. 
+ """ + if not utils.is_linear_node(node): + return None + + input_node = node.args[0] + weight_node = node.args[1] + + # Type checking + if not isinstance(weight_node, torch.fx.Node): + return None + if not isinstance(input_node, torch.fx.Node): + return None + + # The input arg should not be a dequant node; if it is, then it is indicative that + # dynamically quantized linear should be used instead + if utils.is_dequant_node(input_node): + return None + + # The weight arg should be a dequant node dequantizing the quantized weight + # Furthermore, the op expects per channel quantization of the weight + if not utils.is_dequant_per_channel_node(weight_node): + return None + + orig_weight = weight_node.args[0] + zeros = weight_node.args[2] + + # Type checking + if not isinstance(orig_weight, torch.fx.Node): + return None + if not is_param_node(program, orig_weight): + return None + if not isinstance(zeros, torch.fx.Node): + return None + if not is_param_node(program, zeros): + return None + + zeros_tensor = get_param_tensor(program, zeros) + if not isinstance(zeros_tensor, torch.Tensor): + return None + + quant_method = torch.per_channel_affine + # Check for symmetric quantization, where the zeros used for dequantization will + # actually be all zeros. + if torch.all(zeros_tensor == 0): + quant_method = torch.per_channel_symmetric + + orig_weight_tensor = get_param_tensor(program, orig_weight) + if not isinstance(orig_weight_tensor, torch.Tensor): + return None + # Sanity check the dtype of the quantized weight + if orig_weight_tensor.dtype != torch.int8: + return None + + quant_min = orig_weight_tensor.min().item() + quant_max = orig_weight_tensor.max().item() + # Determine the number of bits the weight has been quantized to + if quant_min >= -8 and quant_max <= 7: + return quant_method, 4 + elif quant_min >= -128 and quant_max <= 127: + return quant_method, 8 + + return None + + +def pack_4bit_weight_tensor(inp: torch.Tensor) -> torch.Tensor: + """ + Given a 8-bit weight tensor containing values quantized to 4 bits, create a packed + weight tensor by packing 2 4-bit values in one unsigned 8-bit value. + + An input weight tensor of shape (M, K) will produce a packed weight tensor of shape + (M, K / 2). + """ + + # Assert we got a properly quantized tensor. + min, max = inp.min().item(), inp.max().item() + assert ( + max <= 7 and min >= -8 + ), f"convert_to_qc4w: [min,max] out of [-8, 7] range, got [{min}, {max}]" + + # Assuming we have a 2d tensor + if inp.ndim != 2: + inp = inp.squeeze() + assert ( + inp.ndim == 2 + ), f"convert_to_qc4w: expecting input tensor to be 2d, got {inp.ndim}" + + # pad ic + if inp.shape[-1] % 2 != 0: + inp = F.pad(input=inp, pad=(0, 1, 0, 0), mode="constant", value=0) + + # Shape after padding + oc, ic = inp.shape + assert ic % 2 == 0, "convert_to_qc4w: expecting ic to be even" + + # Adjust inp tensor for zp + inp = inp.to(dtype=torch.uint8) + 8 + + # Prepare the Result tensor + inp = inp.contiguous().view(-1) + return (inp[::2] << 4 | inp[1::2]).view(oc, int(ic / 2)) + + +def fuse_into_linear_qcnw_node( + program: ExportedProgram, + graph_module: torch.fx.GraphModule, + linear_node: torch.fx.Node, + quant_method: torch.qscheme, + nbits: int, +) -> None: + """ + The weight_int8pack_mm operator represents a weight only quantized linear operator, + where the weight tensor has been quantized channelswise to nbits bits. 
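+    For nbits == 4 with symmetric quantization, the et_vk.linear_qcs4w custom
+    operator is produced instead of aten._weight_int8pack_mm.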
+ + After the PT2E quantization flow, the expected graph pattern is + + dq_weight = dequantize(weight, scales) + out = linear(activation, dq_weight, bias?) + + The goal of this function is to condense that sequence into + + out = quantized_linear(activation, dq_weight, scales) + out = out + bias + """ + activation = linear_node.args[0] + dq_weight_node = linear_node.args[1] + assert isinstance(activation, torch.fx.Node) + assert isinstance(dq_weight_node, torch.fx.Node) + + bias = None + if len(linear_node.args) > 2: + bias = linear_node.args[2] + assert isinstance(bias, torch.fx.Node) + + orig_weight = dq_weight_node.args[0] + scale = dq_weight_node.args[1] + + # For 4 bit quantization, pack the weight tensor + if nbits == 4: + assert isinstance(orig_weight, torch.fx.Node) + orig_weight_tensor = get_param_tensor(program, orig_weight) + assert isinstance(orig_weight_tensor, torch.Tensor) + packed_weight_tensor = pack_4bit_weight_tensor(orig_weight_tensor) + utils.update_program_state_dict( + program, + orig_weight.name, + packed_weight_tensor, + ) + orig_weight.meta["val"] = orig_weight.meta["val"][:, ::2].to(torch.uint8) + + if nbits == 8 and quant_method == torch.per_channel_symmetric: + op_target = exir_ops.edge.aten._weight_int8pack_mm.default + elif nbits == 4 and quant_method == torch.per_channel_symmetric: + op_target = exir_ops.edge.et_vk.linear_qcs4w.default + else: + raise NotImplementedError( + "only 4 and 8 bits per channel symmetric quant supported for linear_qcnw" + ) + + with graph_module.graph.inserting_before(linear_node): + weight_int8pack_mm_node = graph_module.graph.create_node( + "call_function", + op_target, + (activation, orig_weight, scale), + ) + if bias: + add_node = graph_module.graph.create_node( + "call_function", + exir_ops.edge.aten.add.Tensor, + (weight_int8pack_mm_node, bias), + ) + linear_node.replace_all_uses_with(add_node) + else: + linear_node.replace_all_uses_with(weight_int8pack_mm_node) + graph_module.graph.erase_node(linear_node) + graph_module.graph.erase_node(dq_weight_node) + + +class FuseQuantizedOpsTransform(ExportPass): + def __init__(self, exported_program: ExportedProgram) -> None: + super().__init__() + self.program = exported_program + + def call(self, graph_module: torch.fx.GraphModule) -> PassResult: + for node in graph_module.graph.nodes: + qcnw_details = matches_linear_qcnw_pattern(self.program, node) + if qcnw_details is not None: + qcnw_method, qcnw_nbits = qcnw_details + fuse_into_linear_qcnw_node( + self.program, graph_module, node, qcnw_method, qcnw_nbits + ) + + graph_module.recompile() + graph_module = super().call(graph_module).graph_module + + return PassResult(graph_module, True) diff --git a/backends/vulkan/custom_ops_lib.py b/backends/vulkan/custom_ops_lib.py index 0275239a86a..af6fcbfbb14 100644 --- a/backends/vulkan/custom_ops_lib.py +++ b/backends/vulkan/custom_ops_lib.py @@ -184,6 +184,53 @@ def linear_weight_int4_impl( lib.impl(name, linear_weight_int4_impl, "CompositeExplicitAutograd") linear_weight_int4_op = getattr(getattr(torch.ops, namespace), name) +################# +## linear_qcs4w ## +################# + + +def linear_qcs4w( + x: torch.Tensor, + weights_4x2: torch.Tensor, + scales: torch.Tensor, +): + original_x_shape = x.shape + x = x.reshape(-1, original_x_shape[-1]) + + unpacked_weights_shape = weights_4x2.shape + out_features = unpacked_weights_shape[0] + in_features = unpacked_weights_shape[1] + + weights_unpacked = torch.empty( + (out_features, in_features * 2), dtype=torch.int8, 
device=weights_4x2.device + ) + + weights_unpacked[:, ::2] = weights_4x2 >> 4 + weights_unpacked[:, 1::2] = weights_4x2 & 0x0F + + n_bit = 8 + quant_min = -(2 ** (n_bit - 1)) + quant_max = 2 ** (n_bit - 1) - 1 + dq_weights = torch.ops.quantized_decomposed.dequantize_per_channel( + weights_unpacked, + scales, + None, + 0, + quant_min, + quant_max, + torch.int8, + ) + + out = torch.nn.functional.linear(x, dq_weights) + out_shape = original_x_shape[:-1] + (out_features,) + return out.reshape(out_shape) + + +name = "linear_qcs4w" +lib.define(f"{name}(Tensor self, Tensor weight, Tensor scales) -> Tensor") +lib.impl(name, linear_qcs4w, "CompositeExplicitAutograd") +linear_qc4w_op = getattr(getattr(torch.ops, namespace), name) + ###################### ## apply_rotary_emb ## ###################### diff --git a/backends/vulkan/quantizer/vulkan_quantizer.py b/backends/vulkan/quantizer/vulkan_quantizer.py index 2ea3e321dc3..b2f1a658040 100644 --- a/backends/vulkan/quantizer/vulkan_quantizer.py +++ b/backends/vulkan/quantizer/vulkan_quantizer.py @@ -9,7 +9,7 @@ from __future__ import annotations import functools -from typing import Any, Callable, Dict, Optional +from typing import Callable, Optional import torch from executorch.backends.xnnpack.quantizer.xnnpack_quantizer_utils import ( @@ -18,53 +18,60 @@ propagate_annotation, QuantizationConfig, ) -from torch.ao.quantization.observer import MinMaxObserver, PerChannelMinMaxObserver -from torch.ao.quantization.qconfig import _ObserverOrFakeQuantizeConstructor +from torch.ao.quantization.observer import PerChannelMinMaxObserver from torch.ao.quantization.quantizer import QuantizationSpec, Quantizer from torch.fx import Node __all__ = [ "VulkanQuantizer", - "get_weight_quantization_config", + "get_linear_weight_qcs_qspec", + "get_linear_weight_only_qcs_xnn_qconfig", ] -@functools.lru_cache -def get_weight_quantization_config( - is_per_channel: bool = True, - weight_qmin: int = -128, - weight_qmax: int = 127, -) -> QuantizationConfig: - - weight_qscheme = ( - torch.per_channel_symmetric if is_per_channel else torch.per_tensor_symmetric - ) - weight_observer_or_fake_quant_ctr: _ObserverOrFakeQuantizeConstructor = ( - PerChannelMinMaxObserver if is_per_channel else MinMaxObserver - ) - extra_args: Dict[str, Any] = {"eps": 2**-12} +def get_linear_weight_qcs_qspec(quant_bits: int) -> QuantizationSpec: + """ + Return a QuantizationSpec to perform per-channel symmetric (i.e. "qcs") quantization + of weight tensors of linear layers to the number of bits specified by quant_bits. 
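+
+    For example, quant_bits=4 yields a [quant_min, quant_max] range of [-8, 7],
+    and quant_bits=8 yields [-128, 127].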
+ """ + weight_observer = PerChannelMinMaxObserver + assert quant_bits in { + 8, + 4, + }, f"Unsupported weight quantization bits: {quant_bits}" - weight_quantization_spec = QuantizationSpec( + quant_min = -(2 ** (quant_bits - 1)) + quant_max = 2 ** (quant_bits - 1) - 1 + qscheme = torch.per_channel_symmetric + + return QuantizationSpec( dtype=torch.int8, - quant_min=weight_qmin, - quant_max=weight_qmax, - qscheme=weight_qscheme, + quant_min=quant_min, + quant_max=quant_max, + qscheme=qscheme, ch_axis=0, is_dynamic=False, - observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr.with_args( - **extra_args - ), + observer_or_fake_quant_ctr=weight_observer, ) - quantization_config = QuantizationConfig( + +@functools.lru_cache +def get_linear_weight_only_qcs_xnn_qconfig(quant_bits: int) -> QuantizationConfig: + """ + Return a XNNPACKQuantizer QuantizationConfig class instance that specifies + quantizing the weight tensors of linear layers using per-channel symmetric (qcs) + quantization to the number of bits specified by quant_bits. + """ + weight_qspec = get_linear_weight_qcs_qspec(quant_bits) + + return QuantizationConfig( input_activation=None, output_activation=None, - weight=weight_quantization_spec, + weight=weight_qspec, bias=None, is_qat=False, ) - return quantization_config _SUPPORTED_OPS = [ diff --git a/backends/vulkan/targets.bzl b/backends/vulkan/targets.bzl index aafc87ad2c3..665fde103fc 100644 --- a/backends/vulkan/targets.bzl +++ b/backends/vulkan/targets.bzl @@ -280,6 +280,7 @@ def define_common_targets(is_fbcode = False): deps = [ "//caffe2:torch", "//executorch/exir:tensor", + "//executorch/exir/backend/canonical_partitioners:config_partitioner_lib", "//executorch/backends/vulkan/serialization:lib", ] ) @@ -332,7 +333,6 @@ def define_common_targets(is_fbcode = False): "//executorch/backends/transforms:addmm_mm_to_linear", "//executorch/backends/transforms:fuse_batch_norm_with_conv", "//executorch/backends/transforms:fuse_conv_with_clamp", - "//executorch/backends/transforms:fuse_dequant_linear", "//executorch/backends/transforms:fuse_view_copy", "//executorch/backends/transforms:remove_clone_ops", "//executorch/backends/transforms:view_copy_to_squeeze_unsqueeze", diff --git a/backends/vulkan/test/TARGETS b/backends/vulkan/test/TARGETS index 5ac87892762..8f07040d586 100644 --- a/backends/vulkan/test/TARGETS +++ b/backends/vulkan/test/TARGETS @@ -24,6 +24,19 @@ python_unittest( ], ) +python_unittest( + name = "test_vulkan_passes", + srcs = [ + "test_vulkan_passes.py", + ], + deps = [ + "//caffe2:torch", + "//executorch/backends/vulkan/_passes:vulkan_passes", + "//executorch/backends/vulkan/quantizer:vulkan_quantizer", + "//executorch/backends/vulkan:vulkan_preprocess", + ] +) + python_unittest( name = "test_vulkan_delegate_header", srcs = [ diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py index 5fba5ed54cf..b57710974e8 100644 --- a/backends/vulkan/test/test_vulkan_delegate.py +++ b/backends/vulkan/test/test_vulkan_delegate.py @@ -15,10 +15,19 @@ from executorch.backends.transforms.convert_dtype_pass import I64toI32 from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner + from executorch.backends.vulkan.vulkan_preprocess import VulkanBackend -from executorch.exir import EdgeCompileConfig -from torch.export import Dim, export, ExportedProgram +from executorch.exir import ( + EdgeCompileConfig, + EdgeProgramManager, + ExecutorchProgramManager, +) + +from 
torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e + +from torch.ao.quantization.quantizer import Quantizer +from torch.export import Dim, export, export_for_training, ExportedProgram ctypes.CDLL("libvulkan.so.1") @@ -30,11 +39,66 @@ from executorch.extension.pytree import tree_flatten -class TestBackends(unittest.TestCase): - _edge_compile_config: EdgeCompileConfig = EdgeCompileConfig( +def lower_module( + model: torch.nn.Module, sample_inputs: Tuple[torch.Tensor], dynamic_shapes=None +) -> EdgeProgramManager: + compile_options = {} + edge_compile_config = EdgeCompileConfig( + _skip_dim_order=False, # TODO(T182928844): Delegate dim order op to backend. + ) + + program: ExportedProgram = export( + model, sample_inputs, dynamic_shapes=dynamic_shapes, strict=True + ) + + edge_program = to_edge_transform_and_lower( + program, + compile_config=edge_compile_config, + transform_passes=[ + I64toI32(edge_compile_config._skip_dim_order), + ], + partitioner=[VulkanPartitioner(compile_options)], + ) + + return edge_program + + +def quantize_and_lower_module( + model: torch.nn.Module, + sample_inputs: Tuple[torch.Tensor], + quantizer: Quantizer, + dynamic_shapes=None, +) -> EdgeProgramManager: + compile_options = {} + edge_compile_config = EdgeCompileConfig( _skip_dim_order=False, # TODO(T182928844): Delegate dim order op to backend. ) + program = export_for_training( + model, sample_inputs, dynamic_shapes=dynamic_shapes, strict=True + ).module() + + program = prepare_pt2e(program, quantizer) # pyre-ignore + # Calibrate + program(*sample_inputs) + + program = convert_pt2e(program) + + program = export(program, sample_inputs, dynamic_shapes=dynamic_shapes) + + edge_program = to_edge_transform_and_lower( + program, + compile_config=edge_compile_config, + transform_passes=[ + I64toI32(edge_compile_config._skip_dim_order), + ], + partitioner=[VulkanPartitioner(compile_options)], + ) + + return edge_program + + +class TestVulkanBackend(unittest.TestCase): def assert_outputs_equal( self, model_output, @@ -88,6 +152,59 @@ def assert_outputs_equal( ) ) + def check_no_delegation(self, et_program: ExecutorchProgramManager): + self.assertEqual( + len(et_program.executorch_program.execution_plan[0].delegates), + 0, + ) + return + + def check_vk_delegation(self, et_program: ExecutorchProgramManager): + self.assertEqual( + et_program.executorch_program.execution_plan[0].delegates[0].id, + VulkanBackend.__name__, + ) + + def run_delegated_model_and_check_output( + self, + et_program: ExecutorchProgramManager, + model: torch.nn.Module, + sample_inputs: Tuple[torch.Tensor], + atol=1e-03, + rtol=1e-01, + test_inputs=None, + first_output_only=False, + ): + executorch_module = _load_for_executorch_from_buffer(et_program.buffer) + inputs_flattened, _ = tree_flatten(sample_inputs) + + model_output = executorch_module.run_method("forward", tuple(inputs_flattened)) + ref_output = model(*sample_inputs) + + self.assert_outputs_equal( + model_output, + ref_output, + atol=atol, + rtol=rtol, + first_output_only=first_output_only, + ) + + if test_inputs is not None: + for test_input in test_inputs: + test_inputs_flattened, _ = tree_flatten(test_input) + model_output = executorch_module.run_method( + "forward", tuple(test_inputs_flattened) + ) + ref_output = model(*test_input) + + self.assert_outputs_equal( + model_output, + ref_output, + atol=atol, + rtol=rtol, + first_output_only=first_output_only, + ) + def lower_module_and_test_output( self, model: torch.nn.Module, @@ -105,80 +222,29 @@ def 
lower_module_and_test_output( outputs with the outputs of the eager module. """ - def run_test(): - compile_options = {} + # Validate that the model can execute in eager mode + model.eval() + model(*sample_inputs) - # At least model should run in eager mode. - model.eval() - model(*sample_inputs) + edge_program = lower_module(model, sample_inputs, dynamic_shapes=dynamic_shapes) - program: ExportedProgram = export( - model, sample_inputs, dynamic_shapes=dynamic_shapes, strict=True - ) + et_program = edge_program.to_executorch() - edge_program = to_edge_transform_and_lower( - program, - compile_config=self._edge_compile_config, - transform_passes=[ - I64toI32(self._edge_compile_config._skip_dim_order), - ], - partitioner=[VulkanPartitioner(compile_options)], - ) - executorch_program = edge_program.to_executorch() - - if expect_no_delegates: - self.assertEqual( - len( - executorch_program.executorch_program.execution_plan[ - 0 - ].delegates - ), - 0, - ) - return - else: - self.assertEqual( - executorch_program.executorch_program.execution_plan[0] - .delegates[0] - .id, - VulkanBackend.__name__, - ) - - executorch_module = _load_for_executorch_from_buffer( - executorch_program.buffer - ) - inputs_flattened, _ = tree_flatten(sample_inputs) + if expect_no_delegates: + self.check_no_delegation(et_program) + return - model_output = executorch_module.run_method( - "forward", tuple(inputs_flattened) - ) - ref_output = model(*sample_inputs) - - self.assert_outputs_equal( - model_output, - ref_output, - atol=atol, - rtol=rtol, - first_output_only=first_output_only, - ) - - if test_inputs is not None: - for test_input in test_inputs: - test_inputs_flattened, _ = tree_flatten(test_input) - model_output = executorch_module.run_method( - "forward", tuple(test_inputs_flattened) - ) - ref_output = model(*test_input) + self.check_vk_delegation(et_program) - self.assert_outputs_equal( - model_output, - ref_output, - atol=atol, - rtol=rtol, - first_output_only=first_output_only, - ) - - run_test() + self.run_delegated_model_and_check_output( + et_program, + model, + sample_inputs, + atol, + rtol, + test_inputs=test_inputs, + first_output_only=first_output_only, + ) def test_vulkan_backend_add(self): # This test is the simplest test by manually lowering some submodules, we can use paritioner @@ -942,6 +1008,7 @@ def forward(self, x): sample_inputs, ) + @unittest.skip("layer norm compute shader not working with swiftshader") def test_vulkan_backend_native_layer_norm(self): class NativeLayerNormModule(torch.nn.Module): def __init__(self): diff --git a/backends/vulkan/test/test_vulkan_passes.py b/backends/vulkan/test/test_vulkan_passes.py new file mode 100644 index 00000000000..7572ebd5a5a --- /dev/null +++ b/backends/vulkan/test/test_vulkan_passes.py @@ -0,0 +1,151 @@ +import unittest +from typing import Optional, Tuple + +import torch + +from executorch.backends.transforms.addmm_mm_to_linear import AddmmToLinearTransform +from executorch.backends.vulkan._passes import FuseQuantizedOpsTransform + +from executorch.backends.vulkan.quantizer.vulkan_quantizer import ( + get_linear_weight_only_qcs_xnn_qconfig, + VulkanQuantizer, +) + +from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge + +from executorch.exir.backend.canonical_partitioners.config_partitioner import ( + format_target_name, +) + +from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e +from torch.ao.quantization.quantizer import Quantizer + +################### +## Common Models ## +################### + + 
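+# Minimal single-linear model used by the fusion tests below; its linear
+# weight is quantized per-channel symmetric (weight-only) before lowering.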
+class SingleLinearModule(torch.nn.Module): + def __init__(self, K=256, N=128): + super().__init__() + self.K = K + self.N = N + self.linear = torch.nn.Linear(K, N, bias=False) + + def forward(self, x): + return self.linear(x) + + def get_sample_inputs(self): + sample_inputs = (torch.rand(size=(32, self.K), dtype=torch.float32),) + return sample_inputs + + +########### +## Tests ## +########### + + +def quantize_and_lower_module( + model: torch.nn.Module, + sample_inputs: Tuple[torch.Tensor], + quantizer: Quantizer, + dynamic_shapes=None, +) -> EdgeProgramManager: + edge_compile_config = EdgeCompileConfig( + _skip_dim_order=False, # TODO(T182928844): Delegate dim order op to backend. + _check_ir_validity=False, + ) + + program = torch.export.export_for_training( + model, sample_inputs, dynamic_shapes=dynamic_shapes, strict=True + ).module() + + program = prepare_pt2e(program, quantizer) # pyre-ignore + # Calibrate + program(*sample_inputs) + + program = convert_pt2e(program) + + program = torch.export.export(program, sample_inputs, dynamic_shapes=dynamic_shapes) + + edge_program = to_edge( + program, + compile_config=edge_compile_config, + ) + + return edge_program + + +def get_target_canonical_name(node: torch.fx.Node) -> Optional[str]: + if node.op != "call_function": + return None + node_name = format_target_name(node.target.__name__) # pyre-ignore + return node_name + + +def op_node_count(graph_module: torch.fx.GraphModule, canonical_op_name: str) -> int: + count = 0 + for node in graph_module.graph.nodes: + canonical_name = get_target_canonical_name(node) + if canonical_name is not None and canonical_name == canonical_op_name: + count += 1 + return count + + +class TestVulkanPasses(unittest.TestCase): + + def test_fuse_int8pack_mm(self): + K = 256 + N = 256 + model = SingleLinearModule(K, N) + sample_inputs = model.get_sample_inputs() + + quantizer = VulkanQuantizer() + quantizer.set_global(get_linear_weight_only_qcs_xnn_qconfig(8)) + + edge_manager = quantize_and_lower_module( + model, + sample_inputs, + quantizer, + ) + + ep = edge_manager._edge_programs["forward"] + edge_manager.transform( + [ + AddmmToLinearTransform(), + FuseQuantizedOpsTransform(ep), + ] + ) + + gm = ep.graph_module + + self.assertEqual(op_node_count(gm, "_weight_int8pack_mm.default"), 1) + self.assertEqual(op_node_count(gm, "dequantize_per_channel.default"), 0) + + def test_fuse_linear_qcs4w(self): + K = 256 + N = 256 + model = SingleLinearModule(K, N) + sample_inputs = model.get_sample_inputs() + + quantizer = VulkanQuantizer() + quantizer.set_global(get_linear_weight_only_qcs_xnn_qconfig(4)) + + edge_manager = quantize_and_lower_module( + model, + sample_inputs, + quantizer, + ) + + ep = edge_manager._edge_programs["forward"] + edge_manager.transform( + [ + AddmmToLinearTransform(), + FuseQuantizedOpsTransform(ep), + ] + ) + + gm = ep.graph_module + + self.assertEqual(op_node_count(gm, "linear_qcs4w.default"), 1) + self.assertEqual(op_node_count(gm, "dequantize_per_channel.default"), 0) diff --git a/backends/vulkan/utils.py b/backends/vulkan/utils.py index fa032cd7b4f..eb949a6ace8 100644 --- a/backends/vulkan/utils.py +++ b/backends/vulkan/utils.py @@ -14,6 +14,10 @@ VkStorageType, ) +from executorch.exir.backend.canonical_partitioners.config_partitioner import ( + format_target_name, +) + from executorch.exir.tensor import TensorSpec from torch._export.utils import is_buffer, is_param @@ -22,11 +26,44 @@ from torch.export import ExportedProgram +from torch.export.exported_program import InputKind +from 
torch.export.graph_signature import TensorArgument + +_DQ_OPS = { + "dequantize_per_tensor.tensor", + "dequantize_per_tensor.default", + "dequantize_per_channel.default", + "dequantize_per_channel_group.default", + "dequantize_per_token.default", + "dequantize_affine.default", +} + ## ## Node type determination ## +def is_dequant_node(node: torch.fx.Node) -> bool: + if node.op != "call_function": + return False + node_name = format_target_name(node.target.__name__) # pyre-ignore + return node_name in _DQ_OPS + + +def is_dequant_per_channel_node(node: torch.fx.Node) -> bool: + if node.op != "call_function": + return False + node_name = format_target_name(node.target.__name__) # pyre-ignore + return node_name == "dequantize_per_channel.default" + + +def is_linear_node(node: torch.fx.Node) -> bool: + if node.op != "call_function": + return False + node_name = format_target_name(node.target.__name__) # pyre-ignore + return node_name == "linear.default" + + def is_get_attr_node(node: torch.fx.Node) -> bool: return isinstance(node, torch.fx.Node) and node.op == "get_attr" @@ -258,3 +295,35 @@ def get_node_storage_type(node: torch.fx.Node) -> Optional[VkStorageType]: def get_node_memory_layout(node: torch.fx.Node) -> Optional[VkMemoryLayout]: return get_node_spec_attr(node, "vk_memory_layout") + + +## +## Misc +## + + +def update_program_state_dict( + program: ExportedProgram, + buffer_name: str, + updated_tensor: torch.Tensor, +) -> None: + target_name = None + # Iterate over all the tensors in the graph signature, and find + # the one corresponding to the parameter/buffer name + for input_ in program.graph_signature.input_specs: + if ( + input_.kind in (InputKind.BUFFER, InputKind.PARAMETER) + and isinstance(input_.arg, TensorArgument) + and input_.arg.name == buffer_name + ): + target_name = input_.target + break + + # Assert that we found the parameter/buffer + assert ( + target_name is not None + ), f"could not find {buffer_name} in source program signature" + assert target_name in program.state_dict, f"could not find {target_name}" + + # Finally, overwrite the current tensor with updated tensor + program.state_dict[target_name] = updated_tensor diff --git a/backends/vulkan/vulkan_preprocess.py b/backends/vulkan/vulkan_preprocess.py index 188311e5f2c..4200df3e131 100644 --- a/backends/vulkan/vulkan_preprocess.py +++ b/backends/vulkan/vulkan_preprocess.py @@ -17,12 +17,12 @@ FuseBatchNormWithConvPass, ) from executorch.backends.transforms.fuse_conv_with_clamp import FuseClampPass -from executorch.backends.transforms.fuse_dequant_linear import FuseDequantLinearPass from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform from executorch.backends.transforms.view_copy_to_squeeze_unsqueeze import ( ViewCopyToSqueezeUnsqueezePass, ) from executorch.backends.vulkan._passes import ( + FuseQuantizedOpsTransform, insert_prepack_nodes, RemoveLocalScalarDenseOpsTransform, RemoveRedundantOpsTransform, @@ -152,7 +152,7 @@ def preprocess( # noqa: C901 [ RemoveRedundantOpsTransform(), AddmmToLinearTransform(), - FuseDequantLinearPass(), + FuseQuantizedOpsTransform(program), SqueezeUnsqueezeInputs(), FuseViewCopyTransform(), ViewCopyToSqueezeUnsqueezePass(), diff --git a/extension/llm/export/quantizer_lib.py b/extension/llm/export/quantizer_lib.py index 24c3be2e802..d7b8b3a92b1 100644 --- a/extension/llm/export/quantizer_lib.py +++ b/extension/llm/export/quantizer_lib.py @@ -266,16 +266,12 @@ def get_coreml_quantizer(pt2e_quantize: str): def get_vulkan_quantizer(pt2e_quantize: str): 
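    # "vulkan_8w" maps to 8-bit per-channel symmetric weight-only quantization
    # of linear layer weights.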
from executorch.backends.vulkan.quantizer.vulkan_quantizer import ( - get_weight_quantization_config, + get_linear_weight_only_qcs_xnn_qconfig, VulkanQuantizer, ) if pt2e_quantize == "vulkan_8w": - config = get_weight_quantization_config( - is_per_channel=True, - weight_qmin=-128, - weight_qmax=127, - ) + config = get_linear_weight_only_qcs_xnn_qconfig(8) else: raise ValueError(f"Unsupported Vulkan quantizer specification {pt2e_quantize}") From 5e8295ef80db6c32b65592d43e5fa8e9134daba9 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Thu, 8 May 2025 02:40:32 -0400 Subject: [PATCH 006/178] [ET-VK] Implement linear_qcs4w (#10772) ## Context Title says it all! ## Changes Extended the implementation of `linear_qcsnw` to support packed 4-bit weight tensors. Differential Revision: [D73941991](https://our.internmc.facebook.com/intern/diff/D73941991/) --- backends/vulkan/op_registry.py | 7 +- .../runtime/graph/ops/glsl/indexing_utils.h | 18 +- .../graph/ops/glsl/linear_qcsnw_coop.glsl | 145 +++++++++----- .../graph/ops/glsl/linear_qcsnw_coop.yaml | 10 + .../graph/ops/glsl/linear_qcsnw_tiled.glsl | 115 ++++++++--- .../graph/ops/glsl/linear_qcsnw_tiled.yaml | 10 + .../graph/ops/impl/QuantizedLinearQCSNW.cpp | 88 ++++++--- .../graph/ops/impl/QuantizedLinearQGANW.cpp | 48 ----- .../vulkan/runtime/graph/ops/impl/Staging.cpp | 48 +++++ .../vulkan/runtime/graph/ops/impl/Staging.h | 7 + .../test/op_tests/linear_weight_int4_test.cpp | 182 +++++++++++++++++- 11 files changed, 528 insertions(+), 150 deletions(-) diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py index aa3cca5f384..8502e254ec5 100644 --- a/backends/vulkan/op_registry.py +++ b/backends/vulkan/op_registry.py @@ -377,7 +377,12 @@ def register_mm_op(features: OpFeatures): return features -@update_features(exir_ops.edge.aten._weight_int8pack_mm.default) +@update_features( + [ + exir_ops.edge.aten._weight_int8pack_mm.default, + exir_ops.edge.et_vk.linear_qcs4w.default, + ] +) def register_int8_mm_op(features: OpFeatures): features.texture_impl = TextureImplFeatures( uses_axis_map=False, diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h index 2126104430f..2b41d2b7e1a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h +++ b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h @@ -41,22 +41,32 @@ /* * Fast division by 4 using bit shifting */ -#define div4(x) (x >> 2) +#define div4(x) ((x) >> 2) + +/* + * Fast multiplication by 4 using bit shifting + */ +#define mul4(x) ((x) << 2) /* * Divides input and rounds up to 4 */ -#define divup4(x) ((x + 3) >> 2) +#define divup4(x) (((x) + 3) >> 2) + +/* + * Divides input by denominator and rounds up + */ +#define divup(x, d) (((x) + (d) - 1) / (d)) /* * Aligns input to the next multiple of 4 */ -#define alignup4(x) ((x + 3) & -4) +#define alignup4(x) (((x) + 3) & -4) /* * Fast modulo by 4 using bit masking */ -#define mod4(x) (x & 3) +#define mod4(x) ((x) & 3) /* * Find the packed dimension of a tensor given its strides. 
The packed dimension diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.glsl b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.glsl index 3ad9e759910..c766a3cd7d0 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.glsl @@ -14,6 +14,7 @@ #define VEC4_T ${buffer_gvec_type(DTYPE, 4)} #define TILE_ROWS ${TILE_ROWS} +#define TILE_TXCOLS ${TILE_TXCOLS} #define NGROUPS 8 #define NWORKERS 8 @@ -29,7 +30,10 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, OUT_STORAGE, is_scalar_array=False)} ${layout_declare_tensor(B, "r", "t_in", DTYPE, IN_STORAGE, is_scalar_array=False)} -${layout_declare_tensor(B, "r", "t_weight", "int8", WEIGHT_STORAGE, is_scalar_array=False)} +$if QUANT_NBITS == 4: + ${layout_declare_tensor(B, "r", "t_weight", "uint8", WEIGHT_STORAGE, is_scalar_array=False)} +$else: + ${layout_declare_tensor(B, "r", "t_weight", "int8", WEIGHT_STORAGE, is_scalar_array=False)} ${layout_declare_tensor(B, "r", "t_scales", DTYPE, SCALES_STORAGE, is_scalar_array=False)} layout(push_constant) uniform restrict Block { @@ -42,12 +46,23 @@ layout(push_constant) uniform restrict Block { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -shared VEC4_T partial_c[NGROUPS][NWORKERS][TILE_ROWS]; +shared VEC4_T partial_sums[NGROUPS][NWORKERS][TILE_ROWS][TILE_TXCOLS]; void main() { - const uint out_width_ntexels = divup4(out_sizes.x); - const uint out_col = (gl_GlobalInvocationID.x % out_width_ntexels) << 2; - const uint out_row = (gl_GlobalInvocationID.x / out_width_ntexels) * TILE_ROWS; + // txcol stands for "texel column". One txcol corresponds to 4 scalar columns. + $if TILE_TXCOLS > 1: + const uint global_wg_x = uint(divup(out_sizes.x, 4 * TILE_TXCOLS)); + const uint out_txcol = uint( + (gl_GlobalInvocationID.x % global_wg_x) * TILE_TXCOLS); + $else: + const uint global_wg_x = uint(divup4(out_sizes.x)); + const uint out_txcol = uint(gl_GlobalInvocationID.x % global_wg_x); + + const uint out_row = uint( + (gl_GlobalInvocationID.x / global_wg_x) * TILE_ROWS); + + $if QUANT_NBITS == 4: + const uint weight_txcol = uint(out_txcol / 2); const int gid = int(gl_LocalInvocationID.x); // group id const int wid = int(gl_LocalInvocationID.z); // worker id @@ -56,46 +71,78 @@ void main() { return; } - VEC4_T a[TILE_ROWS]; - VEC4_T b[4]; - VEC4_T local_c[TILE_ROWS]; + VEC4_T mat1[TILE_ROWS]; + VEC4_T qmat2[4][TILE_TXCOLS]; + VEC4_T local_sums[TILE_ROWS][TILE_TXCOLS]; - [[unroll]] for (int i = 0; i < TILE_ROWS; ++i) { - local_c[i] = VEC4_T(0.0); + [[unroll]] for (int r = 0; r < TILE_ROWS; ++r) { + $for c in range(TILE_TXCOLS): + local_sums[r][${c}] = VEC4_T(0.0); } - $if SCALES_STORAGE == "buffer": - const VEC4_T scales = VEC4_T(t_scales[out_col >> 2]); - $else: - const VEC4_T scales = VEC4_T(texelFetch(t_scales, ivec2(out_col >> 2, 0), 0)); - - for (int pos = 4 * wid; pos < in_sizes.x; pos += (4 * NWORKERS)) { - // Preload t_weight - [[unroll]] for (int i = 0; i < 4; i++) { - $if WEIGHT_STORAGE == "buffer": - b[i] = t_weight[((pos + i) * weight_sizes.x + out_col) >> 2]; + VEC4_T scales[TILE_TXCOLS]; + $for c in range(TILE_TXCOLS): + $if SCALES_STORAGE == "buffer": + scales[${c}] = VEC4_T(t_scales[out_txcol + ${c}]); + $else: + scales[${c}] = VEC4_T( + texelFetch(t_scales, ivec2(out_txcol + ${c}, 0), 0)); + + for (int pos = (4 * wid), txpos = wid; + pos < in_sizes.x; + pos += (4 * NWORKERS), txpos += NWORKERS) { + $if WEIGHT_STORAGE == "buffer": + uint 
qmat2_bufi; + uint weight_row_txstride = div4(weight_sizes.x); + + // Preload weight tensor + [[unroll]] for (int r = 0; r < 4; r++) { + $if QUANT_NBITS == 4: + $for c in range(0, TILE_TXCOLS, 2): + $if WEIGHT_STORAGE == "buffer": + qmat2_bufi = (pos + r) * weight_row_txstride + weight_txcol; + const u8vec4 packed_weight_tex = t_weight[qmat2_bufi + ${c}] + $else: + const uvec4 packed_weight_tex = texelFetch( + t_weight, ivec2(weight_txcol + ${c}, pos + r), 0); + + qmat2[r][${c}] = (VEC4_T((packed_weight_tex & 0xF0) >> 4) - 8.0); + qmat2[r][${c + 1}] = (VEC4_T(packed_weight_tex & 0x0F) - 8.0); $else: - b[i] = VEC4_T(texelFetch(t_weight, ivec2(out_col >> 2, pos + i), 0)); + $for c in range(TILE_TXCOLS): + $if WEIGHT_STORAGE == "buffer": + qmat2_bufi = (pos + r) * weight_row_txstride + out_txcol; + qmat2[r][${c}] = t_weight[qmat2_bufi + ${c}]; + $else: + qmat2[r][${c}] = VEC4_T( + texelFetch(t_weight, ivec2(out_txcol + ${c}, pos + r), 0)); } - // Preload t_in - for (int i = 0; i < TILE_ROWS; i++) { + + $if IN_STORAGE == "buffer": + uint in_row_txstride = div4(in_sizes.x); + + // Preload input tensor + [[unroll]] for (int i = 0; i < TILE_ROWS; i++) { $if IN_STORAGE == "buffer": - a[i] = t_in[((out_row + i) * in_sizes.x + pos) >> 2]; + mat1[i] = t_in[(out_row + i) * in_row_txstride + txpos]; $else: - a[i] = VEC4_T(texelFetch(t_in, ivec3(pos >> 2, out_row + i, 0), 0)); + mat1[i] = VEC4_T( + texelFetch(t_in, ivec3(txpos, out_row + i, 0), 0)); } // Accumulate partial output - [[unroll]] for (int i = 0; i < TILE_ROWS; ++i) { - local_c[i] += a[i].x * b[0] + - a[i].y * b[1] + - a[i].z * b[2] + - a[i].w * b[3]; + [[unroll]] for (int r = 0; r < TILE_ROWS; ++r) { + $for c in range(TILE_TXCOLS): + local_sums[r][${c}] += mat1[r].x * qmat2[0][${c}] + + mat1[r].y * qmat2[1][${c}] + + mat1[r].z * qmat2[2][${c}] + + mat1[r].w * qmat2[3][${c}]; } } - [[unroll]] for (int i = 0; i < TILE_ROWS; ++i) { - partial_c[gid][wid][i] = local_c[i]; + [[unroll]] for (int r = 0; r < TILE_ROWS; ++r) { + $for c in range(TILE_TXCOLS): + partial_sums[gid][wid][r][${c}] = local_sums[r][${c}]; } memoryBarrierShared(); @@ -105,21 +152,33 @@ void main() { return; } - VEC4_T c[TILE_ROWS]; + VEC4_T sums[TILE_ROWS][TILE_TXCOLS]; + + for (int r = 0; r < TILE_ROWS; ++r) { + $for c in range(TILE_TXCOLS): + sums[r][${c}] = VEC4_T(0.0); - for (int row = 0; row < TILE_ROWS; ++row) { - c[row] = VEC4_T(0.0); [[unroll]] for (int worker = 0; worker < NWORKERS; ++worker) { - c[row] += partial_c[gid][worker][row]; + $for c in range(TILE_TXCOLS): + sums[r][${c}] += partial_sums[gid][worker][r][${c}]; } } - [[unroll]] for (int i = 0; i < TILE_ROWS; ++i) { - $if OUT_STORAGE == "buffer": - if (out_row + i < out_sizes.y) { - t_out[((out_row + i) * out_sizes.x + out_col) >> 2] = c[i] * scales; - } - $else: - imageStore(t_out, ivec3(out_col >> 2, out_row + i, 0), c[i] * scales); + $if OUT_STORAGE == "buffer": + uint out_bufi; + uint out_row_txstride = div4(out_sizes.x); + + [[unroll]] for (int r = 0; r < TILE_ROWS; ++r) { + $for c in range(TILE_TXCOLS): + $if OUT_STORAGE == "buffer": + if (out_row + r < out_sizes.y) { + out_bufi = (out_row + r) * out_row_txstride + out_txcol; + t_out[out_bufi + ${c}] = sums[r][${c}] * scales[${c}]; + } + $else: + imageStore( + t_out, + ivec3(out_txcol + ${c}, out_row + r, 0), + sums[r][${c}] * scales[${c}]); } } diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.yaml b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.yaml index e0477a3a3d1..3dff6855142 100644 --- 
a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_coop.yaml @@ -12,6 +12,8 @@ linear_qcsnw_coop: WEIGHT_STORAGE: texture2d SCALES_STORAGE: texture2d TILE_ROWS: 4 + TILE_TXCOLS: 1 + QUANT_NBITS: 8 generate_variant_forall: TILE_ROWS: - VALUE: 1 @@ -26,3 +28,11 @@ linear_qcsnw_coop: OUT_STORAGE: buffer WEIGHT_STORAGE: buffer SCALES_STORAGE: buffer + - NAME: linear_qcs4w_coop_texture3d_texture3d_texture2d_texture2d_float + TILE_TXCOLS: 2 + QUANT_NBITS: 4 + - NAME: linear_qcs4w_coop_buffer_buffer_texture2d_texture2d_float + IN_STORAGE: buffer + OUT_STORAGE: buffer + TILE_TXCOLS: 2 + QUANT_NBITS: 4 diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl index 3ef952ea34d..f6f05aab7ca 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.glsl @@ -14,6 +14,7 @@ #define VEC4_T ${buffer_gvec_type(DTYPE, 4)} #define TILE_ROWS ${TILE_ROWS} +#define TILE_TXCOLS ${TILE_TXCOLS} ${define_required_extensions(DTYPE)} @@ -26,7 +27,10 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, OUT_STORAGE, is_scalar_array=False)} ${layout_declare_tensor(B, "r", "t_in", DTYPE, IN_STORAGE, is_scalar_array=False)} -${layout_declare_tensor(B, "r", "t_weight", "int8", WEIGHT_STORAGE, is_scalar_array=False)} +$if QUANT_NBITS == 4: + ${layout_declare_tensor(B, "r", "t_weight", "uint8", WEIGHT_STORAGE, is_scalar_array=False)} +$else: + ${layout_declare_tensor(B, "r", "t_weight", "int8", WEIGHT_STORAGE, is_scalar_array=False)} ${layout_declare_tensor(B, "r", "t_scales", DTYPE, SCALES_STORAGE, is_scalar_array=False)} @@ -43,57 +47,110 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require void main() { - const uint16_t out_width_ntexels = uint16_t(divup4(out_sizes.x)); - const uint16_t out_col = uint16_t((gl_GlobalInvocationID.x % out_width_ntexels) << 2); - const uint16_t out_row = uint16_t((gl_GlobalInvocationID.x / out_width_ntexels) * TILE_ROWS); + // txcol stands for "texel column". One txcol corresponds to 4 scalar columns. 
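  // Worked example of the txcol mapping computed below (illustrative values,
  // not from the patch): with out_sizes.x = 64 scalar columns and
  // TILE_TXCOLS = 2, each invocation covers 4 * 2 = 8 columns, so
  // global_wg_x = divup(64, 8) = 8; invocation 13 then gets
  // out_txcol = (13 % 8) * 2 = 10 (scalar column 40) and
  // out_row = (13 / 8) * TILE_ROWS.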
+ $if TILE_TXCOLS > 1: + const uint16_t global_wg_x = uint16_t(divup(out_sizes.x, 4 * TILE_TXCOLS)); + const uint16_t out_txcol = uint16_t( + (gl_GlobalInvocationID.x % global_wg_x) * TILE_TXCOLS); + $else: + const uint16_t global_wg_x = uint16_t(divup4(out_sizes.x)); + const uint16_t out_txcol = uint16_t(gl_GlobalInvocationID.x % global_wg_x); + + const uint16_t out_row = uint16_t( + (gl_GlobalInvocationID.x / global_wg_x) * TILE_ROWS); + + $if QUANT_NBITS == 4: + const uint16_t weight_txcol = uint16_t(out_txcol / 2); if (out_row >= uint16_t(out_sizes.y)) { return; } - VEC4_T a[TILE_ROWS]; - VEC4_T b[4]; - VEC4_T c[TILE_ROWS]; + VEC4_T mat1[TILE_ROWS]; + VEC4_T qmat2[4][TILE_TXCOLS]; + VEC4_T sums[TILE_ROWS][TILE_TXCOLS]; - $if SCALES_STORAGE == "buffer": - const VEC4_T scales = VEC4_T(t_scales[int(out_col >> 2)]); - $else: - const VEC4_T scales = VEC4_T(texelFetch(t_scales, u16vec2(out_col >> 2, 0), 0)); + VEC4_T scales[TILE_TXCOLS]; + $for c in range(TILE_TXCOLS): + $if SCALES_STORAGE == "buffer": + scales[${c}] = VEC4_T(t_scales[out_txcol + ${c}]); + $else: + scales[${c}] = VEC4_T( + texelFetch(t_scales, u16vec2(out_txcol + ${c}, 0), 0)); - [[unroll]] for (int i = 0; i < TILE_ROWS; ++i) { - c[i] = VEC4_T(0.0); + [[unroll]] for (int r = 0; r < TILE_ROWS; ++r) { + $for c in range(TILE_TXCOLS): + sums[r][${c}] = VEC4_T(0.0); } - for (uint16_t pos = uint16_t(0); pos < uint16_t(in_sizes.x); pos += uint16_t(4)) { + for (uint16_t pos = uint16_t(0), txpos = uint16_t(0); + pos < uint16_t(in_sizes.x); + pos += uint16_t(4), txpos += uint16_t(1)) { + $if WEIGHT_STORAGE == "buffer": + uint qmat2_bufi; + uint weight_row_txstride = div4(weight_sizes.x); + // Preload weight tensor - [[unroll]] for (int i = 0; i < 4; i++) { - $if WEIGHT_STORAGE == "buffer": - b[i] = t_weight[((pos + i) * out_sizes.x + out_col) >> 2]; + [[unroll]] for (int r = 0; r < 4; r++) { + $if QUANT_NBITS == 4: + $for c in range(0, TILE_TXCOLS, 2): + $if WEIGHT_STORAGE == "buffer": + qmat2_bufi = (pos + r) * weight_row_txstride + weight_txcol; + const u8vec4 packed_weight_tex = t_weight[qmat2_bufi + ${c}] + $else: + const uvec4 packed_weight_tex = texelFetch( + t_weight, u16vec2(weight_txcol + ${c}, pos + r), 0); + + qmat2[r][${c}] = (VEC4_T((packed_weight_tex & 0xF0) >> 4) - 8.0); + qmat2[r][${c + 1}] = (VEC4_T(packed_weight_tex & 0x0F) - 8.0); $else: - b[i] = VEC4_T(texelFetch(t_weight, u16vec2(out_col >> 2, pos + i), 0)); + $for c in range(TILE_TXCOLS): + $if WEIGHT_STORAGE == "buffer": + qmat2_bufi = (pos + r) * weight_row_txstride + out_txcol; + qmat2[r][${c}] = t_weight[qmat2_bufi + ${c}]; + $else: + qmat2[r][${c}] = VEC4_T( + texelFetch(t_weight, u16vec2(out_txcol + ${c}, pos + r), 0)); } + $if IN_STORAGE == "buffer": + uint in_row_txstride = div4(in_sizes.x); + // Preload input tensor [[unroll]] for (int i = 0; i < TILE_ROWS; i++) { $if IN_STORAGE == "buffer": - a[i] = t_in[((out_row + i) * in_sizes.x + pos) >> 2]; + mat1[i] = t_in[(out_row + i) * in_row_txstride + txpos]; $else: - a[i] = VEC4_T(texelFetch(t_in, u16vec3(pos >> 2, out_row + i, 0), 0)); + mat1[i] = VEC4_T( + texelFetch(t_in, u16vec3(txpos, out_row + i, 0), 0)); } // Accumulate output - [[unroll]] for (int i = 0; i < TILE_ROWS; ++i) { - c[i] += a[i].x * b[0] + a[i].y * b[1] + a[i].z * b[2] + a[i].w * b[3]; + [[unroll]] for (int r = 0; r < TILE_ROWS; ++r) { + $for c in range(TILE_TXCOLS): + sums[r][${c}] += mat1[r].x * qmat2[0][${c}] + + mat1[r].y * qmat2[1][${c}] + + mat1[r].z * qmat2[2][${c}] + + mat1[r].w * qmat2[3][${c}]; } } // Store to output tensor - 
[[unroll]] for (int i = 0; i < TILE_ROWS; ++i) { - $if OUT_STORAGE == "buffer": - if (out_row + i < out_sizes.y) { - t_out[((out_row + i) * out_sizes.x + out_col) >> 2] = c[i] * scales; - } - $else: - imageStore(t_out, ivec3(out_col >> 2, out_row + i, 0), c[i] * scales); + $if OUT_STORAGE == "buffer": + uint out_bufi; + uint out_row_txstride = div4(out_sizes.x); + + [[unroll]] for (int r = 0; r < TILE_ROWS; ++r) { + $for c in range(TILE_TXCOLS): + $if OUT_STORAGE == "buffer": + if (out_row + r < out_sizes.y) { + out_bufi = (out_row + r) * out_row_txstride + out_txcol; + t_out[out_bufi + ${c}] = sums[r][${c}] * scales[${c}]; + } + $else: + imageStore( + t_out, + ivec3(out_txcol + ${c}, out_row + r, 0), + sums[r][${c}] * scales[${c}]); } } diff --git a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.yaml b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.yaml index f9f0134d995..1c9ec4e524a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/linear_qcsnw_tiled.yaml @@ -12,6 +12,8 @@ linear_qcsnw_tiled: WEIGHT_STORAGE: texture2d SCALES_STORAGE: texture2d TILE_ROWS: 4 + TILE_TXCOLS: 1 + QUANT_NBITS: 8 generate_variant_forall: TILE_ROWS: - VALUE: 1 @@ -30,3 +32,11 @@ linear_qcsnw_tiled: OUT_STORAGE: buffer WEIGHT_STORAGE: buffer SCALES_STORAGE: buffer + - NAME: linear_qcs4w_tiled_texture3d_texture3d_texture2d_texture2d_float + TILE_TXCOLS: 2 + QUANT_NBITS: 4 + - NAME: linear_qcs4w_tiled_buffer_buffer_texture2d_texture2d_float + IN_STORAGE: buffer + OUT_STORAGE: buffer + TILE_TXCOLS: 2 + QUANT_NBITS: 4 diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQCSNW.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQCSNW.cpp index 85695488dfc..6e101195e3f 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQCSNW.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQCSNW.cpp @@ -17,6 +17,7 @@ namespace vkcompute { void check_linear_qcsnw_args( const ComputeGraph& graph, + const int quant_nbits, const ValueRef mat1, const ValueRef qmat2_data, const ValueRef scales, @@ -31,13 +32,20 @@ void check_linear_qcsnw_args( VK_CHECK_COND(graph.packed_dim_of(mat1) == graph.packed_dim_of(out)); - VK_CHECK_COND( - utils::val_at(-1, mat1_sizes) == utils::val_at(-1, qmat2_sizes)); - VK_CHECK_COND( - utils::val_at(-1, scales_sizes) == utils::val_at(-2, qmat2_sizes)); + if (quant_nbits == 4) { + VK_CHECK_COND( + utils::val_at(-1, mat1_sizes) == utils::val_at(-1, qmat2_sizes) * 2); + VK_CHECK_COND( + utils::val_at(-1, scales_sizes) == utils::val_at(-2, qmat2_sizes)); + } else { + VK_CHECK_COND( + utils::val_at(-1, mat1_sizes) == utils::val_at(-1, qmat2_sizes)); + VK_CHECK_COND( + utils::val_at(-1, scales_sizes) == utils::val_at(-2, qmat2_sizes)); + } } -void resize_linear_qcs8w_node( +void resize_linear_qcsnw_node( ComputeGraph* graph, const std::vector& args, const std::vector& extra_args) { @@ -48,7 +56,12 @@ void resize_linear_qcs8w_node( vTensorPtr qmat2 = graph->get_tensor(args[1].refs[1]); const int out_cols = utils::val_at(-2, mat1->sizes()); - const int out_rows = utils::val_at(-1, qmat2->sizes()); + int out_rows = utils::val_at(-1, qmat2->sizes()); + // Byte dtype suggests 4-bit quantization in which case the weight tensor is + // packed with 2 values per byte. 
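  // Illustrative sizing example (values assumed, not from the patch): a 4-bit
  // weight prepacked to shape [K, N/2] = [128, 32] reports 32 in its last
  // dim, so the doubling below recovers the true N = 64 output features.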
+ if (qmat2->dtype() == vkapi::kByte) { + out_rows *= 2; + } std::vector new_out_sizes(3); if (mat1->sizes().size() == 2) { @@ -135,34 +148,40 @@ void add_linear_qcs8w_node( // Resize Args {}, // Resizing Logic - resize_linear_qcs8w_node)); + resize_linear_qcsnw_node)); if (!graph.is_buffer_storage(out) && graph.packed_dim_of(out) != WHCN::kWidthDim) { viewFn(graph, {out_W_packed, graph.add_none(), out}); } } -void add_linear_qcs8w_tiled_node( +void add_linear_qcsnw_tiled_node( ComputeGraph& graph, const bool use_coop_algorithm, + const int quant_nbits, const ValueRef mat1, const ValueRef q_mat2_data, const ValueRef scales_data, const ValueRef out) { - utils::StorageType q_mat2_storage = utils::kTexture2D; - uint32_t max_extent = graph.context()->adapter_ptr()->max_texture2d_dim(); std::vector qmat2_orig_sizes = graph.sizes_of(q_mat2_data); const int64_t ndim = graph.dim_of(q_mat2_data); const int64_t K = qmat2_orig_sizes.at(ndim - 1); const int64_t N = qmat2_orig_sizes.at(ndim - 2); - if (N > max_extent * 4 || K > max_extent) { - q_mat2_storage = utils::kBuffer; - } + ValueRef q_mat2; + if (quant_nbits == 4) { + q_mat2 = + prepack_int4_linear_weight_transposed_interleaved(graph, q_mat2_data); + } else { + utils::StorageType q_mat2_storage = utils::kTexture2D; + if (N > max_extent * 4 || K > max_extent) { + q_mat2_storage = utils::kBuffer; + } - ValueRef q_mat2 = prepack_standard_hw_transposed( - graph, q_mat2_data, q_mat2_storage, utils::kWidthPacked); + q_mat2 = prepack_standard_hw_transposed( + graph, q_mat2_data, q_mat2_storage, utils::kWidthPacked); + } utils::StorageType scales_storage = utils::kTexture2D; if (N > max_extent) { @@ -171,8 +190,14 @@ void add_linear_qcs8w_tiled_node( ValueRef scales = prepack_standard(graph, scales_data, scales_storage, utils::kWidthPacked); - std::string kernel_name = - use_coop_algorithm ? "linear_qcs8w_coop" : "linear_qcs8w_tiled"; + std::string kernel_name; + if (quant_nbits == 4) { + kernel_name = + use_coop_algorithm ? "linear_qcs4w_coop" : "linear_qcs4w_tiled"; + } else { + kernel_name = + use_coop_algorithm ? 
"linear_qcs8w_coop" : "linear_qcs8w_tiled"; + } kernel_name.reserve(kShaderNameReserve); add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); add_storage_type_suffix(kernel_name, graph.storage_type_of(mat1)); @@ -197,9 +222,16 @@ void add_linear_qcs8w_tiled_node( out_tile_nrows = 4; } + // Number of output texels in the output tile + uint32_t out_tile_ntxcols = 1; + if (quant_nbits == 4) { + out_tile_ntxcols = 2; + } + utils::uvec3 out_limits = graph.logical_limits_of(out); + uint32_t global_wg_x = utils::div_up(out_limits[0], out_tile_ntxcols); utils::uvec3 global_wg_size = { - out_limits[0] * (utils::div_up(out_limits[1], out_tile_nrows)), + global_wg_x * (utils::div_up(out_limits[1], out_tile_nrows)), 1, out_limits[2]}; @@ -224,7 +256,7 @@ void add_linear_qcs8w_tiled_node( // Resize Args {}, // Resizing Logic - resize_linear_qcs8w_node)); + resize_linear_qcsnw_node)); } bool can_use_tiled_impl( @@ -238,7 +270,7 @@ bool can_use_tiled_impl( // Check if mat1 is not a 3D tensor or that batches = 1 // TODO(ssjia): Add support for batches in the tiled impl - if (graph.dim_of(mat1) == 3 && graph.size_at(-1, mat1) != 1) { + if (graph.dim_of(mat1) == 3 && graph.size_at(0, mat1) != 1) { return false; } // Check that K is a multiple of 4 @@ -283,17 +315,27 @@ bool can_use_coop_impl(ComputeGraph& graph, const ValueRef mat1) { void weight_int8pack_mm( ComputeGraph& graph, const std::vector& args) { - check_linear_qcsnw_args(graph, args[0], args[1], args[2], args[3]); + check_linear_qcsnw_args(graph, 8, args[0], args[1], args[2], args[3]); if (can_use_tiled_impl(graph, args[0], args[1], args[2], args[3])) { bool use_coop_algorithm = can_use_coop_impl(graph, args[0]); - return add_linear_qcs8w_tiled_node( - graph, use_coop_algorithm, args[0], args[1], args[2], args[3]); + return add_linear_qcsnw_tiled_node( + graph, use_coop_algorithm, 8, args[0], args[1], args[2], args[3]); } return add_linear_qcs8w_node(graph, args[0], args[1], args[2], args[3]); } +void linear_qcs4w(ComputeGraph& graph, const std::vector& args) { + check_linear_qcsnw_args(graph, 4, args[0], args[1], args[2], args[3]); + + VK_CHECK_COND(can_use_tiled_impl(graph, args[0], args[1], args[2], args[3])); + bool use_coop_algorithm = can_use_coop_impl(graph, args[0]); + return add_linear_qcsnw_tiled_node( + graph, use_coop_algorithm, 4, args[0], args[1], args[2], args[3]); +} + REGISTER_OPERATORS { VK_REGISTER_OP(aten._weight_int8pack_mm.default, weight_int8pack_mm); + VK_REGISTER_OP(et_vk.linear_qcs4w.default, linear_qcs4w); } } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQGANW.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQGANW.cpp index b3ead94d8ff..8c5cb0093d9 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQGANW.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinearQGANW.cpp @@ -70,54 +70,6 @@ void resize_linear_qga4w_node( out->virtual_resize(new_out_sizes); } -ValueRef prepack_int4_linear_weight_transposed_interleaved( - ComputeGraph& graph, - const ValueRef qmat2_data) { - std::vector qmat2_orig_sizes = graph.sizes_of(qmat2_data); - const int64_t ndim = graph.dim_of(qmat2_data); - - const int64_t K = qmat2_orig_sizes.at(ndim - 1) * 2; - const int64_t N = qmat2_orig_sizes.at(ndim - 2); - const int64_t N_div2 = N / int64_t(2); - - utils::StorageType storage_type = utils::kTexture2D; - uint32_t max_extent = graph.context()->adapter_ptr()->max_texture2d_dim(); - if (N_div2 > max_extent * 4 || K > max_extent) { - storage_type = 
utils::kBuffer; - } - - std::vector qmat2_sizes{K, N_div2}; - ValueRef qmat2 = graph.add_tensor( - qmat2_sizes, vkcompute::vkapi::kByte, storage_type, utils::kWidthPacked); - - utils::uvec3 global_wg_size; - global_wg_size = graph.logical_limits_of(qmat2); - global_wg_size[1] = utils::div_up(global_wg_size[1], uint32_t(2)); - - std::string kernel_name = - graph.context()->adapter_ptr()->has_full_int8_buffers_support() - ? "pack_int4_linear_weight_transposed_interleaved" - : "pack_int4_linear_weight_transposed_interleaved_nobitw8buffer"; - add_storage_type_suffix(kernel_name, storage_type); - - graph.prepack_nodes().emplace_back(new PrepackNode( - graph, - VK_KERNEL_FROM_STR(kernel_name), - global_wg_size, - graph.create_local_wg_size(global_wg_size), - // Inputs and Outputs - qmat2_data, - qmat2, - // UBOs - {}, - // Specialization Constants - {}, - // Push Constants - {graph.sizes_pc_of(qmat2)})); - - return qmat2; -} - void add_linear_qga4w_node( ComputeGraph& graph, const ValueRef mat1, diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index 32e63baeafc..f39b0fc33ff 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -246,6 +246,54 @@ ValueRef prepack_direct_copy_buffer( return tensor; } +ValueRef prepack_int4_linear_weight_transposed_interleaved( + ComputeGraph& graph, + const ValueRef qmat2_data) { + std::vector qmat2_orig_sizes = graph.sizes_of(qmat2_data); + const int64_t ndim = graph.dim_of(qmat2_data); + + const int64_t K = qmat2_orig_sizes.at(ndim - 1) * 2; + const int64_t N = qmat2_orig_sizes.at(ndim - 2); + const int64_t N_div2 = N / int64_t(2); + + utils::StorageType storage_type = utils::kTexture2D; + uint32_t max_extent = graph.context()->adapter_ptr()->max_texture2d_dim(); + if (N_div2 > max_extent * 4 || K > max_extent) { + storage_type = utils::kBuffer; + } + + std::vector qmat2_sizes{K, N_div2}; + ValueRef qmat2 = graph.add_tensor( + qmat2_sizes, vkcompute::vkapi::kByte, storage_type, utils::kWidthPacked); + + utils::uvec3 global_wg_size; + global_wg_size = graph.logical_limits_of(qmat2); + global_wg_size[1] = utils::div_up(global_wg_size[1], uint32_t(2)); + + std::string kernel_name = + graph.context()->adapter_ptr()->has_full_int8_buffers_support() + ? 
"pack_int4_linear_weight_transposed_interleaved" + : "pack_int4_linear_weight_transposed_interleaved_nobitw8buffer"; + add_storage_type_suffix(kernel_name, storage_type); + + graph.prepack_nodes().emplace_back(new PrepackNode( + graph, + VK_KERNEL_FROM_STR(kernel_name), + global_wg_size, + graph.create_local_wg_size(global_wg_size), + // Inputs and Outputs + qmat2_data, + qmat2, + // UBOs + {}, + // Specialization Constants + {}, + // Push Constants + {graph.sizes_pc_of(qmat2)})); + + return qmat2; +} + void prepack_op(ComputeGraph& graph, const std::vector& args) { return add_prepack_standard_node(graph, args[0], args[1]); } diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.h b/backends/vulkan/runtime/graph/ops/impl/Staging.h index 1b6f245bd34..090a3718295 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.h +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.h @@ -87,4 +87,11 @@ ValueRef prepack_direct_copy_buffer( ComputeGraph& graph, const ValueRef tensor_data); +// +// Op specific prepack functions + +ValueRef prepack_int4_linear_weight_transposed_interleaved( + ComputeGraph& graph, + const ValueRef qmat2_data); + } // namespace vkcompute diff --git a/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp b/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp index 5d08ee57859..b95b7b3aa6d 100644 --- a/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp +++ b/backends/vulkan/test/op_tests/linear_weight_int4_test.cpp @@ -62,7 +62,7 @@ at::Tensor unpack_weights_4x2(const at::Tensor& weights_4x2) { return weights_unpacked; } -at::Tensor dequantize_and_linear( +at::Tensor dequantize_and_linear_qga4w( const at::Tensor& x, const at::Tensor& weights_4x2, const int64_t groupsize, @@ -97,6 +97,56 @@ at::Tensor dequantize_and_linear( return at::linear(x, weights_dequantized); } +at::Tensor dequantize_and_linear_qcs4w( + const at::Tensor& x, + const at::Tensor& weights_4x2, + const at::Tensor& scales) { + std::vector weights_shape(weights_4x2.sizes().vec()); + weights_shape[1] *= 2; + + at::Tensor weights_dequantized = + at::empty(weights_shape, at::device(at::kCPU).dtype(at::kFloat)); + + const int64_t N = weights_dequantized.size(0); + const int64_t K = weights_dequantized.size(1); + + for (int n = 0; n < N; n++) { + for (int k = 0; k < K; k += 2) { + // const int scale_idx = k_groups * n + group_idx; + const uint8_t packed_val = weights_4x2[n][k / 2].item().to(); + const uint8_t second_val = packed_val & 0x0F; + const uint8_t first_val = (packed_val & 0xF0) >> 4; + + const float scale = scales[n].item().to(); + + weights_dequantized[n][k] = (float(first_val) - 8.0) * scale; + weights_dequantized[n][k + 1] = (float(second_val) - 8.0) * scale; + } + } + + return at::linear(x, weights_dequantized); +} + +at::Tensor linear_qcs4w_reference_impl( + const at::Tensor& x, + const at::Tensor& weights_4x2, + const at::Tensor& scales) { + const std::vector original_x_size(x.sizes().vec()); + const size_t ndim = original_x_size.size(); + const int64_t out_features = weights_4x2.size(0); + const at::Tensor x_flattened = x.reshape({-1, original_x_size[ndim - 1]}); + + const at::Tensor weights_unpacked = + (unpack_weights_4x2(weights_4x2) - 8).to(at::kChar); + at::Tensor out = + at::_weight_int8pack_mm(x_flattened, weights_unpacked, scales); + + std::vector out_shape( + original_x_size.begin(), original_x_size.end()); + out_shape.at(ndim - 1) = out_features; + return out.reshape(out_shape); +} + // // Test functions // @@ -126,12 +176,31 @@ void test_reference_linear_qga4w( 
scales_and_zeros, inner_k_tiles); - at::Tensor out_ref = dequantize_and_linear( + at::Tensor out_ref = dequantize_and_linear_qga4w( x, weights_4x2, group_size, scales_and_zeros, inner_k_tiles); ASSERT_TRUE(at::allclose(out, out_ref)); } +void test_reference_linear_qcs4w( + const int B, + const int M, + const int K, + const int N) { + at::Tensor x = at::rand({B, M, K}, at::device(at::kCPU).dtype(at::kFloat)); + at::Tensor weights_4x2 = + at::randint(0, 256, {N, K / 2}, at::device(at::kCPU).dtype(at::kByte)); + at::Tensor weights_int = unpack_weights_4x2(weights_4x2); + + at::Tensor scales = at::rand({N}, at::device(at::kCPU).dtype(at::kFloat)); + + at::Tensor out = linear_qcs4w_reference_impl(x, weights_4x2, scales); + + at::Tensor out_ref = dequantize_and_linear_qcs4w(x, weights_4x2, scales); + + ASSERT_TRUE(at::allclose(out, out_ref)); +} + vkcompute::vkapi::ScalarType from_at_scalartype(c10::ScalarType at_scalartype) { using namespace vkcompute; switch (at_scalartype) { @@ -265,6 +334,85 @@ void test_vulkan_linear_qga4w( vkcompute::utils::kTexture3D); } +void test_vulkan_linear_qcs4w_impl( + const int B, + const int M, + const int K, + const int N, + const vkcompute::utils::StorageType in_storage = + vkcompute::utils::kTexture3D, + const vkcompute::utils::StorageType out_storage = + vkcompute::utils::kTexture3D) { + at::Tensor x = at::rand({B, M, K}, at::device(at::kCPU).dtype(at::kFloat)); + at::Tensor weights_4x2 = + at::randint(0, 256, {N, K / 2}, at::device(at::kCPU).dtype(at::kByte)); + + at::Tensor scales = at::rand({N}, at::device(at::kCPU).dtype(at::kFloat)); + + at::Tensor out_ref = linear_qcs4w_reference_impl(x, weights_4x2, scales); + + // Build Vulkan graph + using namespace vkcompute; + + GraphConfig config; + config.set_storage_type_override(utils::kTexture3D); + ComputeGraph graph(config); + +#define MAKE_TENSORREF_FOR(x) \ + ValueRef r_##x = graph.add_tensorref( \ + x.sizes().vec(), \ + from_at_scalartype(x.scalar_type()), \ + x.const_data_ptr()); + + MAKE_TENSORREF_FOR(weights_4x2); + MAKE_TENSORREF_FOR(scales); + + IOValueRef r_x = graph.add_input_tensor( + x.sizes().vec(), from_at_scalartype(x.scalar_type()), in_storage); + + const ValueRef r_out = graph.add_tensor( + out_ref.sizes().vec(), + from_at_scalartype(out_ref.scalar_type()), + out_storage); + + VK_GET_OP_FN("et_vk.linear_qcs4w.default") + (graph, {r_x.value, r_weights_4x2, r_scales, r_out}); + + ValueRef staging_out = graph.set_output_tensor(r_out); + + graph.prepare(); + graph.encode_prepack(); + graph.prepack(); + graph.encode_execute(); + + // + // Run model + // + + graph.propagate_resize(); + graph.copy_into_staging(r_x.staging, x.const_data_ptr(), x.numel()); + + graph.execute(); + + at::Tensor vk_out = at::empty_like(out_ref); + graph.copy_from_staging( + staging_out, vk_out.mutable_data_ptr(), vk_out.numel()); + + ASSERT_TRUE(at::allclose(vk_out, out_ref, 1e-4, 1e-4)); +} + +void test_vulkan_linear_qcs4w( + const int B, + const int M, + const int K, + const int N) { + test_vulkan_linear_qcs4w_impl( + B, M, K, N, vkcompute::utils::kBuffer, vkcompute::utils::kBuffer); + + test_vulkan_linear_qcs4w_impl( + B, M, K, N, vkcompute::utils::kTexture3D, vkcompute::utils::kTexture3D); +} + TEST(VulkanLinearQGA4WTest, test_reference_impl) { test_reference_linear_qga4w( /*B = */ 1, @@ -294,3 +442,33 @@ TEST(VulkanLinearQGA4WTest, test_vulkan_impl_gemm) { /*K = */ 256, /*N = */ 256); } + +TEST(VulkanLinearQCS4WTest, test_reference_impl) { + test_reference_linear_qcs4w( + /*B = */ 1, + /*M = */ 4, + /*K = */ 128, + 
/*N = */ 32);
+}
+
+TEST(VulkanLinearQCS4WTest, test_vulkan_impl_small_m) {
+  test_vulkan_linear_qcs4w(
+      /*B = */ 1,
+      /*M = */ 4,
+      /*K = */ 128,
+      /*N = */ 32);
+
+  test_vulkan_linear_qcs4w(
+      /*B = */ 1,
+      /*M = */ 1,
+      /*K = */ 256,
+      /*N = */ 256);
+}
+
+TEST(VulkanLinearQCS4WTest, test_vulkan_impl_gemm) {
+  test_vulkan_linear_qcs4w(
+      /*B = */ 1,
+      /*M = */ 32,
+      /*K = */ 32,
+      /*N = */ 32);
+}

From d9c6f80546af09684e85277d7cee5aa7c15b2746 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A5ns=20Nilsson?=
Date: Thu, 8 May 2025 14:14:10 +0200
Subject: [PATCH 007/178] Arm backend: Add model name to -llama_inputs (#10775)

This way, Llama variants other than stories110m can be run.
---
 backends/arm/test/models/test_llama.py | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py
index 44a8fdc2a04..f5d879b3b8b 100644
--- a/backends/arm/test/models/test_llama.py
+++ b/backends/arm/test/models/test_llama.py
@@ -33,27 +33,35 @@ class TestLlama(unittest.TestCase):
     """
     Test class of Llama models. Type of Llama model depends on command line parameters:
-    --llama_inputs <path to .pt file> <path to .json file>
-    Example: --llama_inputs stories110M/stories110M.pt stories110M/params.json
+    --llama_inputs <path to .pt file> <path to .json file> <model name>
+    Example: --llama_inputs stories110M/stories110M.pt stories110M/params.json stories110m
+
+    For more examples and info see examples/models/llama/README.md.
     """
 
     def prepare_model(self):
         checkpoint = None
         params_file = None
+        usage = "To run use --llama_inputs <.pt/.pth> <.json> <model name>"
+
         if conftest.is_option_enabled("llama_inputs"):
             param_list = conftest.get_option("llama_inputs")
-            assert (
-                isinstance(param_list, list) and len(param_list) == 2
-            ), "invalid number of inputs for --llama_inputs"
+
+            if not isinstance(param_list, list) or len(param_list) != 3:
+                raise RuntimeError(
+                    f"Invalid number of inputs for --llama_inputs. {usage}"
+                )
+            if not all(isinstance(param, str) for param in param_list):
+                raise RuntimeError(
+                    f"All --llama_inputs are expected to be strings. {usage}"
+                )
+
             checkpoint = param_list[0]
             params_file = param_list[1]
-            assert isinstance(checkpoint, str) and isinstance(
-                params_file, str
-            ), "invalid input for --llama_inputs"
+            model_name = param_list[2]
         else:
             logger.warning(
-                "Skipping Llama test because of lack of input. To run use --llama_inputs <.pt> <.json>"
+                f"Skipping Llama tests because of missing --llama_inputs. 
{usage}" ) return None, None, None @@ -71,7 +79,7 @@ def prepare_model(self): "-p", params_file, "--model", - "stories110m", + model_name, ] parser = build_args_parser() args = parser.parse_args(args) @@ -122,6 +130,7 @@ def test_llama_tosa_BI(self): .quantize() .export() .to_edge_transform_and_lower() + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() .run_method_and_compare_outputs( inputs=llama_inputs, From 3c21e3a9d8d79c6091647b70b27ac2487940cccc Mon Sep 17 00:00:00 2001 From: SaoirseARM <44364573+SaoirseARM@users.noreply.github.com> Date: Thu, 8 May 2025 15:53:08 +0100 Subject: [PATCH 008/178] Arm Backend: Update unit tests for TOSA 1.0 (#10776) ### Summary Refactoring of unit tests to allow for testing of TOSA 1.0 Adds command-line argument --arm_run_tosa_version to run tests on particular version --- backends/arm/scripts/parse_test_names.py | 10 +- backends/arm/test/common.py | 14 +- backends/arm/test/conftest.py | 16 +- backends/arm/test/ops/test_abs.py | 173 ++---- backends/arm/test/ops/test_add.py | 85 +-- backends/arm/test/ops/test_alias_copy.py | 24 +- backends/arm/test/ops/test_amax.py | 95 ++- backends/arm/test/ops/test_amin.py | 86 ++- backends/arm/test/ops/test_any.py | 88 +-- backends/arm/test/ops/test_arange.py | 39 +- backends/arm/test/ops/test_avg_pool2d.py | 95 +-- backends/arm/test/ops/test_batch_norm.py | 500 +++++---------- backends/arm/test/ops/test_bitwise.py | 56 +- backends/arm/test/ops/test_bmm.py | 307 +++++----- backends/arm/test/ops/test_cat.py | 274 ++++----- backends/arm/test/ops/test_clamp.py | 67 +-- backends/arm/test/ops/test_clone.py | 63 +- backends/arm/test/ops/test_constant_pad_nd.py | 25 +- backends/arm/test/ops/test_conv1d.py | 72 +-- backends/arm/test/ops/test_conv2d.py | 99 ++- backends/arm/test/ops/test_conv3d.py | 71 ++- backends/arm/test/ops/test_conv_combos.py | 567 ++++++++++-------- backends/arm/test/ops/test_depthwise_conv.py | 266 ++++---- backends/arm/test/ops/test_div.py | 268 +++------ backends/arm/test/ops/test_eq.py | 66 +- backends/arm/test/ops/test_erf.py | 20 +- backends/arm/test/ops/test_exp.py | 188 +++--- backends/arm/test/ops/test_expand.py | 263 ++++---- backends/arm/test/ops/test_full.py | 369 ++++++------ backends/arm/test/ops/test_ge.py | 54 +- backends/arm/test/ops/test_gelu.py | 42 +- backends/arm/test/ops/test_gt.py | 58 +- backends/arm/test/ops/test_hardsigmoid.py | 191 +++--- backends/arm/test/ops/test_hardswish.py | 181 ++---- backends/arm/test/ops/test_hardtanh.py | 206 +++---- backends/arm/test/ops/test_layer_norm.py | 35 +- backends/arm/test/ops/test_le.py | 60 +- backends/arm/test/ops/test_leaky_relu.py | 28 +- backends/arm/test/ops/test_linear.py | 326 ++++------ backends/arm/test/ops/test_log.py | 178 ++---- backends/arm/test/ops/test_logical.py | 86 ++- backends/arm/test/ops/test_logsoftmax.py | 41 +- backends/arm/test/ops/test_lshift.py | 91 +-- backends/arm/test/ops/test_lt.py | 62 +- backends/arm/test/ops/test_max_pool.py | 421 +++++-------- backends/arm/test/ops/test_maximum.py | 182 ++---- backends/arm/test/ops/test_mean_dim.py | 80 +-- backends/arm/test/ops/test_minimum.py | 185 ++---- backends/arm/test/ops/test_mm.py | 71 +-- backends/arm/test/ops/test_mul.py | 273 ++++----- backends/arm/test/ops/test_ne.py | 6 +- backends/arm/test/ops/test_permute.py | 260 +++----- backends/arm/test/ops/test_pow.py | 28 +- backends/arm/test/ops/test_reciprocal.py | 197 +++--- backends/arm/test/ops/test_relu.py | 197 +++--- backends/arm/test/ops/test_repeat.py | 183 +++--- 
backends/arm/test/ops/test_rshift.py | 113 ++-- backends/arm/test/ops/test_rsqrt.py | 170 +++--- backends/arm/test/ops/test_scalar_tensor.py | 42 +- backends/arm/test/ops/test_scalars.py | 178 +++--- backends/arm/test/ops/test_select.py | 313 +++++----- backends/arm/test/ops/test_sigmoid.py | 327 +++++----- backends/arm/test/ops/test_sigmoid_16bit.py | 68 ++- backends/arm/test/ops/test_sigmoid_32bit.py | 59 +- backends/arm/test/ops/test_silu.py | 32 +- backends/arm/test/ops/test_slice.py | 212 +++---- backends/arm/test/ops/test_softmax.py | 25 +- backends/arm/test/ops/test_split.py | 262 ++++---- backends/arm/test/ops/test_sqrt.py | 32 +- backends/arm/test/ops/test_squeeze.py | 389 ++++++------ backends/arm/test/ops/test_sub.py | 115 +--- backends/arm/test/ops/test_sum.py | 247 +++----- backends/arm/test/ops/test_tanh.py | 203 +++---- backends/arm/test/ops/test_to_copy.py | 75 ++- backends/arm/test/ops/test_unary.py | 77 ++- backends/arm/test/ops/test_unsqueeze.py | 156 ++--- .../arm/test/ops/test_upsample_nearest2d.py | 262 ++++---- backends/arm/test/ops/test_var.py | 546 +++++++++-------- backends/arm/test/ops/test_view.py | 206 +++---- backends/arm/test/ops/test_where.py | 113 ++-- backends/arm/test/tester/test_pipeline.py | 43 +- 81 files changed, 5412 insertions(+), 6841 deletions(-) diff --git a/backends/arm/scripts/parse_test_names.py b/backends/arm/scripts/parse_test_names.py index 8aabf7c2c59..46cf3e17a73 100644 --- a/backends/arm/scripts/parse_test_names.py +++ b/backends/arm/scripts/parse_test_names.py @@ -5,7 +5,15 @@ from executorch.exir.dialects.edge.spec.utils import SAMPLE_INPUT # Add edge ops which we lower but which are not included in exir/dialects/edge/edge.yaml here. -CUSTOM_EDGE_OPS = ["linspace.default", "eye.default"] +CUSTOM_EDGE_OPS = [ + "linspace.default", + "eye.default", + "hardsigmoid.default", + "hardswish.default", + "linear.default", + "maximum.default", + "adaptive_avg_pool2d.default", +] ALL_EDGE_OPS = SAMPLE_INPUT.keys() | CUSTOM_EDGE_OPS # Add all targets and TOSA profiles we support here. diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py index 57606e51f47..3f90c8c056c 100644 --- a/backends/arm/test/common.py +++ b/backends/arm/test/common.py @@ -259,17 +259,15 @@ def decorator_func(func): raise RuntimeError( "xfail info needs to be str, or tuple[str, type[Exception]]" ) - pytest_param = pytest.param( - test_parameters, - id=id, - marks=pytest.mark.xfail( - reason=reason, raises=raises, strict=strict - ), + # Set up our fail marker + marker = ( + pytest.mark.xfail(reason=reason, raises=raises, strict=strict), ) else: - pytest_param = pytest.param(test_parameters, id=id) - pytest_testsuite.append(pytest_param) + marker = () + pytest_param = pytest.param(test_parameters, id=id, marks=marker) + pytest_testsuite.append(pytest_param) return pytest.mark.parametrize(arg_name, pytest_testsuite)(func) return decorator_func diff --git a/backends/arm/test/conftest.py b/backends/arm/test/conftest.py index db097e9d7d9..2d247f7bd42 100644 --- a/backends/arm/test/conftest.py +++ b/backends/arm/test/conftest.py @@ -12,12 +12,6 @@ import pytest -try: - import tosa_tools.v0_80.tosa_reference_model as tosa_reference_model -except ImportError: - logging.warning("tosa_reference_model not found, can't run reference model tests") - tosa_reference_model = None - """ This file contains the pytest hooks, fixtures etc. for the Arm test suite. 
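 (Illustrative invocation of the option introduced in this patch; the test
  path is assumed: pytest backends/arm/test --arm_run_tosa_version 1.0)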
""" @@ -50,10 +44,11 @@ def pytest_configure(config): if getattr(config.option, "fast_fvp", False): pytest._test_options["fast_fvp"] = config.option.fast_fvp # type: ignore[attr-defined] - # TODO: remove this flag once we have a way to run the reference model tests with Buck - pytest._test_options["tosa_ref_model"] = False # type: ignore[attr-defined] - if tosa_reference_model is not None: - pytest._test_options["tosa_ref_model"] = True # type: ignore[attr-defined] + if config.option.arm_run_tosa_version: + pytest._test_options["tosa_version"] = config.option.arm_run_tosa_version + + pytest._test_options["tosa_ref_model"] = True # type: ignore[attr-defined] + logging.basicConfig(level=logging.INFO, stream=sys.stdout) @@ -76,6 +71,7 @@ def try_addoption(*args, **kwargs): nargs="+", help="List of two files. Firstly .pt file. Secondly .json", ) + try_addoption("--arm_run_tosa_version", action="store", default="0.80") def pytest_sessionstart(session): diff --git a/backends/arm/test/ops/test_abs.py b/backends/arm/test/ops/test_abs.py index 481c7d5ed0d..ed7e616e946 100644 --- a/backends/arm/test/ops/test_abs.py +++ b/backends/arm/test/ops/test_abs.py @@ -1,125 +1,68 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest from typing import Tuple -import pytest - import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - - -class TestAbs(unittest.TestCase): - class Abs(torch.nn.Module): - test_parameters = [ - (torch.zeros(5),), - (torch.full((5,), -1, dtype=torch.float32),), - (torch.ones(5) * -1,), - (torch.randn(8),), - (torch.randn(2, 3, 4),), - (torch.randn(1, 2, 3, 4),), - (torch.normal(mean=0, std=10, size=(2, 3, 4)),), - ] - - def forward(self, x): - return torch.abs(x) - - def _test_abs_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.abs.default": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["torch.ops.aten.abs.default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_abs_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.abs.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_abs_ethosu_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: Tuple[torch.Tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - 
.check_count({"torch.ops.aten.abs.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(Abs.test_parameters) - def test_abs_tosa_MI(self, test_data: torch.Tensor): - test_data = (test_data,) - self._test_abs_tosa_MI_pipeline(self.Abs(), test_data) - - @parameterized.expand(Abs.test_parameters) - def test_abs_tosa_BI(self, test_data: torch.Tensor): - test_data = (test_data,) - self._test_abs_tosa_BI_pipeline(self.Abs(), test_data) - - @parameterized.expand(Abs.test_parameters) - @pytest.mark.corstone_fvp - def test_abs_u55_BI(self, test_data: torch.Tensor): - test_data = (test_data,) - self._test_abs_ethosu_BI_pipeline( - common.get_u55_compile_spec(), self.Abs(), test_data - ) - - @parameterized.expand(Abs.test_parameters) - @pytest.mark.corstone_fvp - def test_abs_u85_BI(self, test_data: torch.Tensor): - test_data = (test_data,) - self._test_abs_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.Abs(), test_data - ) +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.abs.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_abs_default" + +input_t1 = Tuple[torch.Tensor] # Input x + + +class Abs(torch.nn.Module): + test_parameters = { + "zeros": lambda: (torch.zeros(5),), + "full": lambda: (torch.full((5,), -1, dtype=torch.float32),), + "ones": lambda: (torch.ones(5) * -1,), + "randn_1d": lambda: (torch.randn(8),), + "randn_3d": lambda: (torch.randn(2, 3, 4),), + "randn_4d": lambda: (torch.randn(1, 2, 3, 4),), + "torch_normal": lambda: (torch.normal(mean=0, std=10, size=(2, 3, 4)),), + } + + def forward(self, x): + return torch.abs(x) + + +@common.parametrize("test_data", Abs.test_parameters) +def test_abs_tosa_MI(test_data: torch.Tensor): + pipeline = TosaPipelineMI[input_t1](Abs(), test_data(), aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", Abs.test_parameters) +def test_abs_tosa_BI(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t1](Abs(), test_data(), aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", Abs.test_parameters) +@common.XfailIfNoCorstone300 +def test_abs_u55_BI(test_data: torch.Tensor): + pipeline = EthosU55PipelineBI[input_t1]( + Abs(), test_data(), aten_op, exir_op, run_on_fvp=True + ) + pipeline.run() + + +@common.parametrize("test_data", Abs.test_parameters) +@common.XfailIfNoCorstone320 +def test_abs_u85_BI(test_data: torch.Tensor): + pipeline = EthosU85PipelineBI[input_t1]( + Abs(), test_data(), aten_op, exir_op, run_on_fvp=True + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py index 486e53c5f03..67833576886 100644 --- a/backends/arm/test/ops/test_add.py +++ b/backends/arm/test/ops/test_add.py @@ -10,18 +10,18 @@ import torch from executorch.backends.arm.arm_backend import get_tosa_spec from executorch.backends.arm.quantizer import arm_quantizer -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.test_pipeline import ( EthosU55PipelineBI, EthosU85PipelineBI, TosaPipelineBI, TosaPipelineMI, ) +from 
executorch.backends.arm.tosa_specification import TosaSpecification from executorch.backends.xnnpack.test.tester import Quantize from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.quantizer import QuantizationSpec - aten_op = "torch.ops.aten.add.Tensor" exir_op = "executorch_exir_dialects_edge__ops_aten_add_Tensor" @@ -33,11 +33,11 @@ def forward(self, x: torch.Tensor): return x + x test_data: list[input_t1] = { - "5d_float": (torch.FloatTensor([1, 2, 3, 5, 7]),), - "1d_ones": ((3 * torch.ones(8),)), - "1d_randn": (10 * torch.randn(8),), - "4d_ones_1": (torch.ones(1, 1, 4, 4),), - "4d_ones_2": (torch.ones(1, 3, 4, 2),), + "5d_float": lambda: (torch.FloatTensor([1, 2, 3, 5, 7]),), + "1d_ones": lambda: ((3 * torch.ones(8),)), + "1d_randn": lambda: (10 * torch.randn(8),), + "4d_ones_1": lambda: (torch.ones(1, 1, 4, 4),), + "4d_ones_2": lambda: (torch.ones(1, 3, 4, 2),), } @@ -49,14 +49,17 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): return x + y test_data: list[input_t2] = { - "5d_float": ( + "5d_float": lambda: ( torch.FloatTensor([1, 2, 3, 5, 7]), (torch.FloatTensor([2, 1, 2, 1, 10])), ), - "4d_ones": (torch.ones(1, 10, 4, 6), torch.ones(1, 10, 4, 6)), - "4d_randn_1": (torch.randn(1, 1, 4, 4), torch.ones(1, 1, 4, 1)), - "4d_randn_2": (torch.randn(1, 3, 4, 4), torch.randn(1, 3, 4, 4)), - "4d_randn_big": (10000 * torch.randn(1, 1, 4, 4), torch.randn(1, 1, 4, 1)), + "4d_ones": lambda: (torch.ones(1, 10, 4, 6), torch.ones(1, 10, 4, 6)), + "4d_randn_1": lambda: (torch.randn(1, 1, 4, 4), torch.ones(1, 1, 4, 1)), + "4d_randn_2": lambda: (torch.randn(1, 3, 4, 4), torch.randn(1, 3, 4, 4)), + "4d_randn_big": lambda: ( + 10000 * torch.randn(1, 1, 4, 4), + torch.randn(1, 1, 4, 1), + ), } @@ -65,31 +68,35 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): return x + y test_data: list[input_t2] = { - "3d_randn_diff_rank": (torch.randn(1, 4, 5), torch.randn(4, 1)), - "4d_randn_diff_rank": (torch.randn(1, 1, 4, 4), torch.randn(4, 1)), - "4d_randn_diff_rank_2": (torch.randn(4, 1), torch.randn(1, 1, 4, 5)), + "3d_randn_diff_rank": lambda: (torch.randn(1, 4, 5), torch.randn(4, 1)), + "4d_randn_diff_rank": lambda: (torch.randn(1, 1, 4, 4), torch.randn(4, 1)), + "4d_randn_diff_rank_2": lambda: (torch.randn(4, 1), torch.randn(1, 1, 4, 5)), } @common.parametrize("test_data", Add.test_data) -def test_add_tosa_MI(test_data: input_t1): - pipeline = TosaPipelineMI[input_t1](Add(), test_data, aten_op, exir_op) +def test_add_tensor_tosa_MI(test_data: input_t1): + pipeline = TosaPipelineMI[input_t1](Add(), test_data(), aten_op, exir_op) pipeline.run() @common.parametrize("test_data", Add.test_data) -def test_add_tosa_BI(test_data: input_t1): - pipeline = TosaPipelineBI[input_t1](Add(), test_data, aten_op, exir_op) +def test_add_tensor_tosa_BI(test_data: input_t1): + pipeline = TosaPipelineBI[input_t1](Add(), test_data(), aten_op, exir_op) pipeline.run() @common.parametrize("test_data", Add.test_data) -def test_add_i32_tosa_BI(test_data: input_t1): - pipeline = TosaPipelineBI[input_t1](Add(), test_data, aten_op, exir_op) - +def test_add_tensor_tosa_BI_i32(test_data: input_t1): + pipeline = TosaPipelineBI[input_t1](Add(), test_data(), aten_op, exir_op) + tosa_version = conftest.get_option("tosa_version") + tosa_profiles = { + "0.80": TosaSpecification.create_from_string("TOSA-0.80+BI"), + "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT"), + } # Create a quantizer with int8 quantization on the input and output but int32 on everything else. 
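    # Reading of this hunk (annotation, not part of the patch): tosa_profiles
    # maps the --arm_run_tosa_version string from conftest (default "0.80") to
    # a TosaSpecification, so the same int8-I/O, int32-internal quantizer
    # setup runs against either TOSA-0.80+BI or TOSA-1.0+INT.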
quantizer = arm_quantizer.TOSAQuantizer( - get_tosa_spec(common.get_tosa_compile_spec("TOSA-0.80+BI")) + get_tosa_spec(common.get_tosa_compile_spec(tosa_profiles[tosa_version])) ) quantizer.set_io(arm_quantizer.get_symmetric_quantization_config()) observer_options = {"eps": 2**-16} @@ -117,59 +124,59 @@ def test_add_i32_tosa_BI(test_data: input_t1): @common.parametrize("test_data", Add.test_data) @common.XfailIfNoCorstone300 -def test_add_u55_BI(test_data: input_t1): +def test_add_tensor_u55_BI(test_data: input_t1): pipeline = EthosU55PipelineBI[input_t1]( - Add(), test_data, aten_op, exir_op, run_on_fvp=True + Add(), test_data(), aten_op, exir_op, run_on_fvp=True ) pipeline.run() @common.parametrize("test_data", Add.test_data) @common.XfailIfNoCorstone320 -def test_add_u85_BI(test_data: input_t1): +def test_add_tensor_u85_BI(test_data: input_t1): pipeline = EthosU85PipelineBI[input_t1]( - Add(), test_data, aten_op, exir_op, run_on_fvp=True + Add(), test_data(), aten_op, exir_op, run_on_fvp=True ) pipeline.run() @common.parametrize("test_data", Add2.test_data) -def test_add_2_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](Add2(), test_data, aten_op, exir_op) +def test_add_tensor_tosa_MI_2(test_data: input_t2): + pipeline = TosaPipelineMI[input_t2](Add2(), test_data(), aten_op, exir_op) pipeline.run() @common.parametrize("test_data", Add3.test_data) -def test_add3_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](Add3(), test_data, aten_op, exir_op) +def test_add_tensor_tosa_MI_3(test_data: input_t2): + pipeline = TosaPipelineMI[input_t2](Add3(), test_data(), aten_op, exir_op) pipeline.run() @common.parametrize("test_data", Add3.test_data) -def test_add3_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](Add3(), test_data, aten_op, exir_op) +def test_add_tensor_tosa_BI_3(test_data: input_t2): + pipeline = TosaPipelineBI[input_t2](Add3(), test_data(), aten_op, exir_op) pipeline.run() @common.parametrize("test_data", Add2.test_data) -def test_add_2_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](Add2(), test_data, aten_op, exir_op) +def test_add_tensor_tosa_BI_2(test_data: input_t2): + pipeline = TosaPipelineBI[input_t2](Add2(), test_data(), aten_op, exir_op) pipeline.run() @common.parametrize("test_data", Add2.test_data) @common.XfailIfNoCorstone300 -def test_add_2_u55_BI(test_data: input_t2): +def test_add_tensor_u55_BI_2(test_data: input_t2): pipeline = EthosU55PipelineBI[input_t2]( - Add2(), test_data, aten_op, exir_op, run_on_fvp=True + Add2(), test_data(), aten_op, exir_op, run_on_fvp=True ) pipeline.run() @common.parametrize("test_data", Add2.test_data) @common.XfailIfNoCorstone320 -def test_add_2_u85_BI(test_data: input_t2): +def test_add_tensor_u85_BI_2(test_data: input_t2): pipeline = EthosU85PipelineBI[input_t2]( - Add2(), test_data, aten_op, exir_op, run_on_fvp=True + Add2(), test_data(), aten_op, exir_op, run_on_fvp=True ) pipeline.run() diff --git a/backends/arm/test/ops/test_alias_copy.py b/backends/arm/test/ops/test_alias_copy.py index 66fa92bc445..44787fed950 100644 --- a/backends/arm/test/ops/test_alias_copy.py +++ b/backends/arm/test/ops/test_alias_copy.py @@ -30,10 +30,10 @@ class AliasCopy(torch.nn.Module): exir_op = "executorch_exir_dialects_edge__ops_aten_alias_copy_default" test_data: dict[input_t1] = { - "1d_ramp": (torch.arange(-16, 16, 0.2),), - "2d_ones": (torch.ones(5, 5),), - "3d_rand": (torch.rand(3, 5, 5),), - "4d_zeros": (torch.zeros(1, 10, 10, 10),), + "1d_ramp": lambda: 
(torch.arange(-16, 16, 0.2),), + "2d_ones": lambda: (torch.ones(5, 5),), + "3d_rand": lambda: (torch.rand(3, 5, 5),), + "4d_zeros": lambda: (torch.zeros(1, 10, 10, 10),), } def __init__(self): @@ -44,40 +44,40 @@ def forward(self, x: torch.Tensor): @common.parametrize("test_data", AliasCopy.test_data) -def test_alias_copy_tosa_MI(test_data: input_t1): +def test_alias_tosa_MI(test_data: input_t1): TosaPipelineMI[input_t1]( AliasCopy(), - test_data, + test_data(), AliasCopy.aten_op, AliasCopy.exir_op, ).run() @common.parametrize("test_data", AliasCopy.test_data) -def test_alias_copy_tosa_BI(test_data: input_t1): +def test_alias_tosa_BI(test_data: input_t1): TosaPipelineBI[input_t1]( AliasCopy(), - test_data, + test_data(), AliasCopy.aten_op, AliasCopy.exir_op, ).run() @common.parametrize("test_data", AliasCopy.test_data) -def test_alias_copy_u55_BI(test_data: input_t1): +def test_alias_u55_BI(test_data: input_t1): EthosU55PipelineBI[input_t1]( AliasCopy(), - test_data, + test_data(), AliasCopy.aten_op, AliasCopy.exir_op, ).run() @common.parametrize("test_data", AliasCopy.test_data) -def test_alias_copy_u85_BI(test_data: input_t1): +def test_alias_u85_BI(test_data: input_t1): EthosU85PipelineBI[input_t1]( AliasCopy(), - test_data, + test_data(), AliasCopy.aten_op, AliasCopy.exir_op, ).run() diff --git a/backends/arm/test/ops/test_amax.py b/backends/arm/test/ops/test_amax.py index b2639a5f108..0d1f4257b7b 100644 --- a/backends/arm/test/ops/test_amax.py +++ b/backends/arm/test/ops/test_amax.py @@ -30,11 +30,11 @@ def forward(self, x): return torch.amax(x, self.dim, self.keep_dims) test_data: Dict[str, input_t] = { - "rank_1_dim_0": ((torch.rand([10]),), 0, False), - "rank_2_dim_1_keep_dims": ((torch.rand([2, 2]),), (1,), True), - "rank_4_all_dim": ((torch.rand([1, 2, 5, 5]),), (0, 1, 2, 3), False), - "rank_4_0,3_keep_dims": ((torch.rand([1, 2, 2, 2]),), (0, 3), True), - "rank_4_mult_batches": ((torch.rand([2, 2, 2, 2]),), (0), True), + "rank_1_dim_0": lambda: ((torch.rand([10]),), 0, False), + "rank_2_dim_1_keep_dims": lambda: ((torch.rand([2, 2]),), (1,), True), + "rank_4_all_dim": lambda: ((torch.rand([1, 2, 5, 5]),), (0, 1, 2, 3), False), + "rank_4_0,3_keep_dims": lambda: ((torch.rand([1, 2, 2, 2]),), (0, 3), True), + "rank_4_mult_batches": lambda: ((torch.rand([2, 2, 2, 2]),), (0), True), } @@ -51,10 +51,10 @@ def forward(self, x): return x[0] test_data: Dict[str, input_t] = { - "rank_1_dim_0": ((torch.rand([10]),), 0), - "rank_2_dim_1": ((torch.rand([2, 2]),), 1), - "rank_4_dim_2": ((torch.rand([2, 2, 2, 2]),), 2), - "rank_4_dim_3": ((torch.rand([2, 2, 2, 2]),), 3), + "rank_1_dim_0": lambda: ((torch.rand([10]),), 0), + "rank_2_dim_1": lambda: ((torch.rand([2, 2]),), 1), + "rank_4_dim_2": lambda: ((torch.rand([2, 2, 2, 2]),), 2), + "rank_4_dim_3": lambda: ((torch.rand([2, 2, 2, 2]),), 3), } @@ -70,44 +70,26 @@ def forward(self, x): @common.parametrize("test_data", Amax.test_data) def test_amax_tosa_MI(test_data: Amax.input_t): - data, dim, keep_dims = test_data - pipeline = TosaPipelineMI[Amax.input_t]( - Amax(dim, keep_dims), - data, - Amax.aten_op, - ) + data, dim, keep_dims = test_data() + pipeline = TosaPipelineMI[Amax.input_t](Amax(dim, keep_dims), data, Amax.aten_op) pipeline.run() @common.parametrize("test_data", Amax.test_data) def test_amax_tosa_BI(test_data: Amax.input_t): - data, dim, keep_dims = test_data - pipeline = TosaPipelineBI[Amax.input_t]( - Amax(dim, keep_dims), - data, - Amax.aten_op, - ) + data, dim, keep_dims = test_data() + pipeline = 
TosaPipelineBI[Amax.input_t](Amax(dim, keep_dims), data, Amax.aten_op) pipeline.run() def test_amax_u55_BI_not_delegated(): - data, dim, keep_dims = Amax.test_data["rank_4_all_dim"] + data, dim, keep_dims = Amax.test_data["rank_4_all_dim"]() pipeline = OpNotSupportedPipeline[Amax.input_t]( Amax(dim, keep_dims), data, - "TOSA-0.80+BI+u55", {" executorch_exir_dialects_edge__ops_aten_amax_default": 1}, - ) - pipeline.run() - - -@common.parametrize("test_data", Amax.test_data) -def test_amax_u85_BI(test_data: Amax.input_t): - data, dim, keep_dims = test_data - pipeline = EthosU85PipelineBI[Amax.input_t]( - Amax(dim, keep_dims), - data, - Amax.aten_op, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -116,50 +98,43 @@ def test_amax_u85_BI(test_data: Amax.input_t): @common.parametrize("test_data", Amax.test_data, fvp_xfails) -@common.SkipIfNoCorstone320 -def test_amax_u85_BI_on_fvp(test_data: Amax.input_t): - data, dim, keep_dims = test_data +@common.XfailIfNoCorstone320 +def test_amax_u85_BI(test_data: Amax.input_t): + data, dim, keep_dims = test_data() pipeline = EthosU85PipelineBI[Amax.input_t]( - Amax(dim, keep_dims), data, Amax.aten_op, run_on_fvp=True + Amax(dim, keep_dims), + data, + Amax.aten_op, + run_on_fvp=True, ) pipeline.run() @common.parametrize("test_data", Max.test_data) -def test_max_to_amax_MI(test_data: Max.input_t): - data, dim = test_data - pipeline = TosaPipelineMI[Max.input_t]( - Max(dim), - data, - "torch.ops.aten.max", - ) +def test_max_dim_tosa_MI_to_amax(test_data: Max.input_t): + data, dim = test_data() + pipeline = TosaPipelineMI[Max.input_t](Max(dim), data, "torch.ops.aten.max") pipeline.run() @common.parametrize("test_data", Max.test_data) -def test_max_to_amax_BI(test_data: Max.input_t): - data, dim = test_data +def test_max_dim_tosa_BI_to_amax(test_data: Max.input_t): + data, dim = test_data() module = Max(dim) - pipeline = TosaPipelineBI[Max.input_t]( - module, - data, - "torch.ops.aten.amax", - ) + pipeline = TosaPipelineBI[Max.input_t](module, data, "torch.ops.aten.amax") pipeline.run() @pytest.mark.xfail(reason="MLETORCH-718 : Quantization of indices in arm_quantizer") -def test_max_index_not_delegated_BI(): - data, dim = Max.test_data["rank_4_dim_3"] +def test_max_dim_tosa_BI_not_delegated(): + data, dim = Max.test_data()["rank_4_dim_3"]() pipeline = OpNotSupportedPipeline[Max.input_t]( - MaxWithIndex(dim), data, "TOSA-0.80+BI", {} + MaxWithIndex(dim), data, {}, quantize=True ) pipeline.run() -def test_max_index_not_delegated_MI(): - data, dim = Max.test_data["rank_4_dim_3"] - pipeline = OpNotSupportedPipeline[Max.input_t]( - MaxWithIndex(dim), data, "TOSA-0.80+MI", {} - ) +def test_max_dim_tosa_MI_not_delegated(): + data, dim = Max.test_data["rank_4_dim_3"]() + pipeline = OpNotSupportedPipeline[Max.input_t](MaxWithIndex(dim), data, {}) pipeline.run() diff --git a/backends/arm/test/ops/test_amin.py b/backends/arm/test/ops/test_amin.py index 092ed472bce..d83a5ee8839 100644 --- a/backends/arm/test/ops/test_amin.py +++ b/backends/arm/test/ops/test_amin.py @@ -31,11 +31,11 @@ def forward(self, x): return torch.amin(x, self.dim, self.keep_dims) test_data: Dict[str, input_t] = { - "rank_1_dim_0": ((torch.rand([10]),), 0, False), - "rank_2_dim_1_keep_dims": ((torch.rand([2, 2]),), (1,), True), - "rank_4_all_dim": ((torch.rand([1, 2, 5, 5]),), (0, 1, 2, 3), False), - "rank_4_0,3_keep_dims": ((torch.rand([1, 2, 2, 2]),), (0, 3), True), - "rank_4_mult_batches": ((torch.rand([2, 2, 2, 2]),), (0), True), + "rank_1_dim_0": lambda: ((torch.rand([10]),), 0, False), 
+ "rank_2_dim_1_keep_dims": lambda: ((torch.rand([2, 2]),), (1,), True), + "rank_4_all_dim": lambda: ((torch.rand([1, 2, 5, 5]),), (0, 1, 2, 3), False), + "rank_4_0,3_keep_dims": lambda: ((torch.rand([1, 2, 2, 2]),), (0, 3), True), + "rank_4_mult_batches": lambda: ((torch.rand([2, 2, 2, 2]),), (0), True), } @@ -52,10 +52,10 @@ def forward(self, x): return x[0] test_data: Dict[str, input_t] = { - "rank_1_dim_0": ((torch.rand([10]),), 0), - "rank_2_dim_1": ((torch.rand([2, 2]),), 1), - "rank_4_dim_2": ((torch.rand([2, 2, 2, 2]),), 2), - "rank_4_dim_3": ((torch.rand([2, 2, 2, 2]),), 3), + "rank_1_dim_0": lambda: ((torch.rand([10]),), 0), + "rank_2_dim_1": lambda: ((torch.rand([2, 2]),), 1), + "rank_4_dim_2": lambda: ((torch.rand([2, 2, 2, 2]),), 2), + "rank_4_dim_3": lambda: ((torch.rand([2, 2, 2, 2]),), 3), } @@ -71,7 +71,7 @@ def forward(self, x): @common.parametrize("test_data", Amin.test_data) def test_amin_tosa_MI(test_data: Amin.input_t): - data, dim, keep_dims = test_data + data, dim, keep_dims = test_data() pipeline = TosaPipelineMI[Amin.input_t]( Amin(dim, keep_dims), data, @@ -82,7 +82,7 @@ def test_amin_tosa_MI(test_data: Amin.input_t): @common.parametrize("test_data", Amin.test_data) def test_amin_tosa_BI(test_data: Amin.input_t): - data, dim, keep_dims = test_data + data, dim, keep_dims = test_data() pipeline = TosaPipelineBI[Amin.input_t]( Amin(dim, keep_dims), data, @@ -92,23 +92,13 @@ def test_amin_tosa_BI(test_data: Amin.input_t): def test_amin_u55_BI_not_delegated(): - data, dim, keep_dims = Amin.test_data["rank_4_all_dim"] + data, dim, keep_dims = Amin.test_data["rank_4_all_dim"]() pipeline = OpNotSupportedPipeline[Amin.input_t]( Amin(dim, keep_dims), data, - "TOSA-0.80+BI+u55", {" executorch_exir_dialects_edge__ops_aten_amin_default": 1}, - ) - pipeline.run() - - -@common.parametrize("test_data", Amin.test_data) -def test_amin_u85_BI(test_data: Amin.input_t): - data, dim, keep_dims = test_data - pipeline = EthosU85PipelineBI[Amin.input_t]( - Amin(dim, keep_dims), - data, - Amin.aten_op, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -117,50 +107,46 @@ def test_amin_u85_BI(test_data: Amin.input_t): @common.parametrize("test_data", Amin.test_data, fvp_xfails) -@common.SkipIfNoCorstone320 -def test_amin_u85_BI_on_fvp(test_data: Amin.input_t): - data, dim, keep_dims = test_data +@common.XfailIfNoCorstone320 +def test_amin_u85_BI(test_data: Amin.input_t): + data, dim, keep_dims = test_data() pipeline = EthosU85PipelineBI[Amin.input_t]( - Amin(dim, keep_dims), data, Amin.aten_op, run_on_fvp=True + Amin(dim, keep_dims), + data, + Amin.aten_op, + run_on_fvp=True, ) pipeline.run() @common.parametrize("test_data", Min.test_data) -def test_min_to_amin_MI(test_data: Min.input_t): - data, dim = test_data - pipeline = TosaPipelineMI[Min.input_t]( - Min(dim), - data, - "torch.ops.aten.min", - ) +def test_min_dim_tosa_MI_to_amin(test_data: Min.input_t): + data, dim = test_data() + pipeline = TosaPipelineMI[Min.input_t](Min(dim), data, "torch.ops.aten.min") pipeline.run() @common.parametrize("test_data", Min.test_data) -def test_min_to_amin_BI(test_data: Min.input_t): - data, dim = test_data +def test_min_dim_tosa_BI_to_amin(test_data: Min.input_t): + data, dim = test_data() module = Min(dim) - pipeline = TosaPipelineBI[Min.input_t]( - module, - data, - "torch.ops.aten.amin", - ) + pipeline = TosaPipelineBI[Min.input_t](module, data, "torch.ops.aten.amin") pipeline.run() @pytest.mark.xfail(reason="MLETORCH-718 : Quantization of indices in arm_quantizer") -def 
test_max_index_not_delegated_BI(): - data, dim = Min.test_data["rank_4_dim_3"] +def test_min_dim_tosa_BI_not_delegated(): + data, dim = Min.test_data["rank_4_dim_3"]() pipeline = OpNotSupportedPipeline[Min.input_t]( - MinWithIndex(dim), data, "TOSA-0.80+BI", {} + MinWithIndex(dim), + data, + {}, + quantize=True, ) pipeline.run() -def test_max_index_not_delegated_MI(): - data, dim = Min.test_data["rank_4_dim_3"] - pipeline = OpNotSupportedPipeline[Min.input_t]( - MinWithIndex(dim), data, "TOSA-0.80+MI", {} - ) +def test_min_dim_tosa_MI_not_delegated(): + data, dim = Min.test_data["rank_4_dim_3"]() + pipeline = OpNotSupportedPipeline[Min.input_t](MinWithIndex(dim), data, {}) pipeline.run() diff --git a/backends/arm/test/ops/test_any.py b/backends/arm/test/ops/test_any.py index b5de87061ea..6ddef1ad0b5 100644 --- a/backends/arm/test/ops/test_any.py +++ b/backends/arm/test/ops/test_any.py @@ -45,90 +45,94 @@ def forward(self, x: torch.Tensor): test_input: dict[input_t1] = { - "rank1": (torch.tensor([True, False, False], dtype=torch.bool), 0, True), - "rank1_squeeze": (torch.tensor([True, False, False], dtype=torch.bool), -1, False), - "rank2": ( + "rank1": lambda: (torch.tensor([True, False, False], dtype=torch.bool), 0, True), + "rank1_squeeze": lambda: ( + torch.tensor([True, False, False], dtype=torch.bool), + -1, + False, + ), + "rank2": lambda: ( torch.randint(0, 2, (2, 3), dtype=torch.bool), 0, True, ), - "rank2_squeeze": ( + "rank2_squeeze": lambda: ( torch.randint(0, 2, (2, 3), dtype=torch.bool), 0, False, ), - "rank2_dims": ( + "rank2_dims": lambda: ( torch.randint(0, 2, (2, 3), dtype=torch.bool), [0, 1], True, ), - "rank2_dims_squeeze": ( + "rank2_dims_squeeze": lambda: ( torch.randint(0, 2, (2, 3), dtype=torch.bool), [-2, 1], False, ), - "rank3_dims_squeeze": ( + "rank3_dims_squeeze": lambda: ( torch.randint(0, 2, (6, 8, 10), dtype=torch.bool), [1, 2], False, ), - "rank4": ( + "rank4": lambda: ( torch.randint(0, 2, (1, 6, 8, 10), dtype=torch.bool), 1, True, ), - "rank4_squeeze": ( + "rank4_squeeze": lambda: ( torch.randint(0, 2, (1, 6, 8, 10), dtype=torch.bool), 1, False, ), - "rank4_dims": ( + "rank4_dims": lambda: ( torch.randint(0, 2, (1, 6, 8, 10), dtype=torch.bool), [0, 2], True, ), - "rank4_dims_squeeze": ( + "rank4_dims_squeeze": lambda: ( torch.randint(0, 2, (1, 6, 8, 10), dtype=torch.bool), [1, -1], False, ), - "rank1_reduce_all": (torch.tensor([True, False, False], dtype=torch.bool),), - "rank2_reduce_all": (torch.randint(0, 2, (2, 3), dtype=torch.bool),), - "rank3_reduce_all": (torch.randint(0, 2, (6, 8, 10), dtype=torch.bool),), - "rank4_reduce_all": (torch.randint(0, 2, (1, 6, 8, 10), dtype=torch.bool),), + "rank1_reduce_all": lambda: (torch.tensor([True, False, False], dtype=torch.bool),), + "rank2_reduce_all": lambda: (torch.randint(0, 2, (2, 3), dtype=torch.bool),), + "rank3_reduce_all": lambda: (torch.randint(0, 2, (6, 8, 10), dtype=torch.bool),), + "rank4_reduce_all": lambda: (torch.randint(0, 2, (1, 6, 8, 10), dtype=torch.bool),), } test_data = { - "any_rank1": (AnyDim(), test_input["rank1"]), - "any_rank1_squeeze": (AnyDim(), test_input["rank1_squeeze"]), - "any_rank2": (AnyDim(), test_input["rank2"]), - "any_rank2_squeeze": (AnyDim(), test_input["rank2_squeeze"]), - "any_rank2_dims": (AnyDims(), test_input["rank2_dims"]), - "any_rank2_dims_squeeze": (AnyDims(), test_input["rank2_dims_squeeze"]), - "any_rank3_dims_squeeze": (AnyDims(), test_input["rank3_dims_squeeze"]), - "any_rank4": (AnyDim(), test_input["rank4"]), - "any_rank4_squeeze": (AnyDim(), 
test_input["rank4_squeeze"]), - "any_rank4_dims": (AnyDims(), test_input["rank4_dims"]), - "any_rank4_dims_squeeze": (AnyDims(), test_input["rank4_dims_squeeze"]), - "any_rank1_reduce_all": (AnyReduceAll(), test_input["rank1_reduce_all"]), - "any_rank2_reduce_all": (AnyReduceAll(), test_input["rank2_reduce_all"]), - "any_rank3_reduce_all": (AnyReduceAll(), test_input["rank3_reduce_all"]), - "any_rank4_reduce_all": (AnyReduceAll(), test_input["rank4_reduce_all"]), + "any_rank1": lambda: (AnyDim(), test_input["rank1"]), + "any_rank1_squeeze": lambda: (AnyDim(), test_input["rank1_squeeze"]), + "any_rank2": lambda: (AnyDim(), test_input["rank2"]), + "any_rank2_squeeze": lambda: (AnyDim(), test_input["rank2_squeeze"]), + "any_rank2_dims": lambda: (AnyDims(), test_input["rank2_dims"]), + "any_rank2_dims_squeeze": lambda: (AnyDims(), test_input["rank2_dims_squeeze"]), + "any_rank3_dims_squeeze": lambda: (AnyDims(), test_input["rank3_dims_squeeze"]), + "any_rank4": lambda: (AnyDim(), test_input["rank4"]), + "any_rank4_squeeze": lambda: (AnyDim(), test_input["rank4_squeeze"]), + "any_rank4_dims": lambda: (AnyDims(), test_input["rank4_dims"]), + "any_rank4_dims_squeeze": lambda: (AnyDims(), test_input["rank4_dims_squeeze"]), + "any_rank1_reduce_all": lambda: (AnyReduceAll(), test_input["rank1_reduce_all"]), + "any_rank2_reduce_all": lambda: (AnyReduceAll(), test_input["rank2_reduce_all"]), + "any_rank3_reduce_all": lambda: (AnyReduceAll(), test_input["rank3_reduce_all"]), + "any_rank4_reduce_all": lambda: (AnyReduceAll(), test_input["rank4_reduce_all"]), } @common.parametrize("test_data", test_data) def test_any_tosa_MI(test_data: input_t1): - op, test_input = test_data - pipeline = TosaPipelineMI[input_t1](op, test_input, op.aten_op, op.exir_op) + op, test_input = test_data() + pipeline = TosaPipelineMI[input_t1](op, test_input(), op.aten_op, op.exir_op) pipeline.run() @common.parametrize("test_data", test_data) def test_any_tosa_BI(test_data: input_t1): - op, test_input = test_data - pipeline = TosaPipelineBI[input_t1](op, test_input, op.aten_op, op.exir_op) + op, test_input = test_data() + pipeline = TosaPipelineBI[input_t1](op, test_input(), op.aten_op, op.exir_op) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") pipeline.run() @@ -137,9 +141,13 @@ def test_any_tosa_BI(test_data: input_t1): @common.parametrize("test_data", test_data) def test_any_u55_BI(test_data: input_t1): # Tests that we don't delegate these ops since they are not supported on U55. 
- op, test_input = test_data + op, test_input = test_data() pipeline = OpNotSupportedPipeline[input_t1]( - op, test_input, "TOSA-0.80+BI+u55", {op.exir_op: 1} + op, + test_input(), + {op.exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -148,9 +156,13 @@ def test_any_u55_BI(test_data: input_t1): @pytest.mark.xfail(reason="MLETORCH-706: Support ScalarType::Bool in EthosUBackend.") @common.XfailIfNoCorstone320 def test_any_u85_BI(test_data: input_t1): - op, test_input = test_data + op, test_input = test_data() pipeline = EthosU85PipelineBI[input_t1]( - op, test_input, op.aten_op, op.exir_op, run_on_fvp=True + op, + test_input(), + op.aten_op, + op.exir_op, + run_on_fvp=True, ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") diff --git a/backends/arm/test/ops/test_arange.py b/backends/arm/test/ops/test_arange.py index 124f3ee597e..cb5f329a7f9 100644 --- a/backends/arm/test/ops/test_arange.py +++ b/backends/arm/test/ops/test_arange.py @@ -54,16 +54,22 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def test_arange_start_step_tosa_MI(test_data: test_data_t): input_data, init_data = test_data pipeline = TosaPipelineMI[input_t]( - ArangeAdd(*init_data), input_data(), ArangeAdd.aten_op, ArangeAdd.exir_op + ArangeAdd(*init_data), + input_data(), + ArangeAdd.aten_op, + ArangeAdd.exir_op, ) pipeline.run() @common.parametrize("test_data", ArangeAdd.test_data_dtypes) -def test_arange_start_step_dtypes_tosa_MI(test_data: test_data_t): +def test_arange_start_step_tosa_MI_dtypes(test_data: test_data_t): input_data, init_data = test_data pipeline = TosaPipelineMI[input_t]( - ArangeAdd(*init_data), input_data(), ArangeAdd.aten_op, ArangeAdd.exir_op + ArangeAdd(*init_data), + input_data(), + ArangeAdd.aten_op, + ArangeAdd.exir_op, ) pipeline.run() @@ -72,27 +78,34 @@ def test_arange_start_step_dtypes_tosa_MI(test_data: test_data_t): def test_arange_start_step_tosa_BI(test_data: test_data_t): input_data, init_data = test_data pipeline = TosaPipelineBI[input_t]( - ArangeAdd(*init_data), input_data(), ArangeAdd.aten_op, ArangeAdd.exir_op + ArangeAdd(*init_data), + input_data(), + ArangeAdd.aten_op, + ArangeAdd.exir_op, ) pipeline.pop_stage("check.quant_nodes") pipeline.run() @common.parametrize("test_data", ArangeAdd.test_data) -def test_arange_start_step_tosa_u55(test_data: test_data_t): +def test_arange_start_step_u55_BI(test_data: test_data_t): input_data, init_data = test_data pipeline = EthosU55PipelineBI[input_t]( - ArangeAdd(*init_data), input_data(), ArangeAdd.aten_op + ArangeAdd(*init_data), + input_data(), + ArangeAdd.aten_op, ) pipeline.pop_stage("check.quant_nodes") pipeline.run() @common.parametrize("test_data", ArangeAdd.test_data) -def test_arange_start_step_tosa_u85(test_data: test_data_t): +def test_arange_start_step_u85_BI(test_data: test_data_t): input_data, init_data = test_data pipeline = EthosU85PipelineBI[input_t]( - ArangeAdd(*init_data), input_data(), ArangeAdd.aten_op + ArangeAdd(*init_data), + input_data(), + ArangeAdd.aten_op, ) pipeline.pop_stage("check.quant_nodes") pipeline.run() @@ -120,7 +133,10 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def test_linspace_tosa_MI(test_data): input_data, init_data = test_data pipeline = TosaPipelineMI[input_t]( - LinspaceAdd(*init_data), input_data(), LinspaceAdd.aten_op, LinspaceAdd.exir_op + LinspaceAdd(*init_data), + input_data(), + LinspaceAdd.aten_op, + LinspaceAdd.exir_op, ) pipeline.run() @@ -129,7 +145,10 @@ def test_linspace_tosa_MI(test_data): def 
test_linspace_tosa_BI(test_data: test_data_t): input_data, init_data = test_data pipeline = TosaPipelineBI[input_t]( - LinspaceAdd(*init_data), input_data(), LinspaceAdd.aten_op, LinspaceAdd.exir_op + LinspaceAdd(*init_data), + input_data(), + LinspaceAdd.aten_op, + LinspaceAdd.exir_op, ) pipeline.pop_stage("check.quant_nodes") pipeline.run() diff --git a/backends/arm/test/ops/test_avg_pool2d.py b/backends/arm/test/ops/test_avg_pool2d.py index c48595aec7f..65c1830b9b2 100644 --- a/backends/arm/test/ops/test_avg_pool2d.py +++ b/backends/arm/test/ops/test_avg_pool2d.py @@ -1,6 +1,5 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. -# # Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the @@ -9,8 +8,6 @@ from typing import Tuple -import pytest - import torch from executorch.backends.arm.test import common, conftest @@ -23,9 +20,9 @@ TosaPipelineMI, ) - aten_op = "torch.ops.aten.avg_pool2d.default" exir_op = "executorch_exir_dialects_edge__ops_aten_avg_pool2d_default" + input_t = Tuple[torch.Tensor] @@ -46,19 +43,19 @@ def forward(self, x): test_modules = { - "zeros": (AvgPool2d(4, 2, 0), (torch.zeros(1, 16, 50, 32),)), - "ones": (AvgPool2d(4, 2, 0), (torch.ones(1, 16, 50, 32),)), - "rand": (AvgPool2d(4, 2, 0), (torch.rand(1, 16, 50, 32),)), - "randn": (AvgPool2d(4, 2, 0), (torch.randn(1, 16, 50, 32),)), - "kernel_3x3_stride_1_pad_1": ( + "zeros": lambda: (AvgPool2d(4, 2, 0), (torch.zeros(1, 16, 50, 32),)), + "ones": lambda: (AvgPool2d(4, 2, 0), (torch.ones(1, 16, 50, 32),)), + "rand": lambda: (AvgPool2d(4, 2, 0), (torch.rand(1, 16, 50, 32),)), + "randn": lambda: (AvgPool2d(4, 2, 0), (torch.randn(1, 16, 50, 32),)), + "kernel_3x3_stride_1_pad_1": lambda: ( AvgPool2d((3, 3), (1, 1), 1), (torch.rand(1, 16, 50, 32),), ), - "kernel_3x2_stride_1x2_pad_1x0": ( + "kernel_3x2_stride_1x2_pad_1x0": lambda: ( AvgPool2d((3, 2), (1, 2), (1, 0)), (torch.rand(1, 16, 50, 32),), ), - "kernel_4x6_stride_1x2_pad_2x3": ( + "kernel_4x6_stride_1x2_pad_2x3": lambda: ( AvgPool2d((4, 6), (1, 2), (2, 3)), (torch.rand(1, 16, 50, 32),), ), @@ -66,9 +63,8 @@ def forward(self, x): @common.parametrize("test_module", test_modules) -@pytest.mark.tosa_ref_model -def test_avgpool2d_tosa_MI(test_module): - model, input_tensor = test_module +def test_avg_pool2d_tosa_MI(test_module): + model, input_tensor = test_module() pipeline = TosaPipelineMI[input_t]( model, @@ -83,9 +79,8 @@ def test_avgpool2d_tosa_MI(test_module): @common.parametrize("test_module", test_modules) -@pytest.mark.tosa_ref_model -def test_avgpool2d_tosa_BI(test_module): - model, input_tensor = test_module +def test_avg_pool2d_tosa_BI(test_module): + model, input_tensor = test_module() pipeline = TosaPipelineBI[input_t]( model, @@ -101,41 +96,9 @@ def test_avgpool2d_tosa_BI(test_module): @common.parametrize("test_module", test_modules) -def test_avgpool2d_u55_BI(test_module): - model, input_tensor = test_module - - pipeline = EthosU55PipelineBI[input_t]( - model, - input_tensor, - aten_op, - exir_op, - run_on_fvp=False, - symmetric_io_quantization=True, - ) - - pipeline.run() - - -@common.parametrize("test_module", test_modules) -def test_avgpool2d_u85_BI(test_module): - model, input_tensor = test_module - - pipeline = EthosU85PipelineBI[input_t]( - model, - input_tensor, - aten_op, - exir_op, - run_on_fvp=False, - symmetric_io_quantization=True, - ) - - pipeline.run() - - -@common.parametrize("test_module", test_modules) -@common.SkipIfNoCorstone300 -def 
test_avgpool2d_u55_BI_on_fvp(test_module): - model, input_tensor = test_module +@common.XfailIfNoCorstone300 +def test_avg_pool2d_u55_BI(test_module): + model, input_tensor = test_module() pipeline = EthosU55PipelineBI[input_t]( model, @@ -150,9 +113,9 @@ def test_avgpool2d_u55_BI_on_fvp(test_module): @common.parametrize("test_module", test_modules) -@common.SkipIfNoCorstone320 -def test_avgpool2d_u85_BI_on_fvp(test_module): - model, input_tensor = test_module +@common.XfailIfNoCorstone320 +def test_avg_pool2d_u85_BI(test_module): + model, input_tensor = test_module() pipeline = EthosU85PipelineBI[input_t]( model, @@ -168,14 +131,20 @@ def test_avgpool2d_u85_BI_on_fvp(test_module): reject_modules = { - "kernel_1x1_stride_1_pad_0": (AvgPool2d(1, 1, 0), torch.rand(2, 5, 5, 5)), - "kernel_2x9_stride_1_pad_1": (AvgPool2d((2, 9), 1, 1), torch.rand(1, 16, 5, 32)), - "kernel_1x4_stride_0_pad_0": (AvgPool2d(1, 4, 0), torch.rand(1, 10, 10, 10)), - "kernel_1x257_stride_1_pad_0_large": ( + "kernel_1x1_stride_1_pad_0": lambda: (AvgPool2d(1, 1, 0), torch.rand(2, 5, 5, 5)), + "kernel_2x9_stride_1_pad_1": lambda: ( + AvgPool2d((2, 9), 1, 1), + torch.rand(1, 16, 5, 32), + ), + "kernel_1x4_stride_0_pad_0": lambda: ( + AvgPool2d(1, 4, 0), + torch.rand(1, 10, 10, 10), + ), + "kernel_1x257_stride_1_pad_0_large": lambda: ( AvgPool2d((1, 257), 1, 0), torch.rand(1, 16, 5, 300), ), - "kernel_800x90_stride_1_pad_0_extreme": ( + "kernel_800x90_stride_1_pad_0_extreme": lambda: ( AvgPool2d((800, 90), 1, 0), torch.rand(1, 16, 850, 100), ), @@ -183,15 +152,15 @@ def test_avgpool2d_u85_BI_on_fvp(test_module): @common.parametrize("reject_module", reject_modules) -def test_reject_avgpool2d(reject_module): +def test_avg_pool2d_tosa_BI_not_delegated(reject_module): - model, test_data = reject_module + model, test_data = reject_module() pipeline = OpNotSupportedPipeline[input_t]( module=model, test_data=(test_data,), - tosa_version="TOSA-0.80+BI", non_delegated_ops={}, n_expected_delegates=0, + quantize=True, ) pipeline.run() diff --git a/backends/arm/test/ops/test_batch_norm.py b/backends/arm/test/ops/test_batch_norm.py index 980ab28df64..5134353c671 100644 --- a/backends/arm/test/ops/test_batch_norm.py +++ b/backends/arm/test/ops/test_batch_norm.py @@ -5,20 +5,25 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-import unittest from typing import Tuple +import pytest + import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from parameterized import parameterized +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +input_t1 = Tuple[torch.Tensor] # Input x -test_data_suite = [ +test_data_suite = { # (test_name, test_data, [num_features, affine, track_running_stats, weight, bias, running_mean, running_var,] ) - ( - "zeros_affineT_runStatsT_default_weight_bias_mean_var", + "zeros_affineT_runStatsT_default_weight_bias_mean_var": lambda: ( torch.zeros(1, 32, 112, 112), [ 32, @@ -26,8 +31,7 @@ True, ], ), - ( - "zeros_affineF_runStatsT_default_weight_bias_mean_var", + "zeros_affineF_runStatsT_default_weight_bias_mean_var": lambda: ( torch.zeros(1, 32, 112, 112), [ 32, @@ -35,8 +39,7 @@ True, ], ), - ( - "zeros_affineT_runStatsT_rand_weight_bias_mean_var", + "zeros_affineT_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.zeros(1, 32, 112, 112), [ 32, @@ -48,8 +51,7 @@ torch.rand(32), ], ), - ( - "zeros_affineF_runStatsT_rand_weight_bias_mean_var", + "zeros_affineF_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.zeros(1, 32, 112, 112), [ 32, @@ -61,8 +63,7 @@ torch.rand(32), ], ), - ( - "ones_affineT_runStatsT_default_weight_bias_mean_var", + "ones_affineT_runStatsT_default_weight_bias_mean_var": lambda: ( torch.ones(1, 32, 112, 112), [ 32, @@ -70,8 +71,7 @@ True, ], ), - ( - "ones_affineF_runStatsT_default_weight_bias_mean_var", + "ones_affineF_runStatsT_default_weight_bias_mean_var": lambda: ( torch.ones(1, 32, 112, 112), [ 32, @@ -79,8 +79,7 @@ True, ], ), - ( - "ones_affineT_runStatsT_rand_weight_bias_mean_var", + "ones_affineT_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.ones(1, 32, 112, 112), [ 32, @@ -92,8 +91,7 @@ torch.rand(32), ], ), - ( - "ones_affineF_runStatsT_rand_weight_bias_mean_var", + "ones_affineF_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.ones(1, 32, 112, 112), [ 32, @@ -105,8 +103,7 @@ torch.rand(32), ], ), - ( - "rand_affineT_runStatsT_default_weight_bias_mean_var", + "rand_affineT_runStatsT_default_weight_bias_mean_var": lambda: ( torch.rand(1, 32, 112, 112), [ 32, @@ -114,8 +111,7 @@ True, ], ), - ( - "rand_affineF_runStatsT_default_weight_bias_mean_var", + "rand_affineF_runStatsT_default_weight_bias_mean_var": lambda: ( torch.rand(1, 32, 112, 112), [ 32, @@ -123,8 +119,7 @@ True, ], ), - ( - "rand_affineT_runStatsT_rand_weight_bias_mean_var", + "rand_affineT_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.rand(1, 32, 112, 112), [ 32, @@ -136,8 +131,7 @@ torch.rand(32), ], ), - ( - "rand_affineF_runStatsT_rand_weight_bias_mean_var", + "rand_affineF_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.rand(1, 32, 112, 112), [ 32, @@ -149,8 +143,7 @@ torch.rand(32), ], ), - ( - "randn_affineT_runStatsT_default_weight_bias_mean_var", + "randn_affineT_runStatsT_default_weight_bias_mean_var": lambda: ( torch.randn(1, 32, 112, 112), [ 32, @@ -158,8 +151,7 @@ True, ], ), - ( - "randn_affineF_runStatsT_default_weight_bias_mean_var", + "randn_affineF_runStatsT_default_weight_bias_mean_var": lambda: ( torch.randn(1, 32, 112, 112), [ 32, @@ -167,8 +159,7 @@ True, ], ), - ( - "randn_affineT_runStatsT_rand_weight_bias_mean_var", + "randn_affineT_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.randn(1, 32, 112, 112), [ 32, @@ -180,8 +171,7 @@ torch.rand(32), ], ), - ( - 
"randn_affineF_runStatsT_rand_weight_bias_mean_var", + "randn_affineF_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.randn(1, 32, 112, 112), [ 32, @@ -194,100 +184,81 @@ ], ), # Test some different sizes - ( - "size_3_4_5_6_affineT_runStatsT_rand_weight_bias_mean_var", + "size_3_4_5_6_affineT_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.rand(3, 4, 5, 6), [4, True, True, torch.rand(4), torch.rand(4), torch.rand(4), torch.rand(4)], ), - ( - "size_3_4_5_6_affineF_runStatsT_rand_weight_bias_mean_var", + "size_3_4_5_6_affineF_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.rand(3, 4, 5, 6), [4, True, True, torch.rand(4), torch.rand(4), torch.rand(4), torch.rand(4)], ), - ( - "size_1_3_254_254_affineT_runStatsT_rand_weight_bias_mean_var", + "size_1_3_254_254_affineT_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.rand(1, 3, 254, 254), [3, True, True, torch.rand(3), torch.rand(3), torch.rand(3), torch.rand(3)], ), - ( - "size_1_3_254_254_affineF_runStatsT_rand_weight_bias_mean_var", + "size_1_3_254_254_affineF_runStatsT_rand_weight_bias_mean_var": lambda: ( torch.rand(1, 3, 254, 254), [3, True, True, torch.rand(3), torch.rand(3), torch.rand(3), torch.rand(3)], ), # Test combination of weight and bias - ( - "check_weight_bias_affineT_runStatsT_none_none", + "check_weight_bias_affineT_runStatsT_none_none": lambda: ( torch.rand(1, 32, 112, 112), [32, True, True, None, None], ), - ( - "check_weight_bias_affineF_runStatsT_none_none", + "check_weight_bias_affineF_runStatsT_none_none": lambda: ( torch.rand(1, 32, 112, 112), [32, False, True, None, None], ), - ( - "check_weight_bias_affineT_runStatsT_weight_none", + "check_weight_bias_affineT_runStatsT_weight_none": lambda: ( torch.rand(1, 32, 112, 112), [32, True, True, torch.rand(32)], ), - ( - "check_weight_bias_affineF_runStatsT_weight_none", + "check_weight_bias_affineF_runStatsT_weight_none": lambda: ( torch.rand(1, 32, 112, 112), [32, False, True, torch.rand(32)], ), - ( - "check_weight_bias_affineT_runStatsT_none_bias", + "check_weight_bias_affineT_runStatsT_none_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, True, True, None, torch.rand(32)], ), - ( - "check_weight_bias_affineF_runStatsT_none_bias", + "check_weight_bias_affineF_runStatsT_none_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, False, True, None, torch.rand(32)], ), - ( - "check_weight_bias_affineT_runStatsT_weight_bias", + "check_weight_bias_affineT_runStatsT_weight_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, True, True, torch.rand(32), torch.rand(32)], ), - ( - "check_weight_bias_affineF_runStatsT_weight_bias", + "check_weight_bias_affineF_runStatsT_weight_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, False, True, torch.rand(32), torch.rand(32)], ), # Test combination of running_mean and running_var - ( - "check_mean_var_affineT_runStatsT_none_none", + "check_mean_var_affineT_runStatsT_none_none": lambda: ( torch.randn(1, 32, 112, 112), [32, True, True, torch.rand(32), torch.rand(32), None, None], ), - ( - "check_mean_var_affineF_runStatsT_none_none", + "check_mean_var_affineF_runStatsT_none_none": lambda: ( torch.randn(1, 32, 112, 112), [32, False, True, torch.rand(32), torch.rand(32), None, None], ), - ( - "check_mean_var_affineT_runStatsT_mean_none", + "check_mean_var_affineT_runStatsT_mean_none": lambda: ( torch.randn(1, 32, 112, 112), [32, True, True, torch.rand(32), torch.rand(32), torch.rand(32), None], ), - ( - "check_mean_var_affineF_runStatsT_mean_none", + "check_mean_var_affineF_runStatsT_mean_none": lambda: ( torch.randn(1, 
32, 112, 112), [32, False, True, torch.rand(32), torch.rand(32), torch.rand(32), None], ), - ( - "check_mean_var_affineT_runStatsT_none_var", + "check_mean_var_affineT_runStatsT_none_var": lambda: ( torch.randn(1, 32, 112, 112), [32, True, True, torch.rand(32), torch.rand(32), None, torch.rand(32)], ), - ( - "check_mean_var_affineF_runStatsT_none_var", + "check_mean_var_affineF_runStatsT_none_var": lambda: ( torch.randn(1, 32, 112, 112), [32, False, True, torch.rand(32), torch.rand(32), None, torch.rand(32)], ), - ( - "check_mean_var_affineT_runStatsT_mean_var", + "check_mean_var_affineT_runStatsT_mean_var": lambda: ( torch.randn(1, 32, 112, 112), [ 32, @@ -299,8 +270,7 @@ torch.rand(32), ], ), - ( - "check_mean_var_affineF_runStatsT_mean_var", + "check_mean_var_affineF_runStatsT_mean_var": lambda: ( torch.randn(1, 32, 112, 112), [ 32, @@ -312,12 +282,11 @@ torch.rand(32), ], ), -] +} -test_no_stats_data_suite = [ +test_no_stats_data_suite = { # (test_name, test_data, [num_features, affine, track_running_stats, weight, bias, running_mean, running_var, ] ) - ( - "zeros_affineT_runStatsF_default_weight_bias", + "zeros_affineT_runStatsF_default_weight_bias": lambda: ( torch.zeros(1, 32, 112, 112), [ 32, @@ -325,8 +294,7 @@ False, ], ), - ( - "zeros_affineF_runStatsF_default_weight_bias", + "zeros_affineF_runStatsF_default_weight_bias": lambda: ( torch.zeros(1, 32, 112, 112), [ 32, @@ -334,18 +302,15 @@ False, ], ), - ( - "zeros_affineT_runStatsF_rand_weight_bias", + "zeros_affineT_runStatsF_rand_weight_bias": lambda: ( torch.zeros(1, 32, 112, 112), [32, True, False, torch.rand(32), torch.rand(32)], ), - ( - "zeros_affineF_runStatsF_rand_weight_bias", + "zeros_affineF_runStatsF_rand_weight_bias": lambda: ( torch.zeros(1, 32, 112, 112), [32, False, False, torch.rand(32), torch.rand(32)], ), - ( - "ones_affineT_runStatsF_default_weight_bias", + "ones_affineT_runStatsF_default_weight_bias": lambda: ( torch.ones(1, 32, 112, 112), [ 32, @@ -353,8 +318,7 @@ False, ], ), - ( - "ones_affineF_runStatsF_default_weight_bias", + "ones_affineF_runStatsF_default_weight_bias": lambda: ( torch.ones(1, 32, 112, 112), [ 32, @@ -362,18 +326,15 @@ False, ], ), - ( - "ones_affineT_runStatsF_rand_weight_bias", + "ones_affineT_runStatsF_rand_weight_bias": lambda: ( torch.ones(1, 32, 112, 112), [32, True, False, torch.rand(32), torch.rand(32)], ), - ( - "ones_affineF_runStatsF", + "ones_affineF_runStatsF": lambda: ( torch.ones(1, 32, 112, 112), [32, False, False, torch.rand(32), torch.rand(32)], ), - ( - "rand_affineT_runStatsF_default_weight_bias", + "rand_affineT_runStatsF_default_weight_bias": lambda: ( torch.rand(1, 32, 112, 112), [ 32, @@ -381,8 +342,7 @@ False, ], ), - ( - "rand_affineF_runStatsF_default_weight_bias", + "rand_affineF_runStatsF_default_weight_bias": lambda: ( torch.rand(1, 32, 112, 112), [ 32, @@ -390,18 +350,15 @@ False, ], ), - ( - "rand_affineT_runStatsF_rand_weight_bias", + "rand_affineT_runStatsF_rand_weight_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, True, False, torch.rand(32), torch.rand(32)], ), - ( - "rand_affineF_runStatsF_rand_weight_bias", + "rand_affineF_runStatsF_rand_weight_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, False, False, torch.rand(32), torch.rand(32)], ), - ( - "randn_affineT_runStatsF_default_weight_bias", + "randn_affineT_runStatsF_default_weight_bias": lambda: ( torch.randn(1, 32, 112, 112), [ 32, @@ -409,8 +366,7 @@ False, ], ), - ( - "randn_affineF_runStatsF_default_weight_bias", + "randn_affineF_runStatsF_default_weight_bias": lambda: ( 
torch.randn(1, 32, 112, 112), [ 32, @@ -418,304 +374,148 @@ False, ], ), - ( - "randn_affineT_runStatsF_rand_weight_bias", + "randn_affineT_runStatsF_rand_weight_bias": lambda: ( torch.randn(1, 32, 112, 112), [32, True, False, torch.rand(32), torch.rand(32)], ), - ( - "randn_affineF_runStatsF_rand_weight_bias", + "randn_affineF_runStatsF_rand_weight_bias": lambda: ( torch.randn(1, 32, 112, 112), [32, False, False, torch.rand(32), torch.rand(32)], ), # Test some different sizes - ( - "size_3_4_5_6_affineT_runStatsF_rand_weight_bias_mean_var", + "size_3_4_5_6_affineT_runStatsF_rand_weight_bias_mean_var": lambda: ( torch.rand(3, 4, 5, 6), [4, True, False, torch.rand(4), torch.rand(4)], ), - ( - "size_3_4_5_6_affineF_runStatsF_rand_weight_bias_mean_var", + "size_3_4_5_6_affineF_runStatsF_rand_weight_bias_mean_var": lambda: ( torch.rand(3, 4, 5, 6), [4, True, False, torch.rand(4), torch.rand(4)], ), - ( - "size_1_3_254_254_affineT_runStatsF_rand_weight_bias_mean_var", + "size_1_3_254_254_affineT_runStatsF_rand_weight_bias_mean_var": lambda: ( torch.rand(1, 3, 254, 254), [3, True, False, torch.rand(3), torch.rand(3)], ), - ( - "size_1_3_254_254_affineF_runStatsF_rand_weight_bias_mean_var", + "size_1_3_254_254_affineF_runStatsF_rand_weight_bias_mean_var": lambda: ( torch.rand(1, 3, 254, 254), [3, True, False, torch.rand(3), torch.rand(3)], ), # Test combination of weight and bias - ( - "check_weight_bias_affineT_runStatsF_none_none", + "check_weight_bias_affineT_runStatsF_none_none": lambda: ( torch.rand(1, 32, 112, 112), [32, True, False, None, None], ), - ( - "check_weight_bias_affineF_runStatsF_none_none", + "check_weight_bias_affineF_runStatsF_none_none": lambda: ( torch.rand(1, 32, 112, 112), [32, False, False, None, None], ), - ( - "check_weight_bias_affineT_runStatsF_weight_none", + "check_weight_bias_affineT_runStatsF_weight_none": lambda: ( torch.rand(1, 32, 112, 112), [32, True, False, torch.rand(32)], ), - ( - "check_weight_bias_affineF_runStatsF_weight_none", + "check_weight_bias_affineF_runStatsF_weight_none": lambda: ( torch.rand(1, 32, 112, 112), [32, False, False, torch.rand(32)], ), - ( - "check_weight_bias_affineT_runStatsF_none_bias", + "check_weight_bias_affineT_runStatsF_none_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, True, False, None, torch.rand(32)], ), - ( - "check_weight_bias_affineF_runStatsF_none_bias", + "check_weight_bias_affineF_runStatsF_none_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, False, False, None, torch.rand(32)], ), - ( - "check_weight_bias_affineT_runStatsF_weight_bias", + "check_weight_bias_affineT_runStatsF_weight_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, True, False, torch.rand(32), torch.rand(32)], ), - ( - "check_weight_bias_affineF_runStatsF_weight_bias", + "check_weight_bias_affineF_runStatsF_weight_bias": lambda: ( torch.rand(1, 32, 112, 112), [32, False, False, torch.rand(32), torch.rand(32)], ), -] - - -class TestBatchNorm2d(unittest.TestCase): - """Tests BatchNorm2d.""" +} - class BatchNorm2d(torch.nn.Module): - def __init__( - self, - num_features: int = 32, - affine: bool = False, - track_running_stats: bool = True, - weights: torch.tensor = None, - bias: torch.tensor = None, - running_mean: torch.tensor = None, - running_var: torch.tensor = None, - ): - super().__init__() - self.batch_norm_2d = torch.nn.BatchNorm2d( - num_features, affine=affine, track_running_stats=track_running_stats - ) - if weights is not None: - self.batch_norm_2d.weight = torch.nn.Parameter(weights) - if bias is not None: - 
self.batch_norm_2d.bias = torch.nn.Parameter(bias) - if running_mean is not None: - self.batch_norm_2d.running_mean = running_mean - if running_var is not None: - self.batch_norm_2d.running_var = running_var - def forward(self, x): - return self.batch_norm_2d(x) - - def _test_batchnorm2d_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] +class BatchNorm2d(torch.nn.Module): + def __init__( + self, + num_features: int = 32, + affine: bool = False, + track_running_stats: bool = True, + weights: torch.tensor = None, + bias: torch.tensor = None, + running_mean: torch.tensor = None, + running_var: torch.tensor = None, ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .check_count( - { - "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default": 1 - } - ) - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .check_not( - [ - "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default" - ] - ) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) + super().__init__() + self.batch_norm_2d = torch.nn.BatchNorm2d( + num_features, affine=affine, track_running_stats=track_running_stats ) + if weights is not None: + self.batch_norm_2d.weight = torch.nn.Parameter(weights) + if bias is not None: + self.batch_norm_2d.bias = torch.nn.Parameter(bias) + if running_mean is not None: + self.batch_norm_2d.running_mean = running_mean + if running_var is not None: + self.batch_norm_2d.running_var = running_var - def _test_batchnorm2d_no_stats_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten._native_batch_norm_legit.no_stats": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .check_count( - { - "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_stats": 1 - } - ) - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .check_not( - [ - "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_stats" - ] - ) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) + def forward(self, x): + return self.batch_norm_2d(x) - def _test_batchnorm2d_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count( - {"torch.ops.aten._native_batch_norm_legit_no_training.default": 1} - ) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .check_count( - { - "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default": 1 - } - ) - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .check_not( - [ - "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default" - ] - ) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - def _test_batchnorm2d_u55_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_u55_compile_spec(), - ) - .quantize() - 
.export()
-            .check_count(
-                {"torch.ops.aten._native_batch_norm_legit_no_training.default": 1}
-            )
-            .check(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .check_count(
-                {
-                    "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default": 1
-                }
-            )
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .check_not(
-                [
-                    "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default"
-                ]
-            )
-            .to_executorch()
-        )
+@common.parametrize("test_data", test_data_suite)
+def test_native_batch_norm_legit_tosa_MI_no_training(test_data: Tuple):
+    test_data, model_params = test_data()
+    pipeline = TosaPipelineMI[input_t1](
+        BatchNorm2d(*model_params),
+        (test_data,),
+        aten_op=[],
+        exir_op="executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
+    )
+    pipeline.run()
-    @parameterized.expand(test_data_suite)
-    def test_native_batch_norm_legit_no_training_tosa_MI(
-        self,
-        test_name: str,
-        test_data: torch.Tensor,
-        model_params: (
-            int
-            | Tuple[
-                int, bool, bool, torch.tensor, torch.tensor, torch.tensor, torch.tensor
-            ]
-        ),
-    ):
-        self._test_batchnorm2d_tosa_MI_pipeline(
-            self.BatchNorm2d(*model_params), (test_data,)
-        )
-    # Expected to fail since not inplemented
-    @parameterized.expand(test_no_stats_data_suite)
-    @unittest.expectedFailure
-    def test_native_batch_norm_legit_tosa_MI(
-        self,
-        test_name: str,
-        test_data: torch.Tensor,
-        model_params: (
-            int
-            | Tuple[
-                int, bool, bool, torch.tensor, torch.tensor, torch.tensor, torch.tensor
-            ]
-        ),
-    ):
-        self._test_batchnorm2d_no_stats_tosa_MI_pipeline(
-            self.BatchNorm2d(*model_params), (test_data,)
-        )
+@common.parametrize("test_data", test_no_stats_data_suite)
+# Expected to fail since not implemented
+@pytest.mark.skip  # Not implemented, skip until it is.
+def test_native_batch_norm_legit_tosa_MI(test_data: Tuple):
+    test_data, model_params = test_data()
+    pipeline = TosaPipelineMI[input_t1](
+        BatchNorm2d(*model_params),
+        (test_data,),
+        aten_op=[],
+        exir_op="executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
+    )
+    pipeline.pop_stage("check_count.exir")
+    pipeline.run()
+
-    # Expected to fail since TOSAQuantizer cannot quantize a BatchNorm layer
-    # TODO(MLETORCH-100)
-    @parameterized.expand(test_data_suite)
-    @unittest.skip(
-        reason="Expected to fail since TOSAQuantizer (for BI) cannot quantize a BatchNorm layer"
+# Expected to fail since TOSAQuantizer cannot quantize a BatchNorm layer
+# TODO(MLETORCH-100)
+@common.parametrize("test_data", test_data_suite)
+@pytest.mark.skip  # Not implemented, skip until it is.
+def test_native_batch_norm_legit_tosa_BI_no_training(test_data: Tuple): + test_data, model_params = test_data() + pipeline = TosaPipelineBI[input_t1]( + BatchNorm2d(*model_params), + (test_data,), + aten_op="torch.ops.aten._native_batch_norm_legit_no_training.default", + exir_op="executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default", ) - def test_native_batch_norm_legit_no_training_tosa_BI( - self, - test_name: str, - test_data: torch.Tensor, - model_params: ( - int - | Tuple[ - int, bool, bool, torch.tensor, torch.tensor, torch.tensor, torch.tensor - ] - ), - ): - self._test_batchnorm2d_tosa_BI_pipeline( - self.BatchNorm2d(*model_params), (test_data,) - ) + pipeline.run() + - # Expected to fail since EthosUQuantizer (TOSAQuantizer (BI)) cannot quantize a BatchNorm layer - # TODO(MLETORCH-100) - @parameterized.expand(test_data_suite) - @unittest.skip( - reason="Expected to fail since EthosUQuantizer cannot quantize a BatchNorm layer" +# Expected to fail since EthosUQuantizer (TOSAQuantizer (BI)) cannot quantize a BatchNorm layer +# TODO(MLETORCH-100) +@common.parametrize("test_data", test_data_suite) +@pytest.mark.skip # Not implemented, skip until it is. +def test_native_batch_norm_legit_u55_BI_no_training(test_data: Tuple): + test_data, model_params = test_data() + pipeline = EthosU55PipelineBI[input_t1]( + BatchNorm2d(*model_params), + test_data, + aten_ops="torch.ops.aten._native_batch_norm_legit_no_training.default", + exir_ops="executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default", + run_on_fvp=True, ) - @unittest.expectedFailure - def test_native_batch_norm_legit_no_training_u55_BI( - self, - test_name: str, - test_data: torch.Tensor, - model_params: ( - int - | Tuple[ - int, bool, bool, torch.tensor, torch.tensor, torch.tensor, torch.tensor - ] - ), - ): - self._test_batchnorm2d_u55_BI_pipeline( - self.BatchNorm2d(*model_params), (test_data,) - ) + pipeline.run() diff --git a/backends/arm/test/ops/test_bitwise.py b/backends/arm/test/ops/test_bitwise.py index 412701b17da..8be8ba35b4e 100644 --- a/backends/arm/test/ops/test_bitwise.py +++ b/backends/arm/test/ops/test_bitwise.py @@ -22,19 +22,19 @@ class BitwiseBinary(torch.nn.Module): test_data: dict[input_t2] = { - "zeros": ( + "zeros": lambda: ( torch.zeros(1, 10, 10, 10, dtype=torch.int32), torch.zeros(1, 10, 10, 10, dtype=torch.int32), ), - "ones": ( + "ones": lambda: ( torch.ones(10, 10, 10, dtype=torch.int8), torch.ones(10, 10, 10, dtype=torch.int8), ), - "rand_rank2": ( + "rand_rank2": lambda: ( torch.randint(-128, 127, (10, 10), dtype=torch.int8), torch.randint(-128, 127, (10, 10), dtype=torch.int8), ), - "rand_rank4": ( + "rand_rank4": lambda: ( torch.randint(-128, -127, (1, 10, 10, 10), dtype=torch.int8), torch.randint(-128, 127, (1, 10, 10, 10), dtype=torch.int8), ), @@ -67,13 +67,17 @@ def forward(self, tensor1: torch.Tensor, tensor2: torch.Tensor): @common.parametrize("test_data", And().test_data) def test_bitwise_and_tensor_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](And(), test_data, And().aten_op, And().exir_op) + pipeline = TosaPipelineMI[input_t2]( + And(), test_data(), And().aten_op, And().exir_op + ) pipeline.run() @common.parametrize("test_data", And().test_data) def test_bitwise_and_tensor_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](And(), test_data, And().aten_op, And().exir_op) + pipeline = TosaPipelineBI[input_t2]( + And(), test_data(), And().aten_op, And().exir_op + ) 
pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") pipeline.run() @@ -83,7 +87,11 @@ def test_bitwise_and_tensor_tosa_BI(test_data: input_t2): def test_bitwise_and_tensor_u55_BI(test_data: input_t2): # Tests that we don't delegate these ops since they are not supported on U55. pipeline = OpNotSupportedPipeline[input_t2]( - And(), test_data, "TOSA-0.80+BI+u55", {And().exir_op: 1} + And(), + test_data(), + {And().exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -93,7 +101,7 @@ def test_bitwise_and_tensor_u55_BI(test_data: input_t2): @common.XfailIfNoCorstone320 def test_bitwise_and_tensor_u85_BI(test_data: input_t2): pipeline = EthosU85PipelineBI[input_t2]( - And(), test_data, And().aten_op, And().exir_op, run_on_fvp=True + And(), test_data(), And().aten_op, And().exir_op, run_on_fvp=True ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") @@ -102,13 +110,17 @@ def test_bitwise_and_tensor_u85_BI(test_data: input_t2): @common.parametrize("test_data", Xor().test_data) def test_bitwise_xor_tensor_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](Xor(), test_data, Xor().aten_op, Xor().exir_op) + pipeline = TosaPipelineMI[input_t2]( + Xor(), test_data(), Xor().aten_op, Xor().exir_op + ) pipeline.run() @common.parametrize("test_data", Xor().test_data) def test_bitwise_xor_tensor_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](Xor(), test_data, Xor().aten_op, Xor().exir_op) + pipeline = TosaPipelineBI[input_t2]( + Xor(), test_data(), Xor().aten_op, Xor().exir_op + ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") pipeline.run() @@ -118,7 +130,11 @@ def test_bitwise_xor_tensor_tosa_BI(test_data: input_t2): def test_bitwise_xor_tensor_u55_BI(test_data: input_t2): # Tests that we don't delegate these ops since they are not supported on U55. pipeline = OpNotSupportedPipeline[input_t2]( - Xor(), test_data, "TOSA-0.80+BI+u55", {Xor().exir_op: 1} + Xor(), + test_data(), + {Xor().exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -128,7 +144,7 @@ def test_bitwise_xor_tensor_u55_BI(test_data: input_t2): @common.XfailIfNoCorstone320 def test_bitwise_xor_tensor_u85_BI(test_data: input_t2): pipeline = EthosU85PipelineBI[input_t2]( - Xor(), test_data, Xor().aten_op, Xor().exir_op, run_on_fvp=True + Xor(), test_data(), Xor().aten_op, Xor().exir_op, run_on_fvp=True ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") @@ -137,13 +153,13 @@ def test_bitwise_xor_tensor_u85_BI(test_data: input_t2): @common.parametrize("test_data", Or().test_data) def test_bitwise_or_tensor_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](Or(), test_data, Or().aten_op, Or().exir_op) + pipeline = TosaPipelineMI[input_t2](Or(), test_data(), Or().aten_op, Or().exir_op) pipeline.run() @common.parametrize("test_data", Or().test_data) def test_bitwise_or_tensor_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](Or(), test_data, Or().aten_op, Or().exir_op) + pipeline = TosaPipelineBI[input_t2](Or(), test_data(), Or().aten_op, Or().exir_op) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") pipeline.run() @@ -153,7 +169,11 @@ def test_bitwise_or_tensor_tosa_BI(test_data: input_t2): def test_bitwise_or_tensor_u55_BI(test_data: input_t2): # Tests that we don't delegate these ops since they are not supported on U55. 
pipeline = OpNotSupportedPipeline[input_t2]( - Or(), test_data, "TOSA-0.80+BI+u55", {Or().exir_op: 1} + Or(), + test_data(), + {Or().exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -163,7 +183,11 @@ def test_bitwise_or_tensor_u55_BI(test_data: input_t2): @common.XfailIfNoCorstone320 def test_bitwise_or_tensor_u85_BI(test_data: input_t2): pipeline = EthosU85PipelineBI[input_t2]( - Or(), test_data, Or().aten_op, Or().exir_op, run_on_fvp=True + Or(), + test_data(), + Or().aten_op, + Or().exir_op, + run_on_fvp=True, ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") diff --git a/backends/arm/test/ops/test_bmm.py b/backends/arm/test/ops/test_bmm.py index 375e77cb9b0..bd2c9338275 100644 --- a/backends/arm/test/ops/test_bmm.py +++ b/backends/arm/test/ops/test_bmm.py @@ -1,165 +1,162 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest -from typing import Callable, Tuple +from typing import Tuple import pytest import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - - -class TestBMM(unittest.TestCase): - """Tests Batch MatMul""" - - class BMM(torch.nn.Module): - test_data_generators = [ - lambda: (torch.rand(2, 1, 1), torch.rand(2, 1, 1)), - lambda: (torch.rand(5, 3, 5), torch.rand(5, 5, 2)), - lambda: (torch.ones(1, 55, 3), torch.ones(1, 3, 44)), - lambda: (10000 * torch.randn(10, 1, 10), torch.randn(10, 10, 5)), - lambda: (-10 * torch.randn(2, 32, 64), 5 + 5 * torch.randn(2, 64, 32)), - ] - - def forward(self, x, y): - return torch.bmm(x, y) - - class BMMSingleInput(torch.nn.Module): - test_data_generators = [ - lambda: (torch.rand(20, 3, 3),), - lambda: (torch.rand(2, 128, 128),), - lambda: (10000 * torch.randn(4, 25, 25),), - lambda: (5 + 5 * torch.randn(3, 64, 64),), - ] - - def forward(self, x): - return torch.bmm(x, x) - - def _test_bmm_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor, ...] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .check_count({"executorch_exir_dialects_edge__ops_aten_bmm_default": 1}) - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_bmm_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_bmm_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor, ...] 
- ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .check_count({"executorch_exir_dialects_edge__ops_aten_bmm_default": 1}) - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_bmm_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_bmm_ethosu_BI_pipeline( - self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[torch.Tensor, ...], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.bmm.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) - - @parameterized.expand(BMM.test_data_generators) - def test_bmm_tosa_MI(self, test_data_generator: Callable[[], Tuple]): - test_data = test_data_generator() - self._test_bmm_tosa_MI_pipeline(self.BMM(), test_data) - - @parameterized.expand(BMMSingleInput.test_data_generators) - @pytest.mark.flaky # TODO: Investigate flakyness (MLETORCH-534) - def test_bmm_single_input_tosa_MI(self, test_data_generator: Callable[[], Tuple]): - test_data = test_data_generator() - self._test_bmm_tosa_MI_pipeline(self.BMMSingleInput(), test_data) - - @parameterized.expand(BMM.test_data_generators) - def test_bmm_tosa_BI(self, test_data_generator: Callable[[], Tuple]): - test_data = test_data_generator() - self._test_bmm_tosa_BI_pipeline(self.BMM(), test_data) - - @parameterized.expand(BMMSingleInput.test_data_generators) - @pytest.mark.flaky # TODO: Investigate flakyness (MLETORCH-534) - def test_bmm_single_input_tosa_BI(self, test_data_generator: Callable[[], Tuple]): - test_data = test_data_generator() - self._test_bmm_tosa_BI_pipeline(self.BMMSingleInput(), test_data) - - @parameterized.expand(BMM.test_data_generators) - @pytest.mark.corstone_fvp - def test_bmm_u55_BI(self, test_data_generator: Callable[[], Tuple]): - test_data = test_data_generator() - self._test_bmm_ethosu_BI_pipeline( - self.BMM(), common.get_u55_compile_spec(), test_data - ) - - @parameterized.expand(BMM.test_data_generators) - @pytest.mark.corstone_fvp - def test_bmm_u85_BI(self, test_data_generator: Callable[[], Tuple]): - test_data = test_data_generator() - self._test_bmm_ethosu_BI_pipeline( - self.BMM(), common.get_u85_compile_spec(), test_data - ) - - # Expected to fail on FVP as TOSA.MATMUL is not supported on U55 - @parameterized.expand(BMMSingleInput.test_data_generators) - @pytest.mark.corstone_fvp - def test_bmm_single_input_u55_BI(self, test_data_generator: Callable[[], Tuple]): - test_data = test_data_generator() - self._test_bmm_ethosu_BI_pipeline( - self.BMMSingleInput(), common.get_u55_compile_spec(), test_data - ) - - @parameterized.expand(BMMSingleInput.test_data_generators) - @pytest.mark.corstone_fvp - def test_bmm_single_input_u85_BI(self, test_data_generator: Callable[[], Tuple]): - test_data = test_data_generator() - self._test_bmm_ethosu_BI_pipeline( - self.BMMSingleInput(), common.get_u85_compile_spec(), test_data - ) + +from executorch.backends.arm.test import common + +from 
executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+aten_op_bmm = "torch.ops.aten.bmm.default"
+exir_op_bmm = "executorch_exir_dialects_edge__ops_aten_bmm_default"
+
+aten_op_mm = "torch.ops.aten.matmul.default"
+exir_op_mm = "executorch_exir_dialects_edge__ops_aten_matmul_default"
+
+input_t1 = Tuple[torch.Tensor, torch.Tensor]  # Input x
+
+
+class BMM(torch.nn.Module):
+    test_data_generators = {
+        "rand_same": lambda: (torch.rand(2, 1, 1), torch.rand(2, 1, 1)),
+        "rand_diff": lambda: (torch.rand(5, 3, 5), torch.rand(5, 5, 2)),
+        "rand_ones": lambda: (torch.ones(1, 55, 3), torch.ones(1, 3, 44)),
+        "rand_big": lambda: (10000 * torch.randn(10, 1, 10), torch.randn(10, 10, 5)),
+        "rand_neg": lambda: (
+            -10 * torch.randn(2, 32, 64),
+            5 + 5 * torch.randn(2, 64, 32),
+        ),
+    }
+
+    def forward(self, x, y):
+        return torch.bmm(x, y)
+
+
+class MatMul(torch.nn.Module):
+    test_data_generators = {
+        "rand_3d": lambda: (torch.rand(2, 3, 5), torch.rand(2, 5, 2)),
+        "rand_4d": lambda: (torch.rand(1, 2, 3, 5), torch.rand(1, 2, 5, 2)),
+    }
+
+    def forward(self, x, y):
+        return torch.matmul(x, y)
+
+
+class BMMSingleInput(torch.nn.Module):
+    test_data_generators = {
+        "rand_3d_1": lambda: (torch.rand(20, 3, 3),),
+        "rand_3d_2": lambda: (torch.rand(2, 128, 128),),
+        "rand_big_1": lambda: (10000 * torch.randn(4, 25, 25),),
+        "rand_big_2": lambda: (5 + 5 * torch.randn(3, 64, 64),),
+    }
+
+    def forward(self, x):
+        return torch.bmm(x, x)
+
+
+@common.parametrize("test_data", BMM.test_data_generators)
+def test_bmm_tosa_MI(test_data: input_t1):
+    pipeline = TosaPipelineMI[input_t1](BMM(), test_data(), aten_op_bmm, exir_op_bmm)
+    pipeline.run()
+
+
+@pytest.mark.flaky(reruns=5)  # TODO: Investigate flakiness (MLETORCH-534)
+@common.parametrize("test_data", BMMSingleInput.test_data_generators)
+def test_bmm_tosa_MI_single_input(test_data: input_t1):
+    pipeline = TosaPipelineMI[input_t1](
+        BMMSingleInput(), test_data(), aten_op_bmm, exir_op_bmm
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", MatMul.test_data_generators)
+def test_mm_tosa_MI(test_data: input_t1):
+    pipeline = TosaPipelineMI[input_t1](MatMul(), test_data(), aten_op_mm, exir_op_mm)
+    pipeline.run()
+
+
+@common.parametrize("test_data", MatMul.test_data_generators)
+def test_mm_tosa_BI(test_data: input_t1):
+    pipeline = TosaPipelineBI[input_t1](MatMul(), test_data(), aten_op_mm, exir_op_mm)
+    pipeline.run()
+
+
+@pytest.mark.flaky(reruns=5)  # TODO: Investigate flakiness (MLETORCH-534)
+@common.parametrize("test_data", BMM.test_data_generators)
+def test_bmm_tosa_BI(test_data: input_t1):
+    pipeline = TosaPipelineBI[input_t1](BMM(), test_data(), aten_op_bmm, exir_op_bmm)
+    pipeline.run()
+
+
+@pytest.mark.flaky(reruns=5)  # TODO: Investigate flakiness (MLETORCH-534)
+@common.parametrize("test_data", BMMSingleInput.test_data_generators)
+def test_bmm_tosa_BI_single_input(test_data: input_t1):
+    pipeline = TosaPipelineBI[input_t1](
+        BMMSingleInput(), test_data(), aten_op_bmm, exir_op_bmm
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
+    pipeline.run()
+
+
+@common.parametrize("test_data", BMM.test_data_generators)
+@common.XfailIfNoCorstone300
+def test_bmm_u55_BI(test_data: input_t1):
+    pipeline = EthosU55PipelineBI[input_t1](
+        BMM(),
+        test_data(),
+        aten_op_bmm,
+        exir_op_bmm,
+        run_on_fvp=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", BMM.test_data_generators)
+@common.XfailIfNoCorstone320
+def 
test_bmm_u85_BI(test_data: input_t1): + pipeline = EthosU85PipelineBI[input_t1]( + BMM(), + test_data(), + aten_op_bmm, + exir_op_bmm, + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", BMMSingleInput.test_data_generators) +@common.XfailIfNoCorstone300 +def test_bmm_u55_BI_single_input(test_data: input_t1): + pipeline = EthosU55PipelineBI[input_t1]( + BMMSingleInput(), + test_data(), + aten_op_bmm, + exir_op_bmm, + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", BMMSingleInput.test_data_generators) +@common.XfailIfNoCorstone320 +def test_bmm_u85_BI_single_input(test_data: input_t1): + pipeline = EthosU85PipelineBI[input_t1]( + BMMSingleInput(), + test_data(), + aten_op_bmm, + exir_op_bmm, + run_on_fvp=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_cat.py b/backends/arm/test/ops/test_cat.py index 63423b9e993..d5ebd6fe569 100644 --- a/backends/arm/test/ops/test_cat.py +++ b/backends/arm/test/ops/test_cat.py @@ -1,172 +1,138 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest from typing import Tuple -import pytest - import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized +from executorch.backends.arm.test import common + +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) +input_t1 = Tuple[torch.Tensor] # Input x -class TestCat(unittest.TestCase): +aten_op = "torch.ops.aten.cat.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_cat_default" - class Cat(torch.nn.Module): - test_parameters = [ - ((torch.ones(1), torch.ones(1)), 0), - ((torch.ones(1, 2), torch.randn(1, 5), torch.randn(1, 1)), 1), + +class Cat(torch.nn.Module): + test_parameters = { + "cat_ones_two_tensors": lambda: ((torch.ones(1), torch.ones(1)), 0), + "cat_ones_and_rand_three_tensors": lambda: ( + (torch.ones(1, 2), torch.randn(1, 5), torch.randn(1, 1)), + 1, + ), + "cat_ones_and_rand_four_tensors": lambda: ( ( - ( - torch.ones(1, 2, 5), - torch.randn(1, 2, 4), - torch.randn(1, 2, 2), - torch.randn(1, 2, 1), - ), - -1, + torch.ones(1, 2, 5), + torch.randn(1, 2, 4), + torch.randn(1, 2, 2), + torch.randn(1, 2, 1), ), - ((torch.randn(1, 2, 4, 4), torch.randn(1, 2, 4, 1)), 3), - ((torch.randn(1, 2, 4, 4), torch.randn(1, 2, 4, 4)), 0), - ((torch.randn(2, 2, 4, 4), torch.randn(2, 2, 4, 1)), 3), + -1, + ), + "cat_rand_two_tensors": lambda: ( + (torch.randn(1, 2, 4, 4), torch.randn(1, 2, 4, 1)), + 3, + ), + "cat_rand_two_tensors_dim_0": lambda: ( + (torch.randn(1, 2, 4, 4), torch.randn(1, 2, 4, 4)), + 0, + ), + "cat_rand_two_tensors_dim_3": lambda: ( + (torch.randn(2, 2, 4, 4), torch.randn(2, 2, 4, 1)), + 3, + ), + "cat_rand_large": lambda: ( ( - ( - 10000 * torch.randn(2, 3, 1, 4), - torch.randn(2, 7, 1, 4), - torch.randn(2, 1, 1, 4), - ), - -3, + 10000 * torch.randn(2, 3, 1, 4), + torch.randn(2, 7, 1, 4), + torch.randn(2, 1, 1, 4), ), - ] - - def __init__(self): - super().__init__() - - def forward(self, t: tuple[torch.Tensor, ...], dim: int) -> torch.Tensor: - return 
torch.cat(t, dim=dim) - - def _test_cat_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[tuple[torch.Tensor, ...], int] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.cat.default": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_cat_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_cat_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[tuple[torch.Tensor, ...], int] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.cat.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_cat_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_cat_ethosu_BI_pipeline( - self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[tuple[torch.Tensor, ...], int], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.cat.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_cat_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(inputs=test_data) - - @parameterized.expand(Cat.test_parameters) - def test_cat_tosa_MI(self, operands: tuple[torch.Tensor, ...], dim: int): - test_data = (operands, dim) - self._test_cat_tosa_MI_pipeline(self.Cat(), test_data) - - def test_cat_4d_tosa_MI(self): - square = torch.ones((2, 2, 2, 2)) - for dim in range(-3, 3): - test_data = ((square, square.clone()), dim) - self._test_cat_tosa_MI_pipeline(self.Cat(), test_data) - - @parameterized.expand(Cat.test_parameters) - def test_cat_tosa_BI(self, operands: tuple[torch.Tensor, ...], dim: int): - test_data = (operands, dim) - self._test_cat_tosa_BI_pipeline(self.Cat(), test_data) - - @parameterized.expand(Cat.test_parameters[:-3]) - @pytest.mark.corstone_fvp - def test_cat_u55_BI(self, operands: tuple[torch.Tensor, ...], dim: int): - test_data = (operands, dim) - self._test_cat_ethosu_BI_pipeline( - self.Cat(), common.get_u55_compile_spec(), test_data - ) - - # MLETORCH-630 Cat does not work on FVP with batch>1 - @parameterized.expand(Cat.test_parameters[-3:]) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_cat_u55_BI_xfails(self, operands: tuple[torch.Tensor, ...], dim: int): - test_data = (operands, dim) - self._test_cat_ethosu_BI_pipeline( - self.Cat(), common.get_u55_compile_spec(), test_data - ) - - @parameterized.expand(Cat.test_parameters[:-3]) - @pytest.mark.corstone_fvp - def test_cat_u85_BI(self, operands: tuple[torch.Tensor, ...], dim: int): - test_data = (operands, dim) - self._test_cat_ethosu_BI_pipeline( - self.Cat(), common.get_u85_compile_spec(), test_data - ) - - # MLETORCH-630 Cat does not work on FVP with batch>1 - 
@parameterized.expand(Cat.test_parameters[-3:])
-    @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
-    def test_cat_u85_BI_xfails(self, operands: tuple[torch.Tensor, ...], dim: int):
-        test_data = (operands, dim)
-        self._test_cat_ethosu_BI_pipeline(
-            self.Cat(), common.get_u85_compile_spec(), test_data
+            -3,
+        ),
+    }
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, t: tuple[torch.Tensor, ...], dim: int) -> torch.Tensor:
+        return torch.cat(t, dim=dim)
+
+
+@common.parametrize("test_data", Cat.test_parameters)
+def test_cat_tosa_MI(test_data: Tuple):
+    pipeline = TosaPipelineMI[input_t1](
+        Cat(),
+        test_data(),
+        aten_op,
+        exir_op,
+    )
+    pipeline.run()
+
+
+def test_cat_tosa_MI_4d():
+    square = torch.ones((2, 2, 2, 2))
+    for dim in range(-3, 3):
+        test_data = ((square, square.clone()), dim)
+        pipeline = TosaPipelineMI[input_t1](
+            Cat(),
+            test_data,
+            aten_op,
+            exir_op,
         )
+        pipeline.run()
+
+
+@common.parametrize("test_data", Cat.test_parameters)
+def test_cat_tosa_BI(test_data: Tuple):
+    pipeline = TosaPipelineBI[input_t1](
+        Cat(),
+        test_data(),
+        aten_op,
+        exir_op,
+    )
+    pipeline.run()
+
+
+x_fails = {
+    "cat_rand_two_tensors_dim_0": "MLETORCH-630: AssertionError: Output 0 does not match reference output.",
+    "cat_rand_two_tensors_dim_3": "MLETORCH-630: AssertionError: Output 0 does not match reference output.",
+    "cat_rand_large": "MLETORCH-630: AssertionError: Output 0 does not match reference output.",
+}
+
+
+@common.parametrize("test_data", Cat.test_parameters, x_fails)
+@common.XfailIfNoCorstone300
+def test_cat_u55_BI(test_data: Tuple):
+    pipeline = EthosU55PipelineBI[input_t1](
+        Cat(),
+        test_data(),
+        aten_op,
+        exir_op,
+        run_on_fvp=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Cat.test_parameters, x_fails)
+@common.XfailIfNoCorstone320
+def test_cat_u85_BI(test_data: Tuple):
+    pipeline = EthosU85PipelineBI[input_t1](
+        Cat(),
+        test_data(),
+        aten_op,
+        exir_op,
+        run_on_fvp=True,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_clamp.py b/backends/arm/test/ops/test_clamp.py
index 368f7967433..0846effa7a6 100644
--- a/backends/arm/test/ops/test_clamp.py
+++ b/backends/arm/test/ops/test_clamp.py
@@ -17,20 +17,21 @@
     TosaPipelineMI,
 )
 
-
 aten_op = "torch.ops.aten.clamp.default"
 exir_op = "executorch_exir_dialects_edge__ops_aten_clamp_default"
 
+
 input_t = Tuple[torch.Tensor]
 
+
 test_data_suite = {
     # test_name: (test_data, min, max)
-    "rank_1": (torch.rand(10) * 2, -1.0, 1.0),
-    "rank_2": (torch.rand(1, 35), 0.5, 0.8),
-    "rank_3": (torch.ones(1, 10, 10), -1, -1),
-    "rank_4": (torch.rand(1, 10, 10, 1) * 2, -0.1, 2.0),
-    "rank_4_mixed_min_max_dtype": (torch.rand(1, 10, 10, 5) + 10, 8.0, 10),
-    "rank_4_no_min": (torch.rand(1, 10, 10, 1) * 10, None, 5),
-    "rank_4_no_max": (torch.rand(1, 10, 10, 1) - 3, -3.3, None),
+    "rank_1": lambda: (torch.rand(10) * 2, -1.0, 1.0),
+    "rank_2": lambda: (torch.rand(1, 35), 0.5, 0.8),
+    "rank_3": lambda: (torch.ones(1, 10, 10), -1, -1),
+    "rank_4": lambda: (torch.rand(1, 10, 10, 1) * 2, -0.1, 2.0),
+    "rank_4_mixed_min_max_dtype": lambda: (torch.rand(1, 10, 10, 5) + 10, 8.0, 10),
+    "rank_4_no_min": lambda: (torch.rand(1, 10, 10, 1) * 10, None, 5),
+    "rank_4_no_max": lambda: (torch.rand(1, 10, 10, 1) - 3, -3.3, None),
 }
 
 
@@ -52,7 +53,7 @@ def forward(self, x):
 
 @common.parametrize("test_data", test_data_suite)
 def test_clamp_tosa_MI(test_data):
-    input_tensor, min_val, max_val = test_data
+    
input_tensor, min_val, max_val = test_data() model = Clamp(min_val, max_val) pipeline = TosaPipelineMI[input_t]( @@ -68,7 +69,7 @@ def test_clamp_tosa_MI(test_data): @common.parametrize("test_data", test_data_suite) def test_clamp_tosa_BI(test_data): - input_tensor, min_val, max_val = test_data + input_tensor, min_val, max_val = test_data() model = Clamp(min_val, max_val) pipeline = TosaPipelineBI[input_t]( @@ -84,46 +85,10 @@ def test_clamp_tosa_BI(test_data): @common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 def test_clamp_u55_BI(test_data): - input_tensor, min_val, max_val = test_data - model = Clamp(min_val, max_val) - - pipeline = EthosU55PipelineBI[input_t]( - model, - (input_tensor,), - aten_op, - exir_op, - run_on_fvp=False, - symmetric_io_quantization=True, - ) - - pipeline.run() - - -@common.parametrize("test_data", test_data_suite) -def test_clamp_u85_BI(test_data): - - input_tensor, min_val, max_val = test_data - model = Clamp(min_val, max_val) - - pipeline = EthosU85PipelineBI[input_t]( - model, - (input_tensor,), - aten_op, - exir_op, - run_on_fvp=False, - symmetric_io_quantization=True, - ) - - pipeline.run() - - -@common.parametrize("test_data", test_data_suite) -@common.SkipIfNoCorstone300 -def test_clamp_u55_BI_on_fvp(test_data): - - input_tensor, min_val, max_val = test_data + input_tensor, min_val, max_val = test_data() model = Clamp(min_val, max_val) pipeline = EthosU55PipelineBI[input_t]( @@ -140,10 +105,10 @@ def test_clamp_u55_BI_on_fvp(test_data): @common.parametrize("test_data", test_data_suite) -@common.SkipIfNoCorstone320 -def test_clamp_u85_BI_on_fvp(test_data): +@common.XfailIfNoCorstone320 +def test_clamp_u85_BI(test_data): - input_tensor, min_val, max_val = test_data + input_tensor, min_val, max_val = test_data() model = Clamp(min_val, max_val) pipeline = EthosU85PipelineBI[input_t]( diff --git a/backends/arm/test/ops/test_clone.py b/backends/arm/test/ops/test_clone.py index 2aad62ece24..125a705ccb4 100644 --- a/backends/arm/test/ops/test_clone.py +++ b/backends/arm/test/ops/test_clone.py @@ -21,7 +21,6 @@ TosaPipelineMI, ) - aten_op = "torch.ops.aten.clone.default" exir_op = "executorch_exir_dialects_edge__ops_aten_clone_default" @@ -36,13 +35,13 @@ def forward(self, x: torch.Tensor): test_data_suite = { - "ones_1D_10": (torch.ones(10),), - "ones_1D_50": (torch.ones(50),), - "rand_1D_20": (torch.rand(20),), - "rand_2D_10x10": (torch.rand(10, 10),), - "rand_3D_5x5x5": (torch.rand(5, 5, 5),), - "rand_4D_2x3x4x5": (torch.rand(2, 3, 4, 5),), - "large_tensor": (torch.rand(1000),), + "ones_1D_10": lambda: (torch.ones(10),), + "ones_1D_50": lambda: (torch.ones(50),), + "rand_1D_20": lambda: (torch.rand(20),), + "rand_2D_10x10": lambda: (torch.rand(10, 10),), + "rand_3D_5x5x5": lambda: (torch.rand(5, 5, 5),), + "rand_4D_2x3x4x5": lambda: (torch.rand(2, 3, 4, 5),), + "large_tensor": lambda: (torch.rand(1000),), } @@ -51,7 +50,7 @@ def test_clone_tosa_MI(test_data: Tuple[torch.Tensor]): pipeline = TosaPipelineMI[input_t]( Clone(), - test_data, + test_data(), aten_op, exir_op, ) @@ -63,7 +62,7 @@ def test_clone_tosa_MI(test_data: Tuple[torch.Tensor]): def test_clone_tosa_BI(test_data): pipeline = TosaPipelineBI[input_t]( Clone(), - test_data, + test_data(), aten_op, exir_op, symmetric_io_quantization=True, @@ -72,48 +71,14 @@ def test_clone_tosa_BI(test_data): @common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 @pytest.mark.xfail( reason="Empty subgraph leads to Vela compilation failure. 
See: https://jira.arm.com/browse/MLBEDSW-10477" ) def test_clone_u55_BI(test_data): pipeline = EthosU55PipelineBI[input_t]( Clone(), - test_data, - aten_op, - exir_op, - run_on_fvp=False, - symmetric_io_quantization=True, - ) - - pipeline.run() - - -@common.parametrize("test_data", test_data_suite) -@pytest.mark.xfail( - reason="Empty subgraph leads to Vela compilation failure. See: https://jira.arm.com/browse/MLBEDSW-10477" -) -def test_clone_u85_BI(test_data): - pipeline = EthosU85PipelineBI[input_t]( - Clone(), - test_data, - aten_op, - exir_op, - run_on_fvp=False, - symmetric_io_quantization=True, - ) - - pipeline.run() - - -@common.parametrize("test_data", test_data_suite) -@pytest.mark.xfail( - reason="Empty subgraph leads to Vela compilation failure. See: https://jira.arm.com/browse/MLBEDSW-10477" -) -@common.SkipIfNoCorstone300 -def test_clone_u55_BI_on_fvp(test_data): - pipeline = EthosU55PipelineBI[input_t]( - Clone(), - test_data, + test_data(), aten_op, exir_op, run_on_fvp=True, @@ -124,14 +89,14 @@ def test_clone_u55_BI_on_fvp(test_data): @common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 @pytest.mark.xfail( reason="Empty subgraph leads to Vela compilation failure. See: https://jira.arm.com/browse/MLBEDSW-10477" ) -@common.SkipIfNoCorstone320 -def test_clone_u85_BI_on_fvp(test_data): +def test_clone_u85_BI(test_data): pipeline = EthosU85PipelineBI[input_t]( Clone(), - test_data, + test_data(), aten_op, exir_op, run_on_fvp=True, diff --git a/backends/arm/test/ops/test_constant_pad_nd.py b/backends/arm/test/ops/test_constant_pad_nd.py index 9a19f6fbf5f..0a81fd0f97d 100644 --- a/backends/arm/test/ops/test_constant_pad_nd.py +++ b/backends/arm/test/ops/test_constant_pad_nd.py @@ -17,19 +17,20 @@ aten_op = "torch.ops.aten.pad.default" exir_op = "executorch_exir_dialects_edge__ops_aten_pad_default" + input_t1 = Tuple[torch.Tensor] # Input x + test_data_suite = { - "4dim_last1dim": (torch.rand(1, 1, 16, 16), (1, 1, 0, 0, 0, 0, 0, 0), 1), - "4dim_last2dim": (torch.rand(1, 1, 16, 16), (1, 0, 1, 0, 0, 0, 0, 0), 2), - "4dim_last3dim": (torch.rand(1, 1, 16, 16), (1, 1, 0, 2, 0, 2, 0, 0), 3), - "4dim_last4dim": (torch.rand(1, 1, 16, 16), (1, 0, 1, 1, 0, 2, 0, 2), 4), - "3dim_last1dim": (torch.rand(1, 1, 16), (1, 1, 0, 0, 0, 0), 1), - "3dim_last2dim": (torch.rand(1, 1, 16), (1, 0, 1, 1, 0, 0), 2), - "3dim_last3dim": (torch.rand(1, 1, 16), (1, 0, 1, 0, 1, 1), 3), - "2dim_last1dim": (torch.rand(1, 1, 16), (1, 1, 0, 0), 1), - "2dim_last2dim": (torch.rand(1, 1, 16), (1, 0, 1, 1), 2), + "4dim_last1dim": lambda: (torch.rand(1, 1, 16, 16), (1, 1, 0, 0, 0, 0, 0, 0), 1), + "4dim_last2dim": lambda: (torch.rand(1, 1, 16, 16), (1, 0, 1, 0, 0, 0, 0, 0), 2), + "4dim_last3dim": lambda: (torch.rand(1, 1, 16, 16), (1, 1, 0, 2, 0, 2, 0, 0), 3), + "4dim_last4dim": lambda: (torch.rand(1, 1, 16, 16), (1, 0, 1, 1, 0, 2, 0, 2), 4), + "3dim_last1dim": lambda: (torch.rand(1, 1, 16), (1, 1, 0, 0, 0, 0), 1), + "3dim_last2dim": lambda: (torch.rand(1, 1, 16), (1, 0, 1, 1, 0, 0), 2), + "3dim_last3dim": lambda: (torch.rand(1, 1, 16), (1, 0, 1, 0, 1, 1), 3), + "2dim_last1dim": lambda: (torch.rand(1, 1, 16), (1, 1, 0, 0), 1), + "2dim_last2dim": lambda: (torch.rand(1, 1, 16), (1, 0, 1, 1), 2), } -"""Tests pad.""" class ConstantPadND(torch.nn.Module): @@ -53,7 +54,7 @@ def forward(self, x: torch.Tensor): test_data_suite, ) def test_constant_pad_nd_tosa_MI(test_data: Tuple): - test_data, padding, value = test_data + test_data, padding, value = test_data() pipeline = TosaPipelineMI[input_t1]( 
ConstantPadND(padding, value), (test_data,), @@ -65,7 +66,7 @@ def test_constant_pad_nd_tosa_MI(test_data: Tuple): @common.parametrize("test_data", test_data_suite) def test_constant_pad_nd_tosa_BI(test_data: Tuple): - test_data, padding, value = test_data + test_data, padding, value = test_data() pipeline = TosaPipelineBI[input_t1]( ConstantPadND(padding, value), (test_data,), diff --git a/backends/arm/test/ops/test_conv1d.py b/backends/arm/test/ops/test_conv1d.py index a1ba23ac73a..768da4d5c89 100644 --- a/backends/arm/test/ops/test_conv1d.py +++ b/backends/arm/test/ops/test_conv1d.py @@ -250,27 +250,27 @@ def forward(self, x): ) test_modules = { - "2_3x2x40_nobias": conv1d_2_3x2x40_nobias, - "3_1x3x256_st1": conv1d_3_1x3x256_st1, - "3_1x3x12_st2_pd1": conv1d_3_1x3x12_st2_pd1, - "1_1x2x128_st1": conv1d_1_1x2x128_st1, - "2_1x2x14_st2": conv1d_2_1x2x14_st2, - "5_3x2x128_st1": conv1d_5_3x2x128_st1, - "3_1x3x224_st2_pd1": conv1d_3_1x3x224_st2_pd1, - "7_1x3x16_st2_pd1_dl2_needs_adjust_pass": conv1d_7_1x3x16_st2_pd1_dl2, - "7_1x3x15_st1_pd0_dl1_needs_adjust_pass": conv1d_7_1x3x15_st1_pd0_dl1, - "5_1x3x14_st5_pd0_dl1_needs_adjust_pass": conv1d_5_1x3x14_st5_pd0_dl1, - "5_1x3x9_st5_pd0_dl1_needs_adjust_pass": conv1d_5_1x3x9_st5_pd0_dl1, - "two_conv1d_nobias": two_conv1d_nobias, - "two_conv1d": two_conv1d, + "2_3x2x40_nobias": lambda: conv1d_2_3x2x40_nobias, + "3_1x3x256_st1": lambda: conv1d_3_1x3x256_st1, + "3_1x3x12_st2_pd1": lambda: conv1d_3_1x3x12_st2_pd1, + "1_1x2x128_st1": lambda: conv1d_1_1x2x128_st1, + "2_1x2x14_st2": lambda: conv1d_2_1x2x14_st2, + "5_3x2x128_st1": lambda: conv1d_5_3x2x128_st1, + "3_1x3x224_st2_pd1": lambda: conv1d_3_1x3x224_st2_pd1, + "7_1x3x16_st2_pd1_dl2_needs_adjust_pass": lambda: conv1d_7_1x3x16_st2_pd1_dl2, + "7_1x3x15_st1_pd0_dl1_needs_adjust_pass": lambda: conv1d_7_1x3x15_st1_pd0_dl1, + "5_1x3x14_st5_pd0_dl1_needs_adjust_pass": lambda: conv1d_5_1x3x14_st5_pd0_dl1, + "5_1x3x9_st5_pd0_dl1_needs_adjust_pass": lambda: conv1d_5_1x3x9_st5_pd0_dl1, + "two_conv1d_nobias": lambda: two_conv1d_nobias, + "two_conv1d": lambda: two_conv1d, } @common.parametrize("test_module", test_modules) def test_convolution_1d_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), aten_op, exir_op, ) @@ -280,8 +280,8 @@ def test_convolution_1d_tosa_MI(test_module): @common.parametrize("test_module", test_modules) def test_convolution_1d_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), aten_op, exir_op, ) @@ -290,35 +290,11 @@ def test_convolution_1d_tosa_BI(test_module): @common.parametrize("test_module", test_modules) +@common.XfailIfNoCorstone300 def test_convolution_1d_u55_BI(test_module): pipeline = EthosU55PipelineBI[input_t]( - test_module, - test_module.get_inputs(), - aten_op, - exir_op, - run_on_fvp=False, - ) - pipeline.run() - - -@common.parametrize("test_module", test_modules) -def test_convolution_1d_u85_BI(test_module): - pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), - aten_op, - exir_op, - run_on_fvp=False, - ) - pipeline.run() - - -@common.parametrize("test_module", test_modules) -@common.SkipIfNoCorstone300 -def test_convolution_1d_u55_BI_on_fvp(test_module): - pipeline = EthosU55PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), aten_op, exir_op, run_on_fvp=True, @@ -328,11 +304,11 @@ def 
test_convolution_1d_u55_BI_on_fvp(test_module): @common.parametrize("test_module", test_modules) -@common.SkipIfNoCorstone320 -def test_convolution_1d_u85_BI_on_fvp(test_module): +@common.XfailIfNoCorstone320 +def test_convolution_1d_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), aten_op, exir_op, run_on_fvp=True, diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py index 844eed97638..158c296e4ec 100644 --- a/backends/arm/test/ops/test_conv2d.py +++ b/backends/arm/test/ops/test_conv2d.py @@ -330,24 +330,24 @@ def forward(self, x): # Shenanigan to get a nicer output when test fails. With unittest it looks like: # FAIL: test_convolution_2d_tosa_BI_2_3x3_1x3x12x12_st2_pd1 test_modules = { - "2x2_3x2x40x40_nobias": conv2d_2x2_3x2x40x40_nobias, - "3x3_1x3x256x256_st1": conv2d_3x3_1x3x256x256_st1, - "3x3_1x3x12x12_st2_pd1": conv2d_3x3_1x3x12x12_st2_pd1, - "1x1_1x2x128x128_st1": conv2d_1x1_1x2x128x128_st1, - "2x2_1x1x14x13_st2_needs_adjust_pass": conv2d_2x2_1x1x14x13_st2, - "5x5_1x3x14x15_st3_pd1_needs_adjust_pass": conv2d_5x5_1x3x14x15_st3_pd1, - "7x7_1x3x16x16_st2_pd1_dl2_needs_adjust_pass": conv2d_7x7_1x3x16x16_st2_pd1_dl2, - "7x7_1x3x15x15_st1_pd0_dl1_needs_adjust_pass": conv2d_7x7_1x3x15x15_st1_pd0_dl1, - "5x5_1x3x14x14_st5_pd0_dl1_needs_adjust_pass": conv2d_5x5_1x3x14x14_st5_pd0_dl1, - "5x5_1x3x9x9_st5_pd0_dl1_needs_adjust_pass": conv2d_5x5_1x3x9x9_st5_pd0_dl1, - "3x3_1x3x9x8_st3_pd0_dl1_needs_adjust_pass": conv2d_3x3_1x3x9x8_st3_pd0_dl1, - "3x3_1x3x8x9_st3_pd0_dl1_needs_adjust_pass": conv2d_3x3_1x3x8x9_st3_pd0_dl1, - "3x4_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": conv2d_3x4_1x3x7x7_st3_pd0_dl1, - "4x3_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": conv2d_4x3_1x3x7x7_st3_pd0_dl1, - "5x5_3x2x128x128_st1": conv2d_5x5_3x2x128x128_st1, - "3x3_1x3x224x224_st2_pd1": conv2d_3x3_1x3x224x224_st2_pd1, - "two_conv2d_nobias": two_conv2d_nobias, - "two_conv2d": two_conv2d, + "2x2_3x2x40x40_nobias": lambda: conv2d_2x2_3x2x40x40_nobias, + "3x3_1x3x256x256_st1": lambda: conv2d_3x3_1x3x256x256_st1, + "3x3_1x3x12x12_st2_pd1": lambda: conv2d_3x3_1x3x12x12_st2_pd1, + "1x1_1x2x128x128_st1": lambda: conv2d_1x1_1x2x128x128_st1, + "2x2_1x1x14x13_st2_needs_adjust_pass": lambda: conv2d_2x2_1x1x14x13_st2, + "5x5_1x3x14x15_st3_pd1_needs_adjust_pass": lambda: conv2d_5x5_1x3x14x15_st3_pd1, + "7x7_1x3x16x16_st2_pd1_dl2_needs_adjust_pass": lambda: conv2d_7x7_1x3x16x16_st2_pd1_dl2, + "7x7_1x3x15x15_st1_pd0_dl1_needs_adjust_pass": lambda: conv2d_7x7_1x3x15x15_st1_pd0_dl1, + "5x5_1x3x14x14_st5_pd0_dl1_needs_adjust_pass": lambda: conv2d_5x5_1x3x14x14_st5_pd0_dl1, + "5x5_1x3x9x9_st5_pd0_dl1_needs_adjust_pass": lambda: conv2d_5x5_1x3x9x9_st5_pd0_dl1, + "3x3_1x3x9x8_st3_pd0_dl1_needs_adjust_pass": lambda: conv2d_3x3_1x3x9x8_st3_pd0_dl1, + "3x3_1x3x8x9_st3_pd0_dl1_needs_adjust_pass": lambda: conv2d_3x3_1x3x8x9_st3_pd0_dl1, + "3x4_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": lambda: conv2d_3x4_1x3x7x7_st3_pd0_dl1, + "4x3_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": lambda: conv2d_4x3_1x3x7x7_st3_pd0_dl1, + "5x5_3x2x128x128_st1": lambda: conv2d_5x5_3x2x128x128_st1, + "3x3_1x3x224x224_st2_pd1": lambda: conv2d_3x3_1x3x224x224_st2_pd1, + "two_conv2d_nobias": lambda: two_conv2d_nobias, + "two_conv2d": lambda: two_conv2d, } fvp_xfails = { @@ -360,7 +360,10 @@ def forward(self, x): @common.parametrize("test_module", test_modules) def test_convolution_2d_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - 
test_module, test_module.get_inputs(), aten_op, exir_op + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, ) pipeline.run() @@ -368,48 +371,43 @@ def test_convolution_2d_tosa_MI(test_module): @common.parametrize("test_module", test_modules) def test_convolution_2d_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, ) pipeline.change_args("run_method_and_compare_outputs", qtol=1) pipeline.run() -@common.parametrize("test_module", test_modules) -def test_convolution_2d_u55_BI(test_module): - pipeline = EthosU55PipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=False - ) - pipeline.run() - - -@common.parametrize("test_module", test_modules) -def test_convolution_2d_u85_BI(test_module): - pipeline = EthosU85PipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=False - ) - pipeline.run() - - @common.parametrize("test_module", test_modules, fvp_xfails) -@common.SkipIfNoCorstone300 -def test_convolution_2d_u55_BI_on_fvp(test_module): +@common.XfailIfNoCorstone300 +def test_convolution_2d_u55_BI(test_module): pipeline = EthosU55PipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=True + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, ) pipeline.run() @common.parametrize("test_module", test_modules, fvp_xfails) -@common.SkipIfNoCorstone320 -def test_convolution_2d_u85_BI_on_fvp(test_module): +@common.XfailIfNoCorstone320 +def test_convolution_2d_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=True + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, ) pipeline.run() reject_suite = { - "large_stride": Conv2d( + "large_stride": lambda: Conv2d( in_channels=1, out_channels=1, kernel_size=(2, 4), @@ -419,7 +417,7 @@ def test_convolution_2d_u85_BI_on_fvp(test_module): height=14, batches=1, ), - "large_kernel_height": Conv2d( + "large_kernel_height": lambda: Conv2d( in_channels=1, out_channels=1, kernel_size=(2, 65), @@ -429,7 +427,7 @@ def test_convolution_2d_u85_BI_on_fvp(test_module): height=70, batches=1, ), - "large_kernel": Conv2d( + "large_kernel": lambda: Conv2d( in_channels=1, out_channels=1, kernel_size=(70, 60), @@ -443,12 +441,11 @@ def test_convolution_2d_u85_BI_on_fvp(test_module): @common.parametrize("module", reject_suite) -def test_reject_convolution_2d_u55_BI( - module: Conv2d, -): +def test_convolution_2d_u55_BI_not_delegated(module: Conv2d): OpNotSupportedPipeline( - module, - module.get_inputs(), - "TOSA-0.80+BI+u55", + module(), + module().get_inputs(), {"executorch_exir_dialects_edge__ops_aten_convolution_default": 1}, + quantize=True, + u55_subset=True, ).run() diff --git a/backends/arm/test/ops/test_conv3d.py b/backends/arm/test/ops/test_conv3d.py index 22f7e9e7f54..c7bb7c55887 100644 --- a/backends/arm/test/ops/test_conv3d.py +++ b/backends/arm/test/ops/test_conv3d.py @@ -305,22 +305,22 @@ def forward(self, x): ) test_modules = { - "2x2_3x2x40x40_nobias": conv3d_2x2_3x2x40x40_nobias, - "3x3_1x3x256x256_st1": conv3d_3x3_1x3x256x256_st1, - "3x3_1x3x12x12_st2_pd1": conv3d_3x3_1x3x12x12_st2_pd1, - "1x1_1x2x128x128_st1": conv3d_1x1_1x2x128x128_st1, - "2x2_1x1x14x13_st2_needs_adjust_pass": conv3d_2x2_1x1x14x13_st2, - "5x5_1x3x14x15_st3_pd1_needs_adjust_pass": 
conv3d_5x5_1x3x14x15_st3_pd1, - "7x7_1x3x16x16_st2_pd1_dl2_needs_adjust_pass": conv3d_7x7_1x3x16x16_st2_pd1_dl2, - "7x7_1x3x15x15_st1_pd0_dl1_needs_adjust_pass": conv3d_7x7_1x3x15x15_st1_pd0_dl1, - "5x5_1x3x14x14_st5_pd0_dl1_needs_adjust_pass": conv3d_5x5_1x3x14x14_st5_pd0_dl1, - "5x5_1x3x9x9_st5_pd0_dl1_needs_adjust_pass": conv3d_5x5_1x3x9x9_st5_pd0_dl1, - "3x3_1x3x9x8_st3_pd0_dl1_needs_adjust_pass": conv3d_3x3_1x3x9x8_st3_pd0_dl1, - "3x3_1x3x8x9_st3_pd0_dl1_needs_adjust_pass": conv3d_3x3_1x3x8x9_st3_pd0_dl1, - "3x4_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": conv3d_3x4_1x3x7x7_st3_pd0_dl1, - "4x3_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": conv3d_4x3_1x3x7x7_st3_pd0_dl1, - "5x5_3x2x128x128_st1": conv3d_5x5_3x2x128x128_st1, - "3x3_1x3x224x224_st2_pd1": conv3d_3x3_1x3x224x224_st2_pd1, + "2x2_3x2x40x40_nobias": lambda: conv3d_2x2_3x2x40x40_nobias, + "3x3_1x3x256x256_st1": lambda: conv3d_3x3_1x3x256x256_st1, + "3x3_1x3x12x12_st2_pd1": lambda: conv3d_3x3_1x3x12x12_st2_pd1, + "1x1_1x2x128x128_st1": lambda: conv3d_1x1_1x2x128x128_st1, + "2x2_1x1x14x13_st2_needs_adjust_pass": lambda: conv3d_2x2_1x1x14x13_st2, + "5x5_1x3x14x15_st3_pd1_needs_adjust_pass": lambda: conv3d_5x5_1x3x14x15_st3_pd1, + "7x7_1x3x16x16_st2_pd1_dl2_needs_adjust_pass": lambda: conv3d_7x7_1x3x16x16_st2_pd1_dl2, + "7x7_1x3x15x15_st1_pd0_dl1_needs_adjust_pass": lambda: conv3d_7x7_1x3x15x15_st1_pd0_dl1, + "5x5_1x3x14x14_st5_pd0_dl1_needs_adjust_pass": lambda: conv3d_5x5_1x3x14x14_st5_pd0_dl1, + "5x5_1x3x9x9_st5_pd0_dl1_needs_adjust_pass": lambda: conv3d_5x5_1x3x9x9_st5_pd0_dl1, + "3x3_1x3x9x8_st3_pd0_dl1_needs_adjust_pass": lambda: conv3d_3x3_1x3x9x8_st3_pd0_dl1, + "3x3_1x3x8x9_st3_pd0_dl1_needs_adjust_pass": lambda: conv3d_3x3_1x3x8x9_st3_pd0_dl1, + "3x4_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": lambda: conv3d_3x4_1x3x7x7_st3_pd0_dl1, + "4x3_1x3x7x7_st3_pd0_dl1_needs_adjust_pass": lambda: conv3d_4x3_1x3x7x7_st3_pd0_dl1, + "5x5_3x2x128x128_st1": lambda: conv3d_5x5_3x2x128x128_st1, + "3x3_1x3x224x224_st2_pd1": lambda: conv3d_3x3_1x3x224x224_st2_pd1, } input_t = Tuple[torch.Tensor] @@ -328,18 +328,18 @@ def forward(self, x): @common.parametrize("test_module", test_modules) @pytest.mark.skip # Not implemented, skip until it is. -def test_convolution_3d_tosa_MI(test_module): +def test_convolution_tosa_MI_3d(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op + test_module(), test_module().get_inputs(), aten_op, exir_op ) pipeline.run() @common.parametrize("test_module", test_modules) @pytest.mark.skip # Not implemented, skip until it is. -def test_convolution_3d_tosa_BI(test_module): +def test_convolution_tosa_BI_3d(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op + test_module(), test_module().get_inputs(), aten_op, exir_op ) pipeline.change_args("run_method_and_compare_outputs", qtol=1) pipeline.run() @@ -347,24 +347,32 @@ def test_convolution_3d_tosa_BI(test_module): @common.parametrize("test_module", test_modules) @pytest.mark.skip # Not implemented, skip until it is. -def test_convolution_3d_u55_BI(test_module): +def test_convolution_u55_BI_3d(test_module): pipeline = EthosU55PipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=True + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, ) pipeline.run() @common.parametrize("test_module", test_modules) @pytest.mark.skip # Not implemented, skip until it is. 
-def test_convolution_3d_u85_BI(test_module): +def test_convolution_u85_BI_3d(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=True + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, ) pipeline.run() reject_suite = { - "large_stride": Conv3d( + "large_stride": lambda: Conv3d( in_channels=1, out_channels=1, kernel_size=(2, 2, 1), @@ -374,7 +382,7 @@ def test_convolution_3d_u85_BI(test_module): height=14, batches=1, ), - "large_kernel_z": Conv3d( + "large_kernel_z": lambda: Conv3d( in_channels=1, out_channels=1, kernel_size=(2, 2, 2), @@ -388,12 +396,11 @@ def test_convolution_3d_u85_BI(test_module): @common.parametrize("module", reject_suite) -def test_reject_convolution_3d_u55_BI( - module: Conv3d, -): +def test_convolution_u55_BI_not_delegated_3d(module: Conv3d): OpNotSupportedPipeline( - module, - module.get_inputs(), - "TOSA-0.80+BI+u55", + module(), + module().get_inputs(), {"executorch_exir_dialects_edge__ops_aten_convolution_default": 1}, + quantize=True, + u55_subset=True, ).run() diff --git a/backends/arm/test/ops/test_conv_combos.py b/backends/arm/test/ops/test_conv_combos.py index 0fb3c2675e9..7f54fa226aa 100644 --- a/backends/arm/test/ops/test_conv_combos.py +++ b/backends/arm/test/ops/test_conv_combos.py @@ -1,20 +1,24 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest - from typing import Tuple import pytest import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.backend_details import CompileSpec -from parameterized import parameterized +from executorch.backends.arm.test import common + +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +input_t1 = Tuple[torch.Tensor] + from torch.nn.parameter import Parameter @@ -138,13 +142,13 @@ class ComboConvRelu6(torch.nn.Module): "executorch_exir_dialects_edge__ops_aten_hardtanh_default", ] - test_data = [ - (2 * torch.randn(1, 3, 256, 256),), - (0.5 * torch.randn(1, 3, 256, 256),), - (torch.randn(1, 3, 256, 256),), - (-0.5 * torch.randn(1, 3, 256, 256),), - (-2 * torch.randn(1, 3, 256, 256),), - ] + test_data = { + "combo_conv_relu_2_x_4d": lambda: (2 * torch.randn(1, 3, 256, 256),), + "combo_conv_relu_0_5_x_4d": lambda: (0.5 * torch.randn(1, 3, 256, 256),), + "combo_conv_relu_4d": lambda: (torch.randn(1, 3, 256, 256),), + "combo_conv_relu_neg_0_5_x_4d": lambda: (-0.5 * torch.randn(1, 3, 256, 256),), + "combo_conv_relu_neg_2_x_4d": lambda: (-2 * torch.randn(1, 3, 256, 256),), + } def __init__(self): super().__init__() @@ -165,12 +169,12 @@ class ComboConvAvgPool2d(torch.nn.Module): "executorch_exir_dialects_edge__ops_aten_avg_pool2d_default", ] - test_data = [ - (20 * torch.randn(1, 3, 64, 32),), - (torch.randn(1, 3, 100, 200),), - (5 * torch.randn(1, 3, 256, 256),), - (torch.rand(1, 3, 512, 128),), - ] + test_data = { + "combo_conv_avgpool_20_x_4d": lambda: (20 * torch.randn(1, 3, 64, 32),), + "combo_conv_avgpool_4d": lambda: (torch.randn(1, 3, 100, 200),), + "combo_conv_avgpool_5_x_4d_randn": lambda: (5 * torch.randn(1, 3, 256, 256),), + "combo_conv_avgpool_2_x_4d": lambda: (torch.rand(1, 3, 512, 128),), + } def __init__(self): 
super().__init__() @@ -185,238 +189,291 @@ def forward(self, x): return x -class TestConvCombos(unittest.TestCase): - """Tests conv combined with other ops.""" - - def _test_conv_combo_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+MI", - ), - ) - .export() - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .check_not(list(module.edge_op_list)) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_conv_combo_tosa_BI_pipeline( - self, - module: torch.nn.Module, - test_data: Tuple[torch.Tensor], - atol: float = 1e-3, - rtol: float = 1e-3, - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+BI", - ), - ) - .quantize() - .export() - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .check_not(list(module.edge_op_list)) - .to_executorch() - .run_method_and_compare_outputs( - inputs=test_data, atol=atol, rtol=rtol, qtol=1 - ) - ) - - def _test_conv_combo_ethos_BI_pipeline( - self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[torch.Tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .check_not(list(module.edge_op_list)) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - #################### - ## Conv + meandim ## - #################### - def test_conv_meandim_tosa_MI(self): - model = ComboConv2dMeandim() - self._test_conv_combo_tosa_MI_pipeline(model, model.get_inputs()) - - def test_conv_meandim_tosa_BI(self): - model = ComboConv2dMeandim() - self._test_conv_combo_tosa_BI_pipeline(model, model.get_inputs()) - - @pytest.mark.corstone_fvp - def test_conv_meandim_u55_BI(self): - model = ComboConv2dMeandim() - self._test_conv_combo_ethos_BI_pipeline( - model, - common.get_u55_compile_spec(), - model.get_inputs(), - ) - - @pytest.mark.corstone_fvp - def test_conv_meandim_u85_BI(self): - model = ComboConv2dMeandim() - self._test_conv_combo_ethos_BI_pipeline( - model, - common.get_u85_compile_spec(), - model.get_inputs(), - ) - - ############################## - ## Conv + batch norm + relu ## - ############################## - affine_params = [("affine", True), ("_no_affine", False)] - - @parameterized.expand(affine_params) - def test_conv_batchnorm_relu6_tosa_MI(self, test_suffix, affine): - model = ComboConvBatchnormRelu6(affine) - self._test_conv_combo_tosa_MI_pipeline(model, model.get_inputs()) - - @parameterized.expand(affine_params) - def test_conv_batchnorm_relu6_tosa_BI(self, test_suffix, affine): - model = ComboConvBatchnormRelu6(affine) - self._test_conv_combo_tosa_BI_pipeline(model, model.get_inputs()) - - @parameterized.expand(affine_params) - @pytest.mark.corstone_fvp - def test_conv_batchnorm_relu6_u55_BI(self, test_suffix, affine): - model = ComboConvBatchnormRelu6(affine) - self._test_conv_combo_ethos_BI_pipeline( - model, common.get_u55_compile_spec(), model.get_inputs() - ) - - @parameterized.expand(affine_params) - @pytest.mark.corstone_fvp - def test_conv_batchnorm_relu_u85_BI(self, test_suffix, affine): - model = 
ComboConvBatchnormRelu6(affine) - self._test_conv_combo_ethos_BI_pipeline( - model, - common.get_u85_compile_spec(), - model.get_inputs(), - ) - - ################## - ## Conv + ReLU6 ## - ################## - @parameterized.expand(ComboConvRelu6.test_data) - def test_conv_relu6_tosa_MI(self, test_data: torch.Tensor): - model = ComboConvRelu6() - test_data = (test_data,) - self._test_conv_combo_tosa_MI_pipeline(model, test_data) - - @parameterized.expand(ComboConvRelu6.test_data) - def test_conv_relu6_tosa_BI(self, test_data: torch.Tensor): - model = ComboConvRelu6() - test_data = (test_data,) - self._test_conv_combo_tosa_BI_pipeline(model, test_data) - - @parameterized.expand(ComboConvRelu6.test_data) - @pytest.mark.corstone_fvp - def test_conv_relu6_u55_BI(self, test_data: torch.Tensor): - model = ComboConvRelu6() - test_data = (test_data,) - self._test_conv_combo_ethos_BI_pipeline( - model, common.get_u55_compile_spec(), test_data - ) - - @parameterized.expand(ComboConvRelu6.test_data) - @pytest.mark.corstone_fvp - def test_conv_relu6_u85_BI(self, test_data: torch.Tensor): - model = ComboConvRelu6() - test_data = (test_data,) - self._test_conv_combo_ethos_BI_pipeline( - model, common.get_u85_compile_spec(), test_data - ) - - ############################### - ## Block bottleneck residual ## - ############################### - def test_block_bottleneck_residual_tosa_MI(self): - model = ComboBlockBottleneckResidual() - self._test_conv_combo_tosa_MI_pipeline(model, model.get_inputs()) - - @pytest.mark.flaky # TODO: Investigate flakyness (MLTORCH-307) - def test_block_bottleneck_residual_tosa_BI(self): - model = ComboBlockBottleneckResidual() - self._test_conv_combo_tosa_BI_pipeline(model, model.get_inputs()) - - @pytest.mark.corstone_fvp - def test_block_bottleneck_residual_u55_BI(self): - model = ComboBlockBottleneckResidual() - self._test_conv_combo_ethos_BI_pipeline( - model, - common.get_u55_compile_spec(), - model.get_inputs(), - ) - - @pytest.mark.corstone_fvp - def test_block_bottleneck_residual_u85_BI(self): - model = ComboBlockBottleneckResidual() - self._test_conv_combo_ethos_BI_pipeline( - model, - common.get_u85_compile_spec(), - model.get_inputs(), - ) - - ###################### - ## Conv + AvgPool2d ## - ###################### - @parameterized.expand(ComboConvAvgPool2d.test_data) - def test_conv_avgpool2d_tosa_MI(self, test_data: torch.Tensor): - model = ComboConvAvgPool2d() - test_data = (test_data,) - self._test_conv_combo_tosa_MI_pipeline(model, test_data) - - @parameterized.expand(ComboConvAvgPool2d.test_data) - def test_conv_avgpool2d_tosa_BI(self, test_data: torch.Tensor): - model = ComboConvAvgPool2d() - test_data = (test_data,) - self._test_conv_combo_tosa_BI_pipeline(model, test_data) - - @parameterized.expand(ComboConvAvgPool2d.test_data) - @pytest.mark.corstone_fvp - def test_conv_avgpool2d_u55_BI(self, test_data: torch.Tensor): - model = ComboConvAvgPool2d() - test_data = (test_data,) - self._test_conv_combo_ethos_BI_pipeline( - model, - common.get_u55_compile_spec(), - test_data, - ) - - @parameterized.expand(ComboConvAvgPool2d.test_data) - @pytest.mark.corstone_fvp - def test_conv_avgpool2d_u85_BI(self, test_data: torch.Tensor): - model = ComboConvAvgPool2d() - test_data = (test_data,) - self._test_conv_combo_ethos_BI_pipeline( - model, - common.get_u85_compile_spec(), - test_data, - ) +#################### +## Conv + meandim ## +#################### + + +def test_convolution_2d_tosa_MI_meandim(): + model = ComboConv2dMeandim() + + pipeline = 
TosaPipelineMI[input_t1]( + model, + model.get_inputs(), + aten_op=[], + exir_op=ComboConv2dMeandim.edge_op_list, + ) + pipeline.run() + + +def test_convolution_2d_tosa_BI_meandim(): + model = ComboConv2dMeandim() + pipeline = TosaPipelineBI[input_t1]( + model, + model.get_inputs(), + aten_op=[], + exir_op=ComboConv2dMeandim.edge_op_list, + ) + pipeline.run() + + +@common.XfailIfNoCorstone300 +def test_convolution_2d_u55_BI_meandim(): + model = ComboConv2dMeandim() + pipeline = EthosU55PipelineBI[input_t1]( + model, + model.get_inputs(), + aten_ops=[], + exir_ops=ComboConv2dMeandim.edge_op_list, + run_on_fvp=True, + ) + pipeline.run() + + +@common.XfailIfNoCorstone320 +def test_convolution_2d_u85_BI_meandim(): + model = ComboConv2dMeandim() + pipeline = EthosU85PipelineBI[input_t1]( + model, + model.get_inputs(), + aten_ops=[], + exir_ops=ComboConv2dMeandim.edge_op_list, + run_on_fvp=True, + ) + pipeline.run() + + +############################## +## Conv + batch norm + relu ## +############################## +affine_params = {"affine": True, "_no_affine": False} + + +@common.parametrize("affine", affine_params) +def test_convolution_2d_tosa_MI_batchnorm_relu6(affine): + model = ComboConvBatchnormRelu6(affine) + pipeline = TosaPipelineMI[input_t1]( + model, + model.get_inputs(), + aten_op=[], + exir_op=ComboConvBatchnormRelu6.edge_op_list, + ) + pipeline.run() + + +@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLTORCH-307) +@common.parametrize("affine", affine_params) +def test_convolution_2d_tosa_BI_batchnorm_relu6(affine): + model = ComboConvBatchnormRelu6(affine) + pipeline = TosaPipelineBI[input_t1]( + model, + model.get_inputs(), + aten_op=[], + exir_op=ComboConvBatchnormRelu6.edge_op_list, + ) + pipeline.run() + + +@common.parametrize("affine", affine_params) +@common.XfailIfNoCorstone300 +def test_convolution_2d_u55_BI_batchnorm_relu6(affine): + model = ComboConvBatchnormRelu6(affine) + pipeline = EthosU55PipelineBI[input_t1]( + model, + model.get_inputs(), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("affine", affine_params) +@common.XfailIfNoCorstone320 +def test_convolution_2d_u85_BI_batchnorm_relu6(affine): + model = ComboConvBatchnormRelu6(affine) + pipeline = EthosU85PipelineBI[input_t1]( + model, + model.get_inputs(), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +################## +## Conv + ReLU6 ## +################## + + +@common.parametrize("test_data", ComboConvRelu6.test_data) +def test_convolution_2d_tosa_MI_relu6(test_data: torch.Tensor): + model = ComboConvRelu6() + pipeline = TosaPipelineMI[input_t1]( + model, + test_data(), + aten_op=[], + exir_op=ComboConvRelu6.edge_op_list, + ) + pipeline.run() + + +@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLTORCH-307) +@common.parametrize("test_data", ComboConvRelu6.test_data) +def test_convolution_2d_tosa_BI_relu6(test_data: torch.Tensor): + model = ComboConvRelu6() + pipeline = TosaPipelineBI[input_t1]( + model, + test_data(), + aten_op=[], + exir_op=ComboConvRelu6.edge_op_list, + ) + pipeline.run() + + +@common.parametrize("test_data", ComboConvRelu6.test_data) +@common.XfailIfNoCorstone300 +def test_convolution_2d_u55_BI_relu6(test_data: torch.Tensor): + model = ComboConvRelu6() + pipeline = EthosU55PipelineBI[input_t1]( + model, + test_data(), + aten_ops=[], + exir_ops=ComboConvRelu6.edge_op_list, + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", ComboConvRelu6.test_data) 
+@common.XfailIfNoCorstone320 +def test_convolution_2d_u85_BI_relu6(test_data: torch.Tensor): + model = ComboConvRelu6() + pipeline = EthosU85PipelineBI[input_t1]( + model, + test_data(), + aten_ops=[], + exir_ops=ComboConvRelu6.edge_op_list, + run_on_fvp=True, + ) + pipeline.run() + + +############################### +## Block bottleneck residual ## +############################### +def test_convolution_2d_tosa_MI_block_bottleneck(): + model = ComboBlockBottleneckResidual() + pipeline = TosaPipelineMI[input_t1]( + model, + model.get_inputs(), + aten_op=[], + exir_op=ComboBlockBottleneckResidual.edge_op_list, + ) + pipeline.run() + + +@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLTORCH-307) +def test_convolution_2d_tosa_BI_block_bottleneck(): + model = ComboBlockBottleneckResidual() + pipeline = TosaPipelineBI[input_t1]( + model, + model.get_inputs(), + aten_op=[], + exir_op=ComboBlockBottleneckResidual.edge_op_list, + ) + pipeline.change_args("run_method_and_compare_outputs", model.get_inputs(), qtol=1) + pipeline.run() + + +@common.XfailIfNoCorstone300 +def test_convolution_2d_u55_BI_block_bottleneck(): + model = ComboBlockBottleneckResidual() + pipeline = EthosU55PipelineBI[input_t1]( + model, + model.get_inputs(), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.XfailIfNoCorstone320 +def test_convolution_2d_u85_BI_block_bottleneck(): + model = ComboBlockBottleneckResidual() + pipeline = EthosU85PipelineBI[input_t1]( + model, + model.get_inputs(), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +###################### +## Conv + AvgPool2d ## +###################### + + +@common.parametrize("test_data", ComboConvAvgPool2d.test_data) +def test_convolution_2d_tosa_MI_avgpool2d(test_data: torch.Tensor): + model = ComboConvAvgPool2d() + pipeline = TosaPipelineMI[input_t1]( + model, + test_data(), + aten_op=[], + exir_op=ComboConvAvgPool2d.edge_op_list, + ) + pipeline.run() + + +x_fails = { + "combo_conv_avgpool_20_x_4d": "AssertionError: Output 0 does not match reference output.", + "combo_conv_avgpool_4d": "AssertionError: Output 0 does not match reference output.", + "combo_conv_avgpool_5_x_4d_randn": "AssertionError: Output 0 does not match reference output.", + "combo_conv_avgpool_2_x_4d": "AssertionError: Output 0 does not match reference output.", +} + + +@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLTORCH-307) +@common.parametrize("test_data", ComboConvAvgPool2d.test_data, x_fails) +def test_convolution_2d_tosa_BI_avgpool2d(test_data: torch.Tensor): + model = ComboConvAvgPool2d() + pipeline = TosaPipelineBI[input_t1]( + model, + test_data(), + aten_op=[], + exir_op=ComboConvAvgPool2d.edge_op_list, + ) + pipeline.run() + + +@common.parametrize("test_data", ComboConvAvgPool2d.test_data) +@common.XfailIfNoCorstone300 +def test_convolution_2d_u55_BI_avgpool2d(test_data: torch.Tensor): + model = ComboConvAvgPool2d() + pipeline = EthosU55PipelineBI[input_t1]( + model, + test_data(), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", ComboConvAvgPool2d.test_data) +@common.XfailIfNoCorstone320 +def test_convolution_2d_u85_BI_avgpool2d(test_data: torch.Tensor): + model = ComboConvAvgPool2d() + pipeline = EthosU85PipelineBI[input_t1]( + model, + test_data(), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_depthwise_conv.py b/backends/arm/test/ops/test_depthwise_conv.py index 
59ce628693c..91b3dde1bb2 100644 --- a/backends/arm/test/ops/test_depthwise_conv.py +++ b/backends/arm/test/ops/test_depthwise_conv.py @@ -1,24 +1,29 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest - from typing import Tuple import pytest import torch -from executorch.backends.arm.test import common, conftest +from executorch.backends.arm.test import common + +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +input_t = Tuple[torch.Tensor] # Input x + +exir_op = "executorch_exir_dialects_edge__ops_aten_convolution_default" + from executorch.backends.arm.test.ops.test_conv1d import Conv1d from executorch.backends.arm.test.ops.test_conv2d import Conv2d -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.backend_details import CompileSpec -from parameterized import parameterized - """ The configuration when @@ -149,158 +154,93 @@ ) # Shenanigan to get a nicer output when test fails. -testsuite_conv2d = [ - ("2x2_1x6x4x4_gp6_st1", dw_conv2d_2x2_1x6x4x4_gp6_st1), - ("3x3_1x3x256x256_gp3_st1", dw_conv2d_3x3_1x3x256x256_gp3_st1), - ("3x3_1x4x256x256_gp4_nobias", dw_conv2d_3x3_1x4x256x256_gp4_nobias), - ("3x3_1x4x256x256_gp4_st1", dw_conv2d_3x3_1x4x256x256_gp4_st1), - ("3x3_2x8x198x198_gp8_st3", dw_conv2d_3x3_2x8x198x198_gp8_st3), - ("two_dw_conv2d", two_dw_conv2d), -] - -testsuite_conv2d_u85 = [ - ("2x2_1x6x4x4_gp6_st1", dw_conv2d_2x2_1x6x4x4_gp6_st1), - ("3x3_1x3x256x256_gp3_st1", dw_conv2d_3x3_1x3x256x256_gp3_st1), - ("3x3_1x4x256x256_gp4_st1", dw_conv2d_3x3_1x4x256x256_gp4_st1), - ("3x3_1x4x256x256_gp4_nobias", dw_conv2d_3x3_1x4x256x256_gp4_nobias), -] - -testsuite_conv2d_u85_xfails = [ - ("3x3_2x8x198x198_gp8_st3", dw_conv2d_3x3_2x8x198x198_gp8_st3), - ("two_dw_conv2d", two_dw_conv2d), -] - - -testsuite_conv1d = [ - ("2_1x6x4_gp6_st1", dw_conv1d_2_1x6x4_gp6_st1), - ("two_dw_conv1d", two_dw_conv1d), - ("3_1x3x256_gp3_st1", dw_conv1d_3_1x3x256_gp3_st1), - ("3_1x3x14_gp3_st1", dw_conv1d_3_1x3x14_gp3_st1), -] - - -class TestDepthwiseConv(unittest.TestCase): - """Tests Conv1D and Conv2D where groups == in_channels and out_channels = K * in_channels. 
This - is a special case enables depthwise convolution.""" - - def _test_dw_conv_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+MI", - ), - ) - .export() - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_convolution_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_dw_conv_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+BI", - ), - ) - .quantize() - .export() - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_convolution_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_dw_conv_ethos_BI_pipeline( - self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[torch.Tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_convolution_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(testsuite_conv1d + testsuite_conv2d) - def test_dw_conv_tosa_MI(self, test_name: str, model: torch.nn.Module): - self._test_dw_conv_tosa_MI_pipeline(model, model.get_inputs()) - - @parameterized.expand(testsuite_conv1d + testsuite_conv2d) - @pytest.mark.flaky # TODO: Investigate flakyness (MLTORCH-307) - def test_dw_conv_tosa_BI(self, test_name: str, model: torch.nn.Module): - self._test_dw_conv_tosa_BI_pipeline(model, model.get_inputs()) - - @parameterized.expand(testsuite_conv2d[:4], skip_on_empty=True) - @pytest.mark.corstone_fvp - def test_dw_conv2d_u55_BI(self, test_name: str, model: torch.nn.Module): - self._test_dw_conv_ethos_BI_pipeline( - model, - common.get_u55_compile_spec(), - model.get_inputs(), - ) - - @parameterized.expand(testsuite_conv2d[4:], skip_on_empty=True) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP # TODO: MLETORCH-516 - def test_dw_conv2d_u55_BI_xfails(self, test_name: str, model: torch.nn.Module): - self._test_dw_conv_ethos_BI_pipeline( - model, - common.get_u55_compile_spec(), - model.get_inputs(), - ) - - @parameterized.expand(testsuite_conv1d, skip_on_empty=True) - @pytest.mark.corstone_fvp - def test_dw_conv1d_u55_BI(self, test_name: str, model: torch.nn.Module): - self._test_dw_conv_ethos_BI_pipeline( - model, - common.get_u55_compile_spec(), - model.get_inputs(), - ) - - @parameterized.expand(testsuite_conv1d + testsuite_conv2d_u85) - @pytest.mark.corstone_fvp - def test_dw_conv_u85_BI(self, test_name: str, model: torch.nn.Module): - self._test_dw_conv_ethos_BI_pipeline( - model, - common.get_u85_compile_spec(), - model.get_inputs(), - ) - - # All test cases except 3x3_1x3x256x256_gp3_st1 have numerical issues on FVP. 
MLETORCH-520 - @parameterized.expand(testsuite_conv2d_u85_xfails) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_dw_conv_u85_BI_xfails(self, test_name: str, model: torch.nn.Module): - self._test_dw_conv_ethos_BI_pipeline( - model, - common.get_u85_compile_spec(), - model.get_inputs(), - ) +testsuite_conv2d = { + "2x2_1x6x4x4_gp6_st1": lambda: dw_conv2d_2x2_1x6x4x4_gp6_st1, + "3x3_1x3x256x256_gp3_st1": lambda: dw_conv2d_3x3_1x3x256x256_gp3_st1, + "3x3_1x4x256x256_gp4_nobias": lambda: dw_conv2d_3x3_1x4x256x256_gp4_nobias, + "3x3_1x4x256x256_gp4_st1": lambda: dw_conv2d_3x3_1x4x256x256_gp4_st1, + "3x3_2x8x198x198_gp8_st3": lambda: dw_conv2d_3x3_2x8x198x198_gp8_st3, + "two_dw_conv2d": lambda: two_dw_conv2d, +} + +testsuite_conv2d_u85 = { + "2x2_1x6x4x4_gp6_st1": lambda: dw_conv2d_2x2_1x6x4x4_gp6_st1, + "3x3_1x3x256x256_gp3_st1": lambda: dw_conv2d_3x3_1x3x256x256_gp3_st1, + "3x3_1x4x256x256_gp4_st1": lambda: dw_conv2d_3x3_1x4x256x256_gp4_st1, + "3x3_1x4x256x256_gp4_nobias": lambda: dw_conv2d_3x3_1x4x256x256_gp4_nobias, +} + +testsuite_conv1d = { + "2_1x6x4_gp6_st1": lambda: dw_conv1d_2_1x6x4_gp6_st1, + "two_dw_conv1d": lambda: two_dw_conv1d, + "3_1x3x256_gp3_st1": lambda: dw_conv1d_3_1x3x256_gp3_st1, + "3_1x3x14_gp3_st1": lambda: dw_conv1d_3_1x3x14_gp3_st1, +} + + +@common.parametrize("test_module", testsuite_conv1d | testsuite_conv2d) +def test_convolution_2d_tosa_MI_depth_wise(test_module: torch.nn.Module): + pipeline = TosaPipelineMI[input_t]( + test_module(), + test_module().get_inputs(), + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness (MLTORCH-307) +@common.parametrize("test_module", testsuite_conv1d | testsuite_conv2d) +def test_convolution_2d_tosa_BI_depth_wise(test_module: torch.nn.Module): + pipeline = TosaPipelineBI[input_t]( + test_module(), + test_module().get_inputs(), + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +x_fails = { + "3x3_2x8x198x198_gp8_st3": "MLETORCH-516: AssertionError: Output 0 does not match reference output.", + "two_dw_conv2d": "MLETORCH-516: AssertionError: Output 0 does not match reference output.", +} + + +@common.parametrize("test_module", testsuite_conv2d, x_fails) +@common.XfailIfNoCorstone300 # TODO: MLETORCH-516 +def test_convolution_2d_u55_BI_depth_wise(test_module: torch.nn.Module): + pipeline = EthosU55PipelineBI[input_t]( + test_module(), + test_module().get_inputs(), + aten_ops=[], + exir_ops=exir_op, + run_on_fvp=True, + ) + pipeline.run() + + +@common.XfailIfNoCorstone300 # TODO: MLETORCH-516 +@common.parametrize("test_module", testsuite_conv1d) +def test_convolution_1d_u55_BI_depth_wise(test_module: torch.nn.Module): + pipeline = EthosU55PipelineBI[input_t]( + test_module(), + test_module().get_inputs(), + aten_ops=[], + exir_ops=exir_op, + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_module", testsuite_conv1d | testsuite_conv2d, x_fails) +@common.XfailIfNoCorstone320 # TODO: MLETORCH-516 +def test_convolution_2d_u85_BI_depth_wise(test_module: torch.nn.Module): + pipeline = EthosU85PipelineBI[input_t]( + test_module(), + test_module().get_inputs(), + aten_ops=[], + exir_ops=exir_op, + run_on_fvp=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_div.py b/backends/arm/test/ops/test_div.py index d200a753ce5..087bdb84a63 100644 --- a/backends/arm/test/ops/test_div.py +++ b/backends/arm/test/ops/test_div.py @@ -1,243 +1,131 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. 
-# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest from typing import Optional, Tuple, Union -import pytest - import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from parameterized import parameterized +from executorch.backends.arm.test import common + +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.div.Tensor" +exir_op = "executorch_exir_dialects_edge__ops_aten_div_Tensor" +input_t1 = Tuple[torch.Tensor] # Input x -test_data_suite = [ +test_data_suite = { # (test_name, input, other, rounding_mode) See torch.div() for info - ( - "op_div_rank1_ones", - torch.ones(5), - torch.ones(5), - None, - ), - ( - "op_div_rank1_negative_ones", + "op_div_rank1_ones": lambda: (torch.ones(5), torch.ones(5), None), + "op_div_rank1_negative_ones": lambda: ( torch.ones(5) * (-1), torch.ones(5) * (-1), None, ), - ( - "op_div_rank1_rand", + "op_div_rank1_rand": lambda: ( torch.rand(5) * 5, torch.rand(5) * 5, None, ), - ( - "op_div_rank4_ones", + "op_div_rank4_ones": lambda: ( torch.ones(5, 10, 25, 20), torch.ones(5, 10, 25, 20), None, ), - ( - "op_div_rank4_negative_ones", + "op_div_rank4_negative_ones": lambda: ( (-1) * torch.ones(5, 10, 25, 20), torch.ones(5, 10, 25, 20), None, ), - ( - "op_div_rank4_ones_div_negative", + "op_div_rank4_ones_div_negative": lambda: ( torch.ones(5, 10, 25, 20), (-1) * torch.ones(5, 10, 25, 20), None, ), - ( - "op_div_rank4_large_rand", + "op_div_rank4_large_rand": lambda: ( 200 * torch.rand(5, 10, 25, 20), torch.rand(5, 10, 25, 20), None, ), - ( - "op_div_rank4_negative_large_rand", + "op_div_rank4_negative_large_rand": lambda: ( (-200) * torch.rand(5, 10, 25, 20), torch.rand(5, 10, 25, 20), None, ), - ( - "op_div_rank4_large_randn", + "op_div_rank4_large_randn": lambda: ( 200 * torch.randn(5, 10, 25, 20) + 1, torch.rand(5, 10, 25, 20) + 1, None, ), -] - - -class TestDiv(unittest.TestCase): - """Tests division""" - - class Div(torch.nn.Module): - - def forward( - self, - input_: Union[torch.Tensor, torch.types.Number], - other_: Union[torch.Tensor, torch.types.Number], - rounding_mode: Optional[str] = None, - ): - if rounding_mode is None: - return torch.div(input=input_, other=other_) - else: - return torch.div( - input=input_, other=other_, rounding_mode=rounding_mode - ) - - def _test_div_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.div.Tensor": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_div_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count( - {"torch.ops.aten.reciprocal.default": 1, "torch.ops.aten.mul.Tensor": 1} - ) - 
.check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, atol=1, rtol=0.1) - ) - - def _test_div_ethos_BI_pipeline( - self, module: torch.nn.Module, compile_spec, test_data: Tuple[torch.Tensor] - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_u55_compile_spec(), - ) - .quantize() - .export() - .check_count( - {"torch.ops.aten.reciprocal.default": 1, "torch.ops.aten.mul.Tensor": 1} - ) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite) - def test_div_tosa_MI( - self, - test_name: str, - input_: Union[torch.Tensor, torch.types.Number], - other_: Union[torch.Tensor, torch.types.Number], - rounding_mode: Optional[str] = None, - ): - test_data = (input_, other_) - self._test_div_tosa_MI_pipeline(self.Div(), test_data) +} - @parameterized.expand(test_data_suite) - def test_div_tosa_BI( - self, - test_name: str, - input_: Union[torch.Tensor, torch.types.Number], - other_: Union[torch.Tensor, torch.types.Number], - rounding_mode: Optional[str] = None, - ): - test_data = (input_, other_) - self._test_div_tosa_BI_pipeline(self.Div(), test_data) +class Div(torch.nn.Module): - @parameterized.expand(test_data_suite[:3]) - @pytest.mark.corstone_fvp - def test_div_u55_BI( - self, - test_name: str, - input_: Union[torch.Tensor, torch.types.Number], - other_: Union[torch.Tensor, torch.types.Number], - rounding_mode: Optional[str] = None, - ): - test_data = (input_, other_) - self._test_div_ethos_BI_pipeline( - self.Div(), common.get_u55_compile_spec(), test_data - ) - - # Numerical issues on FVP likely due to mul op, MLETORCH-521 - @parameterized.expand(test_data_suite[3:]) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_div_u55_BI_xfails( - self, - test_name: str, - input_: Union[torch.Tensor, torch.types.Number], - other_: Union[torch.Tensor, torch.types.Number], - rounding_mode: Optional[str] = None, - ): - test_data = (input_, other_) - self._test_div_ethos_BI_pipeline( - self.Div(), common.get_u55_compile_spec(), test_data - ) - - @parameterized.expand(test_data_suite[:3]) - @pytest.mark.corstone_fvp - def test_div_u85_BI( - self, - test_name: str, - input_: Union[torch.Tensor, torch.types.Number], - other_: Union[torch.Tensor, torch.types.Number], - rounding_mode: Optional[str] = None, - ): - test_data = (input_, other_) - self._test_div_ethos_BI_pipeline( - self.Div(), common.get_u85_compile_spec(), test_data - ) - - # Numerical issues on FVP likely due to mul op, MLETORCH-521 - @parameterized.expand(test_data_suite[3:]) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_div_u85_BI_xfails( + def forward( self, - test_name: str, input_: Union[torch.Tensor, torch.types.Number], other_: Union[torch.Tensor, torch.types.Number], rounding_mode: Optional[str] = None, ): - test_data = (input_, other_) - self._test_div_ethos_BI_pipeline( - self.Div(), common.get_u85_compile_spec(), test_data - ) + if rounding_mode is None: + return torch.div(input=input_, other=other_) + else: + return torch.div(input=input_, other=other_, rounding_mode=rounding_mode) + + 
+@common.parametrize("test_data", test_data_suite)
+def test_div_tensor_tosa_MI(test_data: Tuple):
+    pipeline = TosaPipelineMI[input_t1](Div(), test_data(), aten_op, exir_op)
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_div_tensor_tosa_BI(test_data: Tuple):
+    pipeline = TosaPipelineBI[input_t1](Div(), test_data(), aten_op=[], exir_op=[])
+    pipeline.run()
+
+
+x_fails = {
+    "op_div_rank4_ones": "MLETORCH-521: Numerical issues on FVP likely due to mul op",
+    "op_div_rank4_negative_ones": "MLETORCH-521: Numerical issues on FVP likely due to mul op",
+    "op_div_rank4_ones_div_negative": "MLETORCH-521: Numerical issues on FVP likely due to mul op",
+    "op_div_rank4_large_rand": "MLETORCH-521: Numerical issues on FVP likely due to mul op",
+    "op_div_rank4_negative_large_rand": "MLETORCH-521: Numerical issues on FVP likely due to mul op",
+    "op_div_rank4_large_randn": "MLETORCH-521: Numerical issues on FVP likely due to mul op",
+}
+
+
+@common.parametrize("test_data", test_data_suite, xfails=x_fails)
+@common.XfailIfNoCorstone300
+def test_div_tensor_u55_BI(test_data: Tuple):
+    pipeline = EthosU55PipelineBI[input_t1](
+        Div(),
+        test_data(),
+        aten_ops=[],
+        exir_ops=[],
+        run_on_fvp=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite, xfails=x_fails)
+@common.XfailIfNoCorstone320
+def test_div_tensor_u85_BI(test_data: Tuple):
+    pipeline = EthosU85PipelineBI[input_t1](
+        Div(),
+        test_data(),
+        aten_ops=[],
+        exir_ops=[],
+        run_on_fvp=True,
+    )
+    pipeline.run()
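A note on the pattern the test_div.py hunks above adopt, which recurs throughout this series: test inputs move from eagerly built tuples into dict entries holding zero-argument lambdas, and each test calls the entry to materialize its tensors. The sketch below is illustrative only; demo_suite and resolve are invented names for the example, not part of the patch:

    import torch

    # Minimal model of the new suite layout: names map to zero-argument lambdas.
    demo_suite = {
        "op_div_rank1_ones": lambda: (torch.ones(5), torch.ones(5), None),
    }

    def resolve(name: str):
        # Calling the entry builds (input, other, rounding_mode) at test run
        # time, so test collection neither allocates tensors nor advances the
        # global RNG state.
        return demo_suite[name]()

    input_, other_, rounding_mode = resolve("op_div_rank1_ones")
    assert torch.equal(torch.div(input_, other_), torch.ones(5))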
diff --git a/backends/arm/test/ops/test_eq.py b/backends/arm/test/ops/test_eq.py
index e3bcf877ffe..bd6cace00a5 100644
--- a/backends/arm/test/ops/test_eq.py
+++ b/backends/arm/test/ops/test_eq.py
@@ -15,7 +15,6 @@
     TosaPipelineMI,
 )
 
-
 input_t = Tuple[torch.Tensor]
 
 
@@ -63,24 +62,27 @@ def get_inputs(self):
 op_eq_scalar_rank4_randn = Equal(torch.randn(3, 2, 2, 2), 0.3)
 
 test_data_tensor = {
-    "eq_tensor_rank1_ones": op_eq_tensor_rank1_ones,
-    "eq_tensor_rank2_rand": op_eq_tensor_rank2_rand,
-    "eq_tensor_rank3_randn": op_eq_tensor_rank3_randn,
-    "eq_tensor_rank4_randn": op_eq_tensor_rank4_randn,
+    "eq_tensor_rank1_ones": lambda: op_eq_tensor_rank1_ones,
+    "eq_tensor_rank2_rand": lambda: op_eq_tensor_rank2_rand,
+    "eq_tensor_rank3_randn": lambda: op_eq_tensor_rank3_randn,
+    "eq_tensor_rank4_randn": lambda: op_eq_tensor_rank4_randn,
 }
 
 test_data_scalar = {
-    "eq_scalar_rank1_ones": op_eq_scalar_rank1_ones,
-    "eq_scalar_rank2_rand": op_eq_scalar_rank2_rand,
-    "eq_scalar_rank3_randn": op_eq_scalar_rank3_randn,
-    "eq_scalar_rank4_randn": op_eq_scalar_rank4_randn,
+    "eq_scalar_rank1_ones": lambda: op_eq_scalar_rank1_ones,
+    "eq_scalar_rank2_rand": lambda: op_eq_scalar_rank2_rand,
+    "eq_scalar_rank3_randn": lambda: op_eq_scalar_rank3_randn,
+    "eq_scalar_rank4_randn": lambda: op_eq_scalar_rank4_randn,
 }
 
 
 @common.parametrize("test_module", test_data_tensor)
-def test_eq_tensor_tosa_MI(test_module):
+def test_eq_scalar_tosa_MI_tensor(test_module):
     pipeline = TosaPipelineMI[input_t](
-        test_module, test_module.get_inputs(), Equal.aten_op_Tensor, Equal.exir_op
+        test_module(),
+        test_module().get_inputs(),
+        Equal.aten_op_Tensor,
+        Equal.exir_op,
     )
     pipeline.run()
 
@@ -88,8 +90,8 @@ def test_eq_tensor_tosa_MI(test_module):
 @common.parametrize("test_module", test_data_scalar)
 def test_eq_scalar_tosa_MI(test_module):
     pipeline = TosaPipelineMI[input_t](
-        test_module,
-        test_module.get_inputs(),
+        test_module(),
+        test_module().get_inputs(),
         Equal.aten_op_Scalar,
         Equal.exir_op,
     )
@@ -97,9 +99,12 @@ def test_eq_scalar_tosa_MI(test_module):
 
 
 @common.parametrize("test_module", test_data_tensor)
-def test_eq_tensor_tosa_BI(test_module):
+def test_eq_scalar_tosa_BI_tensor(test_module):
     pipeline = TosaPipelineBI[input_t](
-        test_module, test_module.get_inputs(), Equal.aten_op_Tensor, Equal.exir_op
+        test_module(),
+        test_module().get_inputs(),
+        Equal.aten_op_Tensor,
+        Equal.exir_op,
     )
     pipeline.run()
 
@@ -107,20 +112,24 @@ def test_eq_tensor_tosa_BI(test_module):
 @common.parametrize("test_module", test_data_scalar)
 def test_eq_scalar_tosa_BI(test_module):
     pipeline = TosaPipelineBI[input_t](
-        test_module, test_module.get_inputs(), Equal.aten_op_Tensor, Equal.exir_op
+        test_module(),
+        test_module().get_inputs(),
+        Equal.aten_op_Tensor,
+        Equal.exir_op,
     )
     pipeline.run()
 
 
 @common.parametrize("test_module", test_data_tensor)
 @common.XfailIfNoCorstone300
-def test_eq_tensor_u55_BI(test_module):
+def test_eq_scalar_u55_BI_tensor(test_module):
     # EQUAL is not supported on U55.
     pipeline = OpNotSupportedPipeline[input_t](
-        test_module,
-        test_module.get_inputs(),
-        "TOSA-0.80+BI+u55",
+        test_module(),
+        test_module().get_inputs(),
         {Equal.exir_op: 1},
+        quantize=True,
+        u55_subset=True,
     )
     pipeline.run()
 
@@ -130,11 +139,12 @@ def test_eq_tensor_u55_BI(test_module):
 def test_eq_scalar_u55_BI(test_module):
     # EQUAL is not supported on U55.
     pipeline = OpNotSupportedPipeline[input_t](
-        test_module,
-        test_module.get_inputs(),
-        "TOSA-0.80+BI+u55",
+        test_module(),
+        test_module().get_inputs(),
         {Equal.exir_op: 1},
         n_expected_delegates=1,
+        quantize=True,
+        u55_subset=True,
     )
     pipeline.run()
 
@@ -148,10 +158,10 @@ def test_eq_scalar_u55_BI(test_module):
     strict=False,
 )
 @common.XfailIfNoCorstone320
-def test_eq_tensor_u85_BI(test_module):
+def test_eq_scalar_u85_BI_tensor(test_module):
     pipeline = EthosU85PipelineBI[input_t](
-        test_module,
-        test_module.get_inputs(),
+        test_module(),
+        test_module().get_inputs(),
         Equal.aten_op_Tensor,
         Equal.exir_op,
         run_on_fvp=True,
@@ -170,8 +180,8 @@ def test_eq_tensor_u85_BI(test_module):
 @common.XfailIfNoCorstone320
 def test_eq_scalar_u85_BI(test_module):
     pipeline = EthosU85PipelineBI[input_t](
-        test_module,
-        test_module.get_inputs(),
+        test_module(),
+        test_module().get_inputs(),
         Equal.aten_op_Tensor,
         Equal.exir_op,
         run_on_fvp=True,
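The test_eq.py hunks above also change how not-supported-on-U55 cases are declared: the TOSA profile string "TOSA-0.80+BI+u55" is dropped in favor of quantize=True and u55_subset=True keywords. A hedged mock of that call shape follows; op_not_supported is an invented stand-in, and the real OpNotSupportedPipeline signature lives in test_pipeline.py:

    # Hypothetical stand-in illustrating the keyword-driven call shape.
    def op_not_supported(module, inputs, non_delegated_ops,
                         n_expected_delegates=0, quantize=False, u55_subset=False):
        # quantize=True selects the quantized (BI) flow that the old
        # "TOSA-0.80+BI+u55" string encoded; u55_subset=True applies the
        # U55 operator-support restrictions on top of it.
        profile = ("BI" if quantize else "MI") + ("+u55" if u55_subset else "")
        return profile, non_delegated_ops, n_expected_delegates

    profile, ops, n = op_not_supported(
        None,
        (),
        {"aten_eq_Tensor_exir_op": 1},  # symbolic stand-in for Equal.exir_op
        quantize=True,
        u55_subset=True,
    )
    assert profile == "BI+u55"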
diff --git a/backends/arm/test/ops/test_erf.py b/backends/arm/test/ops/test_erf.py
index d452be7cae1..e7136036c65 100644
--- a/backends/arm/test/ops/test_erf.py
+++ b/backends/arm/test/ops/test_erf.py
@@ -24,24 +24,24 @@ def forward(self, x: torch.Tensor):
         return torch.erf(x)
 
     test_data: dict[str, input_t1] = {
-        "zeros": (torch.zeros(1, 10, 10, 10),),
-        "ones": (torch.ones(10, 10, 10),),
-        "rand": ((torch.rand(10, 10) - 0.5),),
-        "randn_pos": ((torch.randn(1, 4, 4, 4) + 10),),
-        "randn_neg": ((torch.randn(1, 4, 4, 4) - 10),),
-        "ramp": (torch.arange(-16, 16, 0.2),),
+        "zeros": lambda: (torch.zeros(1, 10, 10, 10),),
+        "ones": lambda: (torch.ones(10, 10, 10),),
+        "rand": lambda: ((torch.rand(10, 10) - 0.5),),
+        "randn_pos": lambda: ((torch.randn(1, 4, 4, 4) + 10),),
+        "randn_neg": lambda: ((torch.randn(1, 4, 4, 4) - 10),),
+        "ramp": lambda: (torch.arange(-16, 16, 0.2),),
     }
 
 
 @common.parametrize("test_data", Erf.test_data)
 def test_erf_tosa_MI(test_data: input_t1):
-    pipeline = TosaPipelineMI[input_t1](Erf(), test_data, aten_op, exir_op)
+    pipeline = TosaPipelineMI[input_t1](Erf(), test_data(), aten_op, exir_op)
     pipeline.run()
 
 
 @common.parametrize("test_data", Erf.test_data)
 def test_erf_tosa_BI(test_data: input_t1):
-    pipeline = TosaPipelineBI[input_t1](Erf(), test_data, aten_op, exir_op)
+    pipeline = TosaPipelineBI[input_t1](Erf(), test_data(), aten_op, exir_op)
     pipeline.run()
 
 
@@ -49,7 +49,7 @@ def test_erf_tosa_BI(test_data: input_t1):
 @common.XfailIfNoCorstone300
 def test_erf_u55_BI(test_data: input_t1):
     pipeline = EthosU55PipelineBI[input_t1](
-        Erf(), test_data, aten_op, exir_op, run_on_fvp=True
+        Erf(), test_data(), aten_op, exir_op, run_on_fvp=True
     )
     pipeline.run()
 
@@ -58,6 +58,6 @@ def test_erf_u55_BI(test_data: input_t1):
 @common.XfailIfNoCorstone320
 def test_erf_u85_BI(test_data: input_t1):
     pipeline = EthosU85PipelineBI[input_t1](
-        Erf(), test_data, aten_op, exir_op, run_on_fvp=True
+        Erf(), test_data(), aten_op, exir_op, run_on_fvp=True
     )
     pipeline.run()
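Across these files, the @pytest.mark.corstone_fvp marker plus a conftest.is_option_enabled("corstone_fvp") check gives way to @common.XfailIfNoCorstone300 / @common.XfailIfNoCorstone320 decorators, gating FVP execution on the environment rather than a CLI option. Below is a sketch of how such a decorator could be built; the FVP binary name and the xfail mechanics are assumptions for illustration, not the actual definitions in common.py:

    import shutil

    import pytest

    def xfail_if_no_corstone300(test_func):
        # Assumed mechanics: mark the test xfail when no Corstone-300 FVP
        # binary (name assumed here) is on PATH, so the suite still passes
        # on machines without the simulator installed.
        missing = shutil.which("FVP_Corstone_SSE-300_Ethos-U55") is None
        return pytest.mark.xfail(
            missing, reason="Corstone-300 FVP not installed"
        )(test_func)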
diff --git a/backends/arm/test/ops/test_exp.py b/backends/arm/test/ops/test_exp.py
index 3fa9f8c99fa..9218455916a 100644
--- a/backends/arm/test/ops/test_exp.py
+++ b/backends/arm/test/ops/test_exp.py
@@ -1,127 +1,85 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
 # All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import unittest
 
 from typing import Tuple
 
-import pytest
-
 import torch
-from executorch.backends.arm.test import common, conftest
-from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir.backend.backend_details import CompileSpec
-from parameterized import parameterized
 
-test_data_suite = [
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+test_data_suite = {
     # (test_name, test_data)
-    ("zeros", torch.zeros(1, 10, 10, 10)),
-    ("ones", torch.ones(10, 10, 10)),
-    ("rand", torch.rand(10, 10) - 0.5),
-    ("randn_pos", torch.randn(1, 4, 4, 4) + 10),
-    ("randn_neg", torch.randn(10) - 10),
-    ("ramp", torch.arange(-16, 16, 0.2)),
-]
-
-
-class TestExp(unittest.TestCase):
-    """Tests lowering of aten.exp"""
-
-    class Exp(torch.nn.Module):
-        def forward(self, x: torch.Tensor) -> torch.Tensor:
-            return torch.exp(x)
-
-    def _test_exp_tosa_MI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
-            )
-            .export()
-            .check(["torch.ops.aten.exp.default"])
-            .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_not(["executorch_exir_dialects_edge__ops_aten_exp_default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    def _test_exp_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
-            .quantize()
-            .export()
-            .check(["torch.ops.aten.exp.default"])
-            .check(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_not(["executorch_exir_dialects_edge__ops_aten_exp_default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    def _test_exp_ethosu_BI_pipeline(
-        self,
-        compile_spec: CompileSpec,
-        module: torch.nn.Module,
-        test_data: Tuple[torch.tensor],
-    ):
-        tester = (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_u55_compile_spec(),
-            )
-            .quantize()
-            .export()
-            .check_count({"torch.ops.aten.exp.default": 1})
-            .check(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_not(["executorch_exir_dialects_edge__ops_aten_exp_default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .serialize()
-        )
-        if conftest.is_option_enabled("corstone_fvp"):
-            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
-
-    @parameterized.expand(test_data_suite)
-    def test_exp_tosa_MI(
-        self,
-        test_name: str,
-        test_data: torch.Tensor,
-    ):
-        self._test_exp_tosa_MI_pipeline(self.Exp(), (test_data,))
-
-    @parameterized.expand(test_data_suite)
-    def test_exp_tosa_BI(self, test_name: str, test_data: torch.Tensor):
-        self._test_exp_tosa_BI_pipeline(self.Exp(), (test_data,))
-
-    @parameterized.expand(test_data_suite)
-    @pytest.mark.corstone_fvp
-    def test_exp_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor):
-        self._test_exp_ethosu_BI_pipeline(
-            common.get_u55_compile_spec(), self.Exp(), (test_data,)
-        )
-
-    @parameterized.expand(test_data_suite)
-    @pytest.mark.corstone_fvp
-    def test_exp_tosa_u85_BI(self, test_name: str, test_data: torch.Tensor):
-        self._test_exp_ethosu_BI_pipeline(
-            common.get_u85_compile_spec(), self.Exp(), (test_data,)
-        )
+    "zeros": lambda: torch.zeros(1, 10, 10, 10),
+    "ones": lambda: torch.ones(10, 10, 10),
+    "rand": lambda: torch.rand(10, 10) - 0.5,
+    "randn_pos": lambda: torch.randn(1, 4, 4, 4) + 10,
+    "randn_neg": lambda: torch.randn(10) - 10,
+    "ramp": lambda: torch.arange(-16, 16, 0.2),
+}
+
+aten_op = "torch.ops.aten.exp.default"
+input_t1 = Tuple[torch.Tensor]  # Input x
+
+
+class Exp(torch.nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.exp(x)
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_exp_tosa_MI(test_data: Tuple):
+    pipeline = TosaPipelineMI[input_t1](
+        Exp(),
+        (test_data(),),
+        aten_op,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_exp_tosa_BI(test_data: Tuple):
+    pipeline = TosaPipelineBI[input_t1](
+        Exp(),
+        (test_data(),),
+        aten_op,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone300
+def test_exp_u55_BI(test_data: Tuple):
+    pipeline = EthosU55PipelineBI[input_t1](
+        Exp(),
+        (test_data(),),
+        aten_op,
+        exir_ops=[],
+        run_on_fvp=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone320
+def test_exp_u85_BI(test_data: Tuple):
+    pipeline = EthosU85PipelineBI[input_t1](
+        Exp(),
+        (test_data(),),
+        aten_op,
+        exir_ops=[],
+        run_on_fvp=True,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_expand.py b/backends/arm/test/ops/test_expand.py
index cd073bddcc8..8f84c39dd27 100644
--- a/backends/arm/test/ops/test_expand.py
+++ b/backends/arm/test/ops/test_expand.py
@@ -7,7 +7,6 @@
 # Tests the expand op which copies the data of the input tensor (possibly with new data format)
 #
 
-import unittest
 
 from typing import Sequence, Tuple
 
@@ -15,153 +14,121 @@
 import torch
 
-from executorch.backends.arm.quantizer import (
-    EthosUQuantizer,
-    get_symmetric_quantization_config,
-    TOSAQuantizer,
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+ TosaPipelineMI, ) -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.backends.arm.tosa_specification import TosaSpecification - -from executorch.backends.xnnpack.test.tester.tester import Quantize -from executorch.exir.backend.backend_details import CompileSpec -from parameterized import parameterized - - -class TestSimpleExpand(unittest.TestCase): - """Tests the Tensor.expand which should be converted to a repeat op by a pass.""" - - class Expand(torch.nn.Module): - # (input tensor, multiples) - test_parameters = [ - (torch.rand(1), (2,)), - (torch.randn(1), (2, 2, 4)), - (torch.randn(1, 1, 1, 5), (1, 4, -1, -1)), - (torch.randn(1, 1), (1, 2, 2, 4)), - (torch.randn(1, 1), (2, 2, 2, 4)), - (torch.randn(10, 1, 1, 97), (-1, 4, -1, -1)), - (torch.rand(1, 1, 2, 2), (4, 3, -1, 2)), - (torch.randn(1, 4), (1, -1)), - (torch.randn(1, 1, 192), (1, -1, -1)), - ] - - def forward(self, x: torch.Tensor, m: Sequence): - return x.expand(m) - - def _test_expand_tosa_MI_pipeline(self, module: torch.nn.Module, test_data: Tuple): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.expand.default": 1}) - .to_edge() - .partition() - .check_not(["torch.ops.aten.expand.default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_expand_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple): - tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI") - compile_spec = common.get_tosa_compile_spec(tosa_spec) - quantizer = TOSAQuantizer(tosa_spec).set_io(get_symmetric_quantization_config()) - ( - ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.expand.default": 1}) - .to_edge() - .partition() - .check_not(["torch.ops.aten.expand.default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_expand_ethosu_BI_pipeline( - self, compile_spec: CompileSpec, module: torch.nn.Module, test_data: Tuple - ): - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() - ) - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.expand.default": 1}) - .to_edge() - .partition() - .check_not(["torch.ops.aten.expand.default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(Expand.test_parameters) - def test_expand_tosa_MI(self, test_input, multiples): - self._test_expand_tosa_MI_pipeline(self.Expand(), (test_input, multiples)) - - @parameterized.expand(Expand.test_parameters) - def test_expand_tosa_BI(self, test_input, multiples): - self._test_expand_tosa_BI_pipeline(self.Expand(), (test_input, multiples)) - - @parameterized.expand(Expand.test_parameters[:-5]) - @pytest.mark.corstone_fvp - def test_expand_u55_BI(self, test_input, multiples): - 
self._test_expand_ethosu_BI_pipeline( - common.get_u55_compile_spec(), self.Expand(), (test_input, multiples) - ) - - # MLETORCH-629: Expand does not work on FVP with batch>1 - @parameterized.expand(Expand.test_parameters[-5:-2]) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_expand_u55_BI_xfails_on_fvp(self, test_input, multiples): - self._test_expand_ethosu_BI_pipeline( - common.get_u55_compile_spec(), self.Expand(), (test_input, multiples) - ) - - @parameterized.expand(Expand.test_parameters[-2:]) - @pytest.mark.xfail( - reason="MLETORCH-716: Node will be optimized away and Vela can't handle empty graphs" + +aten_op = "torch.ops.aten.expand.default" +input_t1 = Tuple[torch.Tensor, torch.Tensor] # Input x, Input y + + +class Expand(torch.nn.Module): + # (input tensor, multiples) + test_parameters = { + "rand_1d_both": lambda: (torch.rand(1), (2,)), + "rand_1d": lambda: (torch.randn(1), (2, 2, 4)), + "rand_4d": lambda: (torch.randn(1, 1, 1, 5), (1, 4, -1, -1)), + "rand_batch_1": lambda: (torch.randn(1, 1), (1, 2, 2, 4)), + "rand_batch_2": lambda: (torch.randn(1, 1), (2, 2, 2, 4)), + "rand_mix_neg": lambda: (torch.randn(10, 1, 1, 97), (-1, 4, -1, -1)), + "rand_small_neg": lambda: (torch.rand(1, 1, 2, 2), (4, 3, -1, 2)), + } + + test_reject_set = { + "rand_2d": lambda: (torch.randn(1, 4), (1, -1)), + "rand_neg_mul": lambda: (torch.randn(1, 1, 192), (1, -1, -1)), + } + + def forward(self, x: torch.Tensor, m: Sequence): + return x.expand(m) + + +@common.parametrize("test_data", Expand.test_parameters | Expand.test_reject_set) +def test_expand_tosa_MI(test_data: Tuple): + pipeline = TosaPipelineMI[input_t1]( + Expand(), + test_data(), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", Expand.test_parameters | Expand.test_reject_set) +def test_expand_tosa_BI(test_data: Tuple): + pipeline = TosaPipelineBI[input_t1]( + Expand(), + test_data(), + aten_op, + exir_op=[], + ) + pipeline.run() + + +x_fails = { + "rand_batch_2": "AssertionError: Output 0 does not match reference output.", + "rand_mix_neg": "AssertionError: Output 0 does not match reference output.", + "rand_small_neg": "AssertionError: Output 0 does not match reference output.", +} + + +@common.parametrize("test_data", Expand.test_parameters, x_fails) +@common.XfailIfNoCorstone300 +def test_expand_u55_BI(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + Expand(), + test_data(), + aten_op, + exir_ops=[], + run_on_fvp=True, ) - def test_expand_u55_BI_xfails(self, test_input, multiples): - self._test_expand_ethosu_BI_pipeline( - common.get_u55_compile_spec(), self.Expand(), (test_input, multiples) - ) - - @parameterized.expand(Expand.test_parameters[:-5]) - @pytest.mark.corstone_fvp - def test_expand_u85_BI(self, test_input, multiples): - self._test_expand_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.Expand(), (test_input, multiples) - ) - - # MLETORCH-629: Expand does not work on FVP with batch>1 - @parameterized.expand(Expand.test_parameters[-5:-2]) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_expand_u85_BI_xfails_on_fvp(self, test_input, multiples): - self._test_expand_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.Expand(), (test_input, multiples) - ) - - @parameterized.expand(Expand.test_parameters[-2:]) - @pytest.mark.xfail( - reason="MLETORCH-716: Node will be optimized away and Vela can't handle empty graphs" + pipeline.run() + + +@common.parametrize("test_data", Expand.test_parameters, x_fails) 
+@common.XfailIfNoCorstone320 +def test_expand_u85_BI(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + Expand(), + test_data(), + aten_op, + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Expand.test_reject_set) +@common.XfailIfNoCorstone300 +@pytest.mark.xfail( + reason="MLETORCH-716: Node will be optimized away and Vela can't handle empty graphs" +) +def test_expand_u55_BI_failure_set(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + Expand(), + test_data(), + aten_op, + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Expand.test_reject_set) +@common.XfailIfNoCorstone320 +@pytest.mark.xfail( + reason="MLETORCH-716: Node will be optimized away and Vela can't handle empty graphs" +) +def test_expand_u85_BI_failure_set(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + Expand(), + test_data(), + aten_op, + exir_ops=[], + run_on_fvp=True, ) - def test_expand_u85_xfails(self, test_input, multiples): - self._test_expand_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.Expand(), (test_input, multiples) - ) + pipeline.run() diff --git a/backends/arm/test/ops/test_full.py b/backends/arm/test/ops/test_full.py index 193ed632ed0..13a3146f2fe 100644 --- a/backends/arm/test/ops/test_full.py +++ b/backends/arm/test/ops/test_full.py @@ -8,186 +8,199 @@ # The shape and value are set at compile time, i.e. can't be set by a tensor input. # -import unittest - from typing import Tuple import pytest import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - - -class TestFull(unittest.TestCase): - """Tests the full op which creates a tensor of a given shape filled with a given value.""" - - class Full(torch.nn.Module): - # A single full op - def forward(self): - return torch.full((3, 3), 4.5) - - class AddConstFull(torch.nn.Module): - # Input + a full with constant value. - def forward(self, x: torch.Tensor): - return torch.full((2, 2, 3, 3), 4.5, dtype=torch.float32) + x - - class AddVariableFull(torch.nn.Module): - sizes: list[tuple[int, ...]] = [ - (5,), - (5, 5), - (5, 5, 5), - (1, 5, 5, 5), - ] - test_parameters = [((torch.randn(n) * 10 - 5, 3.2),) for n in sizes] - - def forward(self, x: torch.Tensor, y): - # Input + a full with the shape from the input and a given value 'y'. - return x + torch.full(x.shape, y) - - class FullLike(torch.nn.Module): - """Since full_like is replaced with full, we only need to test on reference model, not FVP.""" - - test_parameters = [ - ((torch.randn(2, 2, 2, 2) * 50, 3.2),), - ((torch.randn(2, 2, 2, 2) * 50, 3),), - (((torch.randn(2, 2, 2, 2) * 50).to(torch.int32), 3.2),), - (((torch.randn(2, 2, 2, 2) * 50).to(torch.int32), 3),), - ] - - def forward(self, input_tensor: torch.Tensor, value): - # Our backend can't handle tensors without users, which input_tensor doesn't have - # when the full_like is converted to a full. Therefore involve it in the output. 
- return input_tensor + torch.full_like(input_tensor, value) - - def _test_full_tosa_MI_pipeline( - self, - module: torch.nn.Module, - example_data: Tuple, - test_data: Tuple | None = None, - ): - if test_data is None: - test_data = example_data - ( - ArmTester( - module, - example_inputs=example_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_full_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_full_tosa_BI_pipeline( - self, - module: torch.nn.Module, - test_data: Tuple, - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_full_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_full_tosa_ethos_pipeline( - self, compile_spec: list[CompileSpec], module: torch.nn.Module, test_data: Tuple - ): - tester = ( - ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) - .quantize() - .export() - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_full_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - def _test_full_tosa_u55_pipeline(self, module: torch.nn.Module, test_data: Tuple): - self._test_full_tosa_ethos_pipeline( - common.get_u55_compile_spec(), module, test_data - ) - - def _test_full_tosa_u85_pipeline(self, module: torch.nn.Module, test_data: Tuple): - self._test_full_tosa_ethos_pipeline( - common.get_u85_compile_spec(), module, test_data - ) - - def test_only_full_tosa_MI(self): - self._test_full_tosa_MI_pipeline(self.Full(), ()) - - def test_const_full_tosa_MI(self): - _input = torch.rand((2, 2, 3, 3)) * 10 - self._test_full_tosa_MI_pipeline(self.AddConstFull(), (_input,)) - - @parameterized.expand(FullLike.test_parameters) - def test_full_like_tosa_MI(self, test_tensor: Tuple): - self._test_full_tosa_MI_pipeline(self.FullLike(), test_tensor) - - @parameterized.expand(AddVariableFull.test_parameters) - def test_full_tosa_MI(self, test_tensor: Tuple): - self._test_full_tosa_MI_pipeline( - self.AddVariableFull(), example_data=test_tensor - ) - - @parameterized.expand(AddVariableFull.test_parameters) - def test_full_tosa_BI(self, test_tensor: Tuple): - self._test_full_tosa_BI_pipeline(self.AddVariableFull(), test_tensor) - - @parameterized.expand(FullLike.test_parameters) - def test_full_like_tosa_BI(self, test_tensor: Tuple): - self._test_full_tosa_BI_pipeline(self.FullLike(), test_tensor) - - @parameterized.expand(AddVariableFull.test_parameters) - @pytest.mark.corstone_fvp - def test_full_u55_BI(self, test_tensor: Tuple): - self._test_full_tosa_u55_pipeline( - self.AddVariableFull(), - test_tensor, - ) - - @parameterized.expand(AddVariableFull.test_parameters) - @pytest.mark.corstone_fvp - def test_full_u85_BI(self, test_tensor: Tuple): - self._test_full_tosa_u85_pipeline( - self.AddVariableFull(), - test_tensor, - ) - - def test_integer_value(self): - _input = torch.ones((2, 2)) - integer_fill_value = 1 - 
self._test_full_tosa_MI_pipeline( - self.AddVariableFull(), example_data=(_input, integer_fill_value) - ) - - # This fails since the fill value in the full tensor is set at compile time by the example data (1.). - # Test data tries to set it again at runtime (to 2.) but it doesn't do anything. - # In eager mode, the fill value can be set at runtime, causing the outputs to not match. - @unittest.expectedFailure - def test_set_value_at_runtime(self): - _input = torch.ones((2, 2)) - example_fill_value = 1.0 - test_fill_value = 2.0 - self._test_full_tosa_MI_pipeline( - self.AddVariableFull(), - example_data=(_input, example_fill_value), - test_data=(_input, test_fill_value), - ) +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +input_t1 = Tuple[torch.Tensor, int] + +exir_op = "executorch_exir_dialects_edge__ops_aten_full_default" + + +class Full(torch.nn.Module): + # A single full op + def forward(self): + return torch.full((3, 3), 4.5) + + +class AddConstFull(torch.nn.Module): + # Input + a full with constant value. + def forward(self, x: torch.Tensor): + return torch.full((2, 2, 3, 3), 4.5, dtype=torch.float32) + x + + +class AddVariableFull(torch.nn.Module): + sizes: list[tuple[int, ...]] = [ + (5,), + (5, 5), + (5, 5, 5), + (1, 5, 5, 5), + ] + test_parameters = {} + for i, n in enumerate(sizes): + test_parameters[f"slice_randn_{i}"] = (torch.randn(n) * 10 - 5, 3.2) + + def forward(self, x: torch.Tensor, y): + # Input + a full with the shape from the input and a given value 'y'. + return x + torch.full(x.shape, y) + + +class FullLike(torch.nn.Module): + """Since full_like is replaced with full, we only need to test on reference model, not FVP.""" + + test_parameters = { + "full_like_value_3_2": lambda: (torch.randn(2, 2, 2, 2) * 50, 3.2), + "full_like_value_3": lambda: (torch.randn(2, 2, 2, 2) * 50, 3), + "full_like_value_3_2_int32": lambda: ( + (torch.randn(2, 2, 2, 2) * 50).to(torch.int32), + 3.2, + ), + "full_like_value_3_int32": lambda: ( + (torch.randn(2, 2, 2, 2) * 50).to(torch.int32), + 3, + ), + } + + def forward(self, input_tensor: torch.Tensor, value): + # Our backend can't handle tensors without users, which input_tensor doesn't have + # when the full_like is converted to a full. Therefore involve it in the output. 
+ return input_tensor + torch.full_like(input_tensor, value) + + +def test_full_tosa_MI_only(): + pipeline = TosaPipelineMI[input_t1]( + Full(), + (), + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +def test_full_tosa_MI_const(): + test_data = (torch.rand((2, 2, 3, 3)) * 10,) + pipeline = TosaPipelineMI[input_t1]( + AddConstFull(), + test_data, + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", FullLike.test_parameters) +def test_full_like_tosa_MI(test_data: Tuple): + pipeline = TosaPipelineMI[input_t1]( + FullLike(), + test_data(), + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", AddVariableFull.test_parameters) +def test_full_tosa_MI(test_data: Tuple): + pipeline = TosaPipelineMI[input_t1]( + AddVariableFull(), + test_data, + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", AddVariableFull.test_parameters) +def test_full_tosa_BI(test_data: Tuple): + pipeline = TosaPipelineBI[input_t1]( + AddVariableFull(), + test_data, + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", FullLike.test_parameters) +def test_full_like_tosa_BI(test_data: Tuple): + pipeline = TosaPipelineBI[input_t1]( + FullLike(), + test_data(), + aten_op=[], + exir_op=exir_op, + ) + pipeline.pop_stage("check.quant_nodes") + pipeline.run() + + +@common.parametrize("test_data", AddVariableFull.test_parameters) +@common.XfailIfNoCorstone320 +def test_full_u85_BI(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + AddVariableFull(), + test_data, + aten_ops=[], + exir_ops=exir_op, + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", AddVariableFull.test_parameters) +@common.XfailIfNoCorstone300 +def test_full_u55_BI(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + AddVariableFull(), + test_data, + aten_ops=[], + exir_ops=exir_op, + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +# This fails since full outputs int64 by default if 'fill_value' is integer, which our backend doesn't support. +@pytest.mark.skip( + "This fails since full outputs int64 by default if 'fill_value' is integer, which our backend doesn't support." +) +def test_full_tosa_MI_integer_value(): + test_data = (torch.ones((2, 2)), 1.0) + pipeline = TosaPipelineMI[input_t1]( + AddVariableFull(), + test_data, + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +# This fails since the fill value in the full tensor is set at compile time by the example data (1.). +# Test data tries to set it again at runtime (to 2.) but it doesn't do anything. +# In eager mode, the fill value can be set at runtime, causing the outputs to not match. +@pytest.mark.skip( + "This fails since the fill value in the full tensor is set at compile time by the example data (1.)." 
+) +def test_full_tosa_MI_set_value_at_runtime(tosa_version: str): + test_data = (torch.ones((2, 2)), 1.0) + pipeline = TosaPipelineMI[input_t1]( + AddVariableFull(), + test_data, + aten_op=[], + exir_op=exir_op, + ) + pipeline.pop_stage("run_method_and_compare_outputs") + pipeline.add_stage( + pipeline.tester.run_method_and_compare_outputs, inputs=(torch.ones((2, 2)), 2.0) + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_ge.py b/backends/arm/test/ops/test_ge.py index 7bcd2c923a4..19c036be526 100644 --- a/backends/arm/test/ops/test_ge.py +++ b/backends/arm/test/ops/test_ge.py @@ -62,25 +62,25 @@ def get_inputs(self): op_ge_scalar_rank4_randn = GreaterEqual(torch.randn(3, 2, 2, 2), 0.3) test_data_tensor = { - "ge_tensor_rank1_ones": op_ge_tensor_rank1_ones, - "ge_tensor_rank2_rand": op_ge_tensor_rank2_rand, - "ge_tensor_rank3_randn": op_ge_tensor_rank3_randn, - "ge_tensor_rank4_randn": op_ge_tensor_rank4_randn, + "ge_tensor_rank1_ones": lambda: op_ge_tensor_rank1_ones, + "ge_tensor_rank2_rand": lambda: op_ge_tensor_rank2_rand, + "ge_tensor_rank3_randn": lambda: op_ge_tensor_rank3_randn, + "ge_tensor_rank4_randn": lambda: op_ge_tensor_rank4_randn, } test_data_scalar = { - "ge_scalar_rank1_ones": op_ge_scalar_rank1_ones, - "ge_scalar_rank2_rand": op_ge_scalar_rank2_rand, - "ge_scalar_rank3_randn": op_ge_scalar_rank3_randn, - "ge_scalar_rank4_randn": op_ge_scalar_rank4_randn, + "ge_scalar_rank1_ones": lambda: op_ge_scalar_rank1_ones, + "ge_scalar_rank2_rand": lambda: op_ge_scalar_rank2_rand, + "ge_scalar_rank3_randn": lambda: op_ge_scalar_rank3_randn, + "ge_scalar_rank4_randn": lambda: op_ge_scalar_rank4_randn, } @common.parametrize("test_module", test_data_tensor) def test_ge_tensor_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), GreaterEqual.aten_op_tensor, GreaterEqual.exir_op, ) @@ -90,8 +90,8 @@ def test_ge_tensor_tosa_MI(test_module): @common.parametrize("test_module", test_data_scalar) def test_ge_scalar_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), GreaterEqual.aten_op_scalar, GreaterEqual.exir_op, ) @@ -101,8 +101,8 @@ def test_ge_scalar_tosa_MI(test_module): @common.parametrize("test_module", test_data_tensor) def test_ge_tensor_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), GreaterEqual.aten_op_tensor, GreaterEqual.exir_op, ) @@ -112,8 +112,8 @@ def test_ge_tensor_tosa_BI(test_module): @common.parametrize("test_module", test_data_scalar) def test_ge_scalar_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), GreaterEqual.aten_op_tensor, GreaterEqual.exir_op, ) @@ -125,10 +125,11 @@ def test_ge_scalar_tosa_BI(test_module): def test_ge_tensor_u55_BI(test_module): # GREATER_EQUAL is not supported on U55. pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", + test_module(), + test_module().get_inputs(), {GreaterEqual.exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -138,11 +139,12 @@ def test_ge_tensor_u55_BI(test_module): def test_ge_scalar_u55_BI(test_module): # GREATER_EQUAL is not supported on U55. 
pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", + test_module(), + test_module().get_inputs(), {GreaterEqual.exir_op: 1}, n_expected_delegates=1, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -155,8 +157,8 @@ def test_ge_scalar_u55_BI(test_module): @common.XfailIfNoCorstone320 def test_ge_tensor_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), GreaterEqual.aten_op_tensor, GreaterEqual.exir_op, run_on_fvp=True, @@ -172,8 +174,8 @@ def test_ge_tensor_u85_BI(test_module): @common.XfailIfNoCorstone320 def test_ge_scalar_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), GreaterEqual.aten_op_tensor, GreaterEqual.exir_op, run_on_fvp=True, diff --git a/backends/arm/test/ops/test_gelu.py b/backends/arm/test/ops/test_gelu.py index fb1253fdb0c..6ac9b5dabf5 100644 --- a/backends/arm/test/ops/test_gelu.py +++ b/backends/arm/test/ops/test_gelu.py @@ -22,51 +22,51 @@ class Gelu(torch.nn.Module): exir_op = "executorch_exir_dialects_edge__ops_aten_gelu_default" test_data: dict[str, Tuple[str, input_t1]] = { - "zeros_none": ( + "zeros_none": lambda: ( "none", torch.zeros(1, 10, 10, 10), ), - "ones_none": ( + "ones_none": lambda: ( "none", torch.ones(10, 10, 10), ), - "rand_none": ( + "rand_none": lambda: ( "none", (torch.rand(10, 10) - 0.5), ), - "randn_pos_none": ( + "randn_pos_none": lambda: ( "none", (torch.randn(1, 4, 4, 4) + 10), ), - "randn_neg_none": ( + "randn_neg_none": lambda: ( "none", (torch.randn(1, 4, 4, 4) - 10), ), - "ramp_none": ( + "ramp_none": lambda: ( "none", torch.arange(-16, 16, 0.2), ), - "zeros_tanh": ( + "zeros_tanh": lambda: ( "tanh", torch.zeros(1, 10, 10, 10), ), - "ones_tanh": ( + "ones_tanh": lambda: ( "tanh", torch.ones(10, 10, 10), ), - "rand_tanh": ( + "rand_tanh": lambda: ( "tanh", (torch.rand(10, 10) - 0.5), ), - "randn_pos_tanh": ( + "randn_pos_tanh": lambda: ( "tanh", (torch.randn(1, 4, 4, 4) + 10), ), - "randn_neg_tanh": ( + "randn_neg_tanh": lambda: ( "tanh", (torch.randn(1, 4, 4, 4) - 10), ), - "ramp_tanh": ( + "ramp_tanh": lambda: ( "tanh", torch.arange(-16, 16, 0.2), ), @@ -82,10 +82,10 @@ def forward(self, x: torch.Tensor): @common.parametrize("test_data", Gelu.test_data) def test_gelu_tosa_MI(test_data: input_t1): - approximate = test_data[0] + approximate, test_data = test_data() TosaPipelineMI[input_t1]( Gelu(approximate), - (test_data[1],), + (test_data,), Gelu.aten_op, Gelu.exir_op, use_to_edge_transform_and_lower=False, @@ -94,32 +94,34 @@ def test_gelu_tosa_MI(test_data: input_t1): @common.parametrize("test_data", Gelu.test_data) def test_gelu_tosa_BI(test_data: input_t1): - approximate = test_data[0] + approximate, test_data = test_data() TosaPipelineBI[input_t1]( Gelu(approximate), - (test_data[1],), + (test_data,), Gelu.aten_op, Gelu.exir_op, ).run() @common.parametrize("test_data", Gelu.test_data) +@common.XfailIfNoCorstone300 def test_gelu_u55_BI(test_data: input_t1): - approximate = test_data[0] + approximate, test_data = test_data() EthosU55PipelineBI[input_t1]( Gelu(approximate), - (test_data[1],), + (test_data,), Gelu.aten_op, Gelu.exir_op, ).run() @common.parametrize("test_data", Gelu.test_data) +@common.XfailIfNoCorstone320 def test_gelu_u85_BI(test_data: input_t1): - approximate = test_data[0] + approximate, test_data = test_data() EthosU85PipelineBI[input_t1]( 
Gelu(approximate), - (test_data[1],), + (test_data,), Gelu.aten_op, Gelu.exir_op, ).run() diff --git a/backends/arm/test/ops/test_gt.py b/backends/arm/test/ops/test_gt.py index 15515958c85..0a1b97928fd 100644 --- a/backends/arm/test/ops/test_gt.py +++ b/backends/arm/test/ops/test_gt.py @@ -63,24 +63,27 @@ def get_inputs(self): op_gt_scalar_rank4_randn = Greater(torch.randn(3, 2, 2, 2), 0.3) test_data_tensor = { - "gt_tensor_rank1_ones": op_gt_tensor_rank1_ones, - "gt_tensor_rank2_rand": op_gt_tensor_rank2_rand, - "gt_tensor_rank3_randn": op_gt_tensor_rank3_randn, - "gt_tensor_rank4_randn": op_gt_tensor_rank4_randn, + "gt_tensor_rank1_ones": lambda: op_gt_tensor_rank1_ones, + "gt_tensor_rank2_rand": lambda: op_gt_tensor_rank2_rand, + "gt_tensor_rank3_randn": lambda: op_gt_tensor_rank3_randn, + "gt_tensor_rank4_randn": lambda: op_gt_tensor_rank4_randn, } test_data_scalar = { - "gt_scalar_rank1_ones": op_gt_scalar_rank1_ones, - "gt_scalar_rank2_rand": op_gt_scalar_rank2_rand, - "gt_scalar_rank3_randn": op_gt_scalar_rank3_randn, - "gt_scalar_rank4_randn": op_gt_scalar_rank4_randn, + "gt_scalar_rank1_ones": lambda: op_gt_scalar_rank1_ones, + "gt_scalar_rank2_rand": lambda: op_gt_scalar_rank2_rand, + "gt_scalar_rank3_randn": lambda: op_gt_scalar_rank3_randn, + "gt_scalar_rank4_randn": lambda: op_gt_scalar_rank4_randn, } @common.parametrize("test_module", test_data_tensor) def test_gt_tensor_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, test_module.get_inputs(), Greater.aten_op_tensor, Greater.exir_op + test_module(), + test_module().get_inputs(), + Greater.aten_op_tensor, + Greater.exir_op, ) pipeline.run() @@ -88,7 +91,10 @@ def test_gt_tensor_tosa_MI(test_module): @common.parametrize("test_module", test_data_scalar) def test_gt_scalar_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, test_module.get_inputs(), Greater.aten_op_scalar, Greater.exir_op + test_module(), + test_module().get_inputs(), + Greater.aten_op_scalar, + Greater.exir_op, ) pipeline.run() @@ -96,7 +102,10 @@ def test_gt_scalar_tosa_MI(test_module): @common.parametrize("test_module", test_data_tensor) def test_gt_tensor_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, test_module.get_inputs(), Greater.aten_op_tensor, Greater.exir_op + test_module(), + test_module().get_inputs(), + Greater.aten_op_tensor, + Greater.exir_op, ) pipeline.run() @@ -104,7 +113,10 @@ def test_gt_tensor_tosa_BI(test_module): @common.parametrize("test_module", test_data_scalar) def test_gt_scalar_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, test_module.get_inputs(), Greater.aten_op_tensor, Greater.exir_op + test_module(), + test_module().get_inputs(), + Greater.aten_op_tensor, + Greater.exir_op, ) pipeline.run() @@ -114,10 +126,11 @@ def test_gt_scalar_tosa_BI(test_module): def test_gt_tensor_u55_BI(test_module): # Greater is not supported on U55. pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", + test_module(), + test_module().get_inputs(), {Greater.exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -127,11 +140,12 @@ def test_gt_tensor_u55_BI(test_module): def test_gt_scalar_u55_BI(test_module): # Greater is not supported on U55. 
pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", + test_module(), + test_module().get_inputs(), {Greater.exir_op: 1}, n_expected_delegates=1, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -146,8 +160,8 @@ def test_gt_scalar_u55_BI(test_module): @common.XfailIfNoCorstone320 def test_gt_tensor_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), Greater.aten_op_tensor, Greater.exir_op, run_on_fvp=True, @@ -165,8 +179,8 @@ def test_gt_tensor_u85_BI(test_module): @common.XfailIfNoCorstone320 def test_gt_scalar_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), Greater.aten_op_tensor, Greater.exir_op, run_on_fvp=True, diff --git a/backends/arm/test/ops/test_hardsigmoid.py b/backends/arm/test/ops/test_hardsigmoid.py index f73a995b120..399c6088e89 100644 --- a/backends/arm/test/ops/test_hardsigmoid.py +++ b/backends/arm/test/ops/test_hardsigmoid.py @@ -1,128 +1,89 @@ # Copyright 2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest from typing import Tuple -import pytest import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) +aten_op = "torch.ops.aten.hardsigmoid.default" +input_t1 = Tuple[torch.Tensor] # Input x -test_data_suite = [ +test_data_suite = { # (test_name, test_data) - ("zeros", torch.zeros(1, 10, 10, 10)), - ("ones", torch.ones(10, 10, 10)), - ("rand", torch.rand(10, 10) - 0.5), - ("randn_pos", torch.randn(10) + 10), - ("randn_neg", torch.randn(10) - 10), - ("ramp", torch.arange(-16, 16, 0.2)), -] - - -class TestHardsigmoid(unittest.TestCase): - class Hardsigmoid(torch.nn.Module): - def __init__(self): - super().__init__() - self.hardsigmoid = torch.nn.Hardsigmoid() - - def forward(self, x): - return self.hardsigmoid(x) - - def _test_hardsigmoid_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check(["torch.ops.aten.hardsigmoid.default"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_hardsigmoid_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check(["torch.ops.aten.hardsigmoid.default"]) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) - 
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_hardsigmoid_tosa_ethos_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: Tuple[torch.tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.hardsigmoid.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite) - def test_hardsigmoid_tosa_MI( - self, - test_name: str, - test_data: torch.Tensor, - ): - self._test_hardsigmoid_tosa_MI_pipeline(self.Hardsigmoid(), (test_data,)) - - @parameterized.expand(test_data_suite) - def test_hardsigmoid_tosa_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardsigmoid_tosa_BI_pipeline(self.Hardsigmoid(), (test_data,)) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_hardsigmoid_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardsigmoid_tosa_ethos_BI_pipeline( - common.get_u55_compile_spec(), self.Hardsigmoid(), (test_data,) - ) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_hardsigmoid_tosa_u85_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardsigmoid_tosa_ethos_BI_pipeline( - common.get_u85_compile_spec(), self.Hardsigmoid(), (test_data,) - ) + "zeros": lambda: torch.zeros(1, 10, 10, 10), + "ones": lambda: torch.ones(10, 10, 10), + "rand": lambda: torch.rand(10, 10) - 0.5, + "randn_pos": lambda: torch.randn(10) + 10, + "randn_neg": lambda: torch.randn(10) - 10, + "ramp": lambda: torch.arange(-16, 16, 0.2), +} + + +class Hardsigmoid(torch.nn.Module): + def __init__(self): + super().__init__() + self.hardsigmoid = torch.nn.Hardsigmoid() + + def forward(self, x): + return self.hardsigmoid(x) + + +@common.parametrize("test_data", test_data_suite) +def test_hardsigmoid_tosa_MI(test_data: torch.Tensor): + pipeline = TosaPipelineMI[input_t1]( + Hardsigmoid(), + (test_data(),), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_hardsigmoid_tosa_BI(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t1]( + Hardsigmoid(), + (test_data(),), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 +def test_hardsigmoid_u55_BI(test_data: torch.Tensor): + pipeline = EthosU55PipelineBI[input_t1]( + Hardsigmoid(), + (test_data(),), + aten_op, + exir_ops=[], + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_hardsigmoid_u85_BI(test_data: torch.Tensor): + pipeline = EthosU85PipelineBI[input_t1]( + Hardsigmoid(), + (test_data(),), + aten_op, + exir_ops=[], + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_hardswish.py b/backends/arm/test/ops/test_hardswish.py index 81aba540e3f..bd61346e3db 100644 --- a/backends/arm/test/ops/test_hardswish.py 
+++ b/backends/arm/test/ops/test_hardswish.py @@ -1,128 +1,79 @@ # Copyright 2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest from typing import Tuple -import pytest import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) +aten_op = "torch.ops.aten.hardswish.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_clamp_default" -test_data_suite = [ +input_t1 = Tuple[torch.Tensor] + +test_data_suite = { # (test_name, test_data) - ("zeros", torch.zeros(1, 10, 10, 10)), - ("ones", torch.ones(10, 10, 10)), - ("rand", torch.rand(10, 10) - 0.5), - ("randn_pos", torch.randn(10) + 10), - ("randn_neg", torch.randn(10) - 10), - ("ramp", torch.arange(-16, 16, 0.2)), -] - - -class TestHardswish(unittest.TestCase): - class Hardswish(torch.nn.Module): - def __init__(self): - super().__init__() - self.hardswish = torch.nn.Hardswish() - - def forward(self, x): - return self.hardswish(x) - - def _test_hardswish_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check(["torch.ops.aten.hardswish.default"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_hardswish_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check(["torch.ops.aten.hardswish.default"]) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_hardswish_tosa_ethos_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: Tuple[torch.tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.hardswish.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_not(["executorch_exir_dialects_edge__ops_aten_clamp_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite) - def test_hardswish_tosa_MI( - self, - test_name: str, - test_data: torch.Tensor, - ): - self._test_hardswish_tosa_MI_pipeline(self.Hardswish(), (test_data,)) - - 
@parameterized.expand(test_data_suite) - def test_hardswish_tosa_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardswish_tosa_BI_pipeline(self.Hardswish(), (test_data,)) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_hardswish_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardswish_tosa_ethos_BI_pipeline( - common.get_u55_compile_spec(), self.Hardswish(), (test_data,) - ) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_hardswish_tosa_u85_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardswish_tosa_ethos_BI_pipeline( - common.get_u85_compile_spec(), self.Hardswish(), (test_data,) - ) + "zeros": lambda: (torch.zeros(1, 10, 10, 10)), + "ones": lambda: (torch.ones(10, 10, 10)), + "rand": lambda: (torch.rand(10, 10) - 0.5), + "randn_pos": lambda: (torch.randn(10) + 10), + "randn_neg": lambda: (torch.randn(10) - 10), + "ramp": lambda: (torch.arange(-16, 16, 0.2)), +} + + +class Hardswish(torch.nn.Module): + def __init__(self): + super().__init__() + self.hardswish = torch.nn.Hardswish() + + def forward(self, x): + return self.hardswish(x) + + +@common.parametrize("test_data", test_data_suite) +def test_hardswish_tosa_MI(test_data): + pipeline = TosaPipelineMI[input_t1](Hardswish(), (test_data(),), aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_hardswish_tosa_BI(test_data): + pipeline = TosaPipelineBI[input_t1](Hardswish(), (test_data(),), aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 +def test_hardswish_u55_BI(test_data): + EthosU55PipelineBI[input_t1]( + Hardswish(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ).run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_hardswish_u85_BI(test_data): + EthosU85PipelineBI[input_t1]( + Hardswish(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ).run() diff --git a/backends/arm/test/ops/test_hardtanh.py b/backends/arm/test/ops/test_hardtanh.py index 46b44078785..f1a50467df7 100644 --- a/backends/arm/test/ops/test_hardtanh.py +++ b/backends/arm/test/ops/test_hardtanh.py @@ -1,143 +1,91 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-import unittest from typing import Tuple -import pytest - import torch -from executorch.backends.arm.quantizer import ( - EthosUQuantizer, - get_symmetric_quantization_config, - TOSAQuantizer, +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, ) -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester - -from executorch.backends.arm.tosa_specification import TosaSpecification -from executorch.backends.xnnpack.test.tester.tester import Quantize -from parameterized import parameterized - -test_data_suite = [ +test_data_suite = { # (test_name, test_data) - ("zeros", torch.zeros(1, 10, 10, 10)), - ("ones", torch.ones(10, 10, 10)), - ("rand", torch.rand(10, 10) - 0.5), - ("randn_pos", torch.randn(10) + 10), - ("randn_neg", torch.randn(10) - 10), - ("ramp", torch.arange(-16, 16, 0.2)), -] - - -class TestHardTanh(unittest.TestCase): - """Tests HardTanh Operator.""" - - class HardTanh(torch.nn.Module): - - def __init__(self): - super().__init__() - - self.hardTanh = torch.nn.Hardtanh() - - def forward(self, x): - return self.hardTanh(x) - - def _test_hardtanh_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check(["torch.ops.aten.hardtanh.default"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_hardtanh_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_hardtanh_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI") - compile_spec = common.get_tosa_compile_spec(tosa_spec) - quantizer = TOSAQuantizer(tosa_spec).set_io(get_symmetric_quantization_config()) - ( - ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.hardtanh.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_hardtanh_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_hardtanh_tosa_ethosu_BI_pipeline( - self, compile_spec, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() - ) - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.hardtanh.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_hardtanh_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite) - def 
test_hardtanh_tosa_MI( - self, - test_name: str, - test_data: torch.Tensor, - ): - self._test_hardtanh_tosa_MI_pipeline(self.HardTanh(), (test_data,)) - - @parameterized.expand(test_data_suite) - def test_hardtanh_tosa_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardtanh_tosa_BI_pipeline(self.HardTanh(), (test_data,)) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_hardtanh_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardtanh_tosa_ethosu_BI_pipeline( - common.get_u55_compile_spec(), self.HardTanh(), (test_data,) - ) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_hardtanh_tosa_u85_BI(self, test_name: str, test_data: torch.Tensor): - self._test_hardtanh_tosa_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.HardTanh(), (test_data,) - ) + "zeros": lambda: (torch.zeros(1, 10, 10, 10)), + "ones": lambda: (torch.ones(10, 10, 10)), + "rand": lambda: (torch.rand(10, 10) - 0.5), + "randn_pos": lambda: (torch.randn(10) + 10), + "randn_neg": lambda: (torch.randn(10) - 10), + "ramp": lambda: (torch.arange(-16, 16, 0.2)), +} + +aten_op = "torch.ops.aten.hardtanh.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_hardtanh_default" + +input_t = Tuple[torch.Tensor] + + +class HardTanh(torch.nn.Module): + + def __init__(self): + super().__init__() + + self.hardTanh = torch.nn.Hardtanh() + + def forward(self, x): + return self.hardTanh(x) + + +@common.parametrize("test_data", test_data_suite) +def test_hardtanh_tosa_MI(test_data: torch.Tensor): + pipeline = TosaPipelineMI[input_t](HardTanh(), (test_data(),), aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_hardtanh_tosa_BI(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t]( + HardTanh(), + (test_data(),), + aten_op, + exir_op, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 +def test_hardtanh_u55_BI(test_data: torch.Tensor): + pipeline = EthosU55PipelineBI[input_t]( + HardTanh(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_hardtanh_u85_BI(test_data: torch.Tensor): + pipeline = EthosU85PipelineBI[input_t]( + HardTanh(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_layer_norm.py b/backends/arm/test/ops/test_layer_norm.py index 7ed181711a1..d2d9aa0bc14 100644 --- a/backends/arm/test/ops/test_layer_norm.py +++ b/backends/arm/test/ops/test_layer_norm.py @@ -42,18 +42,21 @@ def forward(self, x): input_t = tuple[torch.Tensor] test_data_suite = { - "randn_last_dim": ((torch.randn(1, 5, 5, 5),), LayerNorm([5])), - "rand_last_two_dims": ((torch.rand(1, 5, 5, 5),), LayerNorm([5, 5])), - "rand_last_two_dims_not_elementwise_affine": ( + "randn_last_dim": lambda: ((torch.randn(1, 5, 5, 5),), LayerNorm([5])), + "rand_last_two_dims": lambda: ((torch.rand(1, 5, 5, 5),), LayerNorm([5, 5])), + "rand_last_two_dims_not_elementwise_affine": lambda: ( (torch.rand(1, 5, 5, 5),), LayerNorm([5, 5], 1e-5, False), ), - "rand_last_two_dims_not_elementwise_affine_no_bias": ( + "rand_last_two_dims_not_elementwise_affine_no_bias": lambda: ( (torch.rand(1, 5, 5, 5),), LayerNorm([5, 5], 1e-5, False, False), ), - "randn_last_three_dims": 
((torch.randn(1, 15, 10, 5),), LayerNorm([15, 10, 5])), - "randn_last_three_dims_no_bias": ( + "randn_last_three_dims": lambda: ( + (torch.randn(1, 15, 10, 5),), + LayerNorm([15, 10, 5]), + ), + "randn_last_three_dims_no_bias": lambda: ( (torch.randn(1, 15, 10, 5),), LayerNorm([15, 10, 5], 1e-2, False, False), ), @@ -62,9 +65,10 @@ def forward(self, x): @common.parametrize("test_data", test_data_suite) def test_native_layer_norm_tosa_MI(test_data): + test_data, model = test_data() pipeline = TosaPipelineMI[input_t]( - test_data[1], - test_data[0], + model, + test_data, "torch.ops.aten.layer_norm.default", ) pipeline.run() @@ -72,9 +76,10 @@ def test_native_layer_norm_tosa_MI(test_data): @common.parametrize("test_data", test_data_suite) def test_native_layer_norm_tosa_BI(test_data): + test_data, model = test_data() pipeline = TosaPipelineBI[input_t]( - test_data[1], - test_data[0], + model, + test_data, "torch.ops.aten.sub.Tensor", # Just check for sub op included in the layernorm decomposition ) pipeline.change_args("run_method_and_compare_outputs", qtol=1) @@ -84,9 +89,10 @@ def test_native_layer_norm_tosa_BI(test_data): @common.parametrize("test_data", test_data_suite) @common.XfailIfNoCorstone300 def test_native_layer_norm_u55_BI(test_data): + test_data, model = test_data() pipeline = EthosU55PipelineBI[input_t]( - test_data[1], - test_data[0], + model, + test_data, "torch.ops.aten.sub.Tensor", # Just check for sub op included in the layernorm decomposition run_on_fvp=True, ) @@ -97,9 +103,10 @@ def test_native_layer_norm_u55_BI(test_data): @common.parametrize("test_data", test_data_suite) @common.XfailIfNoCorstone320 def test_native_layer_norm_u85_BI(test_data): + test_data, model = test_data() pipeline = EthosU85PipelineBI[input_t]( - test_data[1], - test_data[0], + model, + test_data, "torch.ops.aten.sub.Tensor", # Just check for sub op included in the layernorm decomposition run_on_fvp=True, ) diff --git a/backends/arm/test/ops/test_le.py b/backends/arm/test/ops/test_le.py index 7e243ead620..217e409c6f5 100644 --- a/backends/arm/test/ops/test_le.py +++ b/backends/arm/test/ops/test_le.py @@ -5,7 +5,6 @@ from typing import Tuple -import pytest import torch from executorch.backends.arm.test import common @@ -57,63 +56,38 @@ def get_inputs(self): ) test_data_common = { - "le_rank1_ones": op_le_rank1_ones, - "le_rank2_rand": op_le_rank2_rand, - "le_rank3_randn": op_le_rank3_randn, - "le_rank4_randn": op_le_rank4_randn, + "le_rank1_ones": lambda: op_le_rank1_ones, + "le_rank2_rand": lambda: op_le_rank2_rand, + "le_rank3_randn": lambda: op_le_rank3_randn, + "le_rank4_randn": lambda: op_le_rank4_randn, } @common.parametrize("test_module", test_data_common) -def test_le_tosa_MI(test_module): +def test_le_tensor_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op + test_module(), test_module().get_inputs(), aten_op, exir_op ) pipeline.run() @common.parametrize("test_module", test_data_common) -def test_le_tosa_BI(test_module): +def test_le_tensor_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op + test_module(), test_module().get_inputs(), aten_op, exir_op ) pipeline.run() @common.parametrize("test_module", test_data_common) -def test_le_u55_BI(test_module): +def test_le_tensor_u55_BI_not_delegated(test_module): # GREATER_EQUAL is not supported on U55. LE uses the GREATER_EQUAL Tosa operator. 
pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", - {exir_op: 1}, - ) - pipeline.run() - - -@common.parametrize("test_module", test_data_common) -def test_le_u85_BI(test_module): - pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), - aten_op, - exir_op, - run_on_fvp=False, - use_to_edge_transform_and_lower=True, - ) - pipeline.run() - - -@common.parametrize("test_module", test_data_common) -@pytest.mark.skip(reason="The same as test_le_u55_BI") -def test_le_u55_BI_on_fvp(test_module): - # GREATER_EQUAL is not supported on U55. LE uses the GREATER_EQUAL Tosa operator. - pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", + test_module(), + test_module().get_inputs(), {exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -123,11 +97,11 @@ def test_le_u55_BI_on_fvp(test_module): test_data_common, xfails={"le_rank4_randn": "4D fails because boolean Tensors can't be subtracted"}, ) -@common.SkipIfNoCorstone320 -def test_le_u85_BI_on_fvp(test_module): +@common.XfailIfNoCorstone320 +def test_le_tensor_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), aten_op, exir_op, run_on_fvp=True, diff --git a/backends/arm/test/ops/test_leaky_relu.py b/backends/arm/test/ops/test_leaky_relu.py index b9f0c3a8d1a..a83c2812bf0 100644 --- a/backends/arm/test/ops/test_leaky_relu.py +++ b/backends/arm/test/ops/test_leaky_relu.py @@ -28,19 +28,22 @@ def forward(self, x: torch.Tensor): return self.activation(x) test_data: dict[str, input_t1] = { - "zeros": ((torch.zeros(1, 1, 5, 5),), 0.01), - "ones": ((torch.ones(1, 32, 112, 112),), 0.01), - "rand": ((torch.rand(1, 96, 56, 56),), 0.2), - "3Dtensor": ((torch.rand(5, 5, 5),), 0.001), - "negative_slope": ((torch.rand(1, 16, 128, 128),), -0.002), + "zeros": lambda: ((torch.zeros(1, 1, 5, 5),), 0.01), + "ones": lambda: ((torch.ones(1, 32, 112, 112),), 0.01), + "rand": lambda: ((torch.rand(1, 96, 56, 56),), 0.2), + "3Dtensor": lambda: ((torch.rand(5, 5, 5),), 0.001), + "negative_slope": lambda: ((torch.rand(1, 16, 128, 128),), -0.002), } @common.parametrize("test_data", LeakyReLU.test_data) def test_leaky_relu_tosa_MI(test_data): - data, slope = test_data + data, slope = test_data() pipeline = TosaPipelineMI[input_t1]( - LeakyReLU(slope), data, [], use_to_edge_transform_and_lower=True + LeakyReLU(slope), + data, + [], + use_to_edge_transform_and_lower=True, ) pipeline.add_stage_after( "to_edge_transform_and_lower", pipeline.tester.check_not, [exir_op] @@ -50,9 +53,12 @@ def test_leaky_relu_tosa_MI(test_data): @common.parametrize("test_data", LeakyReLU.test_data) def test_leaky_relu_tosa_BI(test_data): - data, slope = test_data + data, slope = test_data() pipeline = TosaPipelineBI[input_t1]( - LeakyReLU(slope), data, [], use_to_edge_transform_and_lower=True + LeakyReLU(slope), + data, + [], + use_to_edge_transform_and_lower=True, ) pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op]) pipeline.run() @@ -61,7 +67,7 @@ def test_leaky_relu_tosa_BI(test_data): @common.parametrize("test_data", LeakyReLU.test_data) @common.XfailIfNoCorstone300 def test_leaky_relu_u55_BI(test_data): - data, slope = test_data + data, slope = test_data() pipeline = EthosU55PipelineBI[input_t1]( LeakyReLU(slope), data, @@ -76,7 +82,7 @@ def test_leaky_relu_u55_BI(test_data): @common.parametrize("test_data", 
LeakyReLU.test_data) @common.XfailIfNoCorstone320 def test_leaky_relu_u85_BI(test_data): - data, slope = test_data + data, slope = test_data() pipeline = EthosU85PipelineBI[input_t1]( LeakyReLU(slope), data, diff --git a/backends/arm/test/ops/test_linear.py b/backends/arm/test/ops/test_linear.py index 9a289909bae..56d33097999 100644 --- a/backends/arm/test/ops/test_linear.py +++ b/backends/arm/test/ops/test_linear.py @@ -1,271 +1,199 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest from typing import Tuple import pytest import torch -from executorch.backends.arm.test import common, conftest +from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) +aten_op = "torch.ops.aten.linear.default" -test_data_suite_rank1 = [ +input_t1 = Tuple[torch.Tensor] + +test_data_suite_rank1 = { # (test_name, test_data, out_features, has_bias) - ( - "model_linear_rank1_zeros", + "model_linear_rank1_zeros": lambda: ( torch.zeros(10), 15, True, ), - ( - "model_linear_rank1_ones", + "model_linear_rank1_ones": lambda: ( torch.ones(10), 15, False, ), - ( - "model_linear_rank1_negative_ones", + "model_linear_rank1_negative_ones": lambda: ( torch.ones(10) * (-1), 20, True, ), - ( - "model_linear_rank1_rand", + "model_linear_rank1_rand": lambda: ( torch.rand(10), 10, True, ), - ( - "model_linear_rank1_negative_large_rand", + "model_linear_rank1_negative_large_rand": lambda: ( torch.rand(10) * (-100), 30, False, ), - ( - "model_linear_rank1_large_randn", + "model_linear_rank1_large_randn": lambda: ( torch.randn(15) * 100, 20, True, ), -] +} -test_data_suite_rank4 = [ +test_data_suite_rank4 = { # (test_name, test_data, out_features, has_bias) - ( - "model_linear_rank4_zeros", + "model_linear_rank4_zeros": lambda: ( torch.zeros(5, 10, 25, 20), 30, True, ), - ( - "model_linear_rank4_ones", + "model_linear_rank4_ones": lambda: ( torch.ones(5, 10, 25, 20), 30, False, ), - ( - "model_linear_rank4_negative_ones", + "model_linear_rank4_negative_ones": lambda: ( torch.ones(5, 10, 25, 20) * (-1), 30, True, ), - ( - "model_linear_rank4_rand", + "model_linear_rank4_rand": lambda: ( torch.rand(5, 10, 25, 20), 30, False, ), - ( - "model_linear_rank4_negative_large_rand", + "model_linear_rank4_negative_large_rand": lambda: ( torch.rand(5, 10, 25, 20) * (-100), 30, True, ), - ( - "model_linear_rank4_large_randn", + "model_linear_rank4_large_randn": lambda: ( torch.randn(5, 10, 25, 20) * 100, 30, False, ), -] - - -class TestLinear(unittest.TestCase): - """tests the linear operation y = Ax + b""" - - class Linear(torch.nn.Module): - def __init__( - self, - in_features: int, - out_features: int = 3, - bias: bool = True, - ): - super().__init__() - self.fc = torch.nn.Linear( - in_features=in_features, - out_features=out_features, - bias=bias, - ) - - def forward(self, x): - return self.fc(x) +} - def _test_linear_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - tester = ( - ArmTester( - module, - 
example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+MI", - ), - ) - .export() - .check_count({"torch.ops.aten.linear.default": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - if conftest.is_option_enabled("tosa_ref_model"): - tester.run_method_and_compare_outputs(inputs=test_data) - def _test_linear_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+BI", - ), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.linear.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - if conftest.is_option_enabled("tosa_ref_model"): - tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) - - def _test_linear_tosa_ethosu_BI_pipeline( +class Linear(torch.nn.Module): + def __init__( self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[torch.Tensor], - ) -> ArmTester: - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.linear.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - # TODO: Add FVP testing support. - return tester - - @parameterized.expand(test_data_suite_rank1 + test_data_suite_rank4) - @pytest.mark.tosa_ref_model - def test_linear_tosa_MI( - self, - test_name: str, - test_data: torch.Tensor, - out_features: int, - has_bias: bool, + in_features: int, + out_features: int = 3, + bias: bool = True, ): - in_features = test_data.shape[-1] - test_data = (test_data,) - self._test_linear_tosa_MI_pipeline( - self.Linear( - in_features=in_features, - out_features=out_features, - bias=has_bias, - ), - test_data, + super().__init__() + self.fc = torch.nn.Linear( + in_features=in_features, + out_features=out_features, + bias=bias, ) - @parameterized.expand(test_data_suite_rank1 + test_data_suite_rank4) - @pytest.mark.tosa_ref_model - def test_linear_tosa_BI( - self, - test_name: str, - test_data: torch.Tensor, - out_features: int, - has_bias: bool, - ): - in_features = test_data.shape[-1] - test_data = (test_data,) - self._test_linear_tosa_BI_pipeline( - self.Linear( - in_features=in_features, out_features=out_features, bias=has_bias - ), - test_data, - ) - - @parameterized.expand(test_data_suite_rank1) - @pytest.mark.corstone_fvp - def test_linear_tosa_u55_BI( - self, - test_name: str, - test_data: torch.Tensor, - out_features: int, - has_bias: bool, - ): - in_features = test_data.shape[-1] - test_data = (test_data,) - tester = self._test_linear_tosa_ethosu_BI_pipeline( - self.Linear( - in_features=in_features, - out_features=out_features, - bias=has_bias, - ), - common.get_u55_compile_spec(), - test_data, - ) - - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite_rank1 + test_data_suite_rank4) - @pytest.mark.corstone_fvp - def test_linear_tosa_u85_BI( - self, - test_name: str, - test_data: torch.Tensor, - out_features: int, - has_bias: bool, - ): - in_features 
= test_data.shape[-1] - test_data = (test_data,) - self._test_linear_tosa_ethosu_BI_pipeline( - self.Linear( - in_features=in_features, - out_features=out_features, - bias=has_bias, - ), - common.get_u85_compile_spec(), - test_data, - ) + def forward(self, x): + return self.fc(x) + + +@common.parametrize("test_data", test_data_suite_rank1 | test_data_suite_rank4) +def test_linear_tosa_MI(test_data: torch.Tensor): + test_data, out_features, has_bias = test_data() + in_features = test_data.shape[-1] + pipeline = TosaPipelineMI[input_t1]( + Linear( + in_features=in_features, + out_features=out_features, + bias=has_bias, + ), + (test_data,), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@pytest.mark.flaky(reruns=5) # TODO: Investigate flakyness. +@common.parametrize("test_data", test_data_suite_rank1 | test_data_suite_rank4) +def test_linear_tosa_BI(test_data: torch.Tensor): + test_data, out_features, has_bias = test_data() + in_features = test_data.shape[-1] + pipeline = TosaPipelineBI[input_t1]( + Linear( + in_features=in_features, + out_features=out_features, + bias=has_bias, + ), + (test_data,), + aten_op, + exir_op=[], + use_to_edge_transform_and_lower=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite_rank1) +@common.XfailIfNoCorstone300 +def test_linear_u55_BI(test_data: torch.Tensor): + test_data, out_features, has_bias = test_data() + in_features = test_data.shape[-1] + EthosU55PipelineBI[input_t1]( + Linear( + in_features=in_features, + out_features=out_features, + bias=has_bias, + ), + (test_data,), + aten_op, + exir_ops=[], + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ).run() + + +x_fail = { + "model_linear_rank4_zeros": "AssertionError: Output 0 does not match reference output.", + "model_linear_rank4_ones": "AssertionError: Output 0 does not match reference output.", + "model_linear_rank4_negative_ones": "AssertionError: Output 0 does not match reference output.", + "model_linear_rank4_rand": "AssertionError: Output 0 does not match reference output.", + "model_linear_rank4_negative_large_rand": "AssertionError: Output 0 does not match reference output.", + "model_linear_rank4_large_randn": "AssertionError: Output 0 does not match reference output.", +} + + +@common.parametrize( + "test_data", + test_data_suite_rank1 | test_data_suite_rank4, + x_fail, +) +@common.XfailIfNoCorstone320 +def test_linear_u85_BI(test_data: torch.Tensor): + test_data, out_features, has_bias = test_data() + in_features = test_data.shape[-1] + EthosU85PipelineBI[input_t1]( + Linear( + in_features=in_features, + out_features=out_features, + bias=has_bias, + ), + (test_data,), + aten_op, + exir_ops=[], + run_on_fvp=True, + use_to_edge_transform_and_lower=True, + ).run() diff --git a/backends/arm/test/ops/test_log.py b/backends/arm/test/ops/test_log.py index 0226a62328b..0ca4510681d 100644 --- a/backends/arm/test/ops/test_log.py +++ b/backends/arm/test/ops/test_log.py @@ -1,127 +1,75 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
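Two details worth calling out in the linear refactor just above: the rank-1 and rank-4 suites are merged with the PEP 584 dict-union operator (so the new tests require Python 3.9+), and in_features is recovered from the sample's last dimension instead of being stored alongside it. A minimal sketch of that unpacking, with illustrative data:

    import torch

    rank1 = {"rank1_rand": lambda: (torch.rand(10), 10, True)}
    rank4 = {"rank4_rand": lambda: (torch.rand(5, 10, 25, 20), 30, False)}

    for name, make in (rank1 | rank4).items():  # '|' keeps entries from both suites
        data, out_features, has_bias = make()   # unpack (data, out_features, bias)
        model = torch.nn.Linear(data.shape[-1], out_features, bias=has_bias)
        assert model(data).shape[-1] == out_features  # Linear maps only the last dim
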
-import unittest from typing import Tuple -import pytest - import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.backend_details import CompileSpec -from parameterized import parameterized +from executorch.backends.arm.test import common + +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.log.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_log_default" + +input_t1 = Tuple[torch.Tensor] -test_data_suite = [ +test_data_suite = { # (test_name, test_data) - ("ones_rank4", torch.ones(1, 10, 10, 10)), - ("ones_rank3", torch.ones(10, 10, 10)), - ("rand", torch.rand(10, 10) + 0.001), - ("randn_pos", torch.randn(10) + 10), - ("randn_spread", torch.max(torch.Tensor([0.0]), torch.randn(10) * 100)), - ("ramp", torch.arange(0.01, 20, 0.2)), -] - - -class TestLog(unittest.TestCase): - """Tests lowering of aten.log""" - - class Log(torch.nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - return torch.log(x) - - def _test_log_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check(["torch.ops.aten.log.default"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_log_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_log_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check(["torch.ops.aten.log.default"]) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_log_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_log_ethosu_BI_pipeline( - self, - compile_spec: CompileSpec, - module: torch.nn.Module, - test_data: Tuple[torch.tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.log.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_log_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite) - def test_log_tosa_MI( - self, - test_name: str, - test_data: torch.Tensor, - ): - self._test_log_tosa_MI_pipeline(self.Log(), (test_data,)) - - @parameterized.expand(test_data_suite) - def test_log_tosa_BI(self, test_name: str, test_data: torch.Tensor): - self._test_log_tosa_BI_pipeline(self.Log(), (test_data,)) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_log_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): - self._test_log_ethosu_BI_pipeline( - 
common.get_u55_compile_spec(), self.Log(), (test_data,) - ) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_log_tosa_u85_BI(self, test_name: str, test_data: torch.Tensor): - self._test_log_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.Log(), (test_data,) - ) + "ones_rank4": lambda: (torch.ones(1, 10, 10, 10)), + "ones_rank3": lambda: (torch.ones(10, 10, 10)), + "rand": lambda: (torch.rand(10, 10) + 0.001), + "randn_pos": lambda: (torch.randn(10) + 10), + "randn_spread": lambda: (torch.max(torch.Tensor([0.0]), torch.randn(10) * 100)), + "ramp": lambda: (torch.arange(0.01, 20, 0.2)), +} + + +class Log(torch.nn.Module): + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.log(x) + + +@common.parametrize("test_data", test_data_suite) +def test_log_tosa_MI(test_data: input_t1): + pipeline = TosaPipelineMI[input_t1](Log(), (test_data(),), aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_log_tosa_BI(test_data: input_t1): + pipeline = TosaPipelineBI[input_t1](Log(), (test_data(),), aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 +def test_log_u55_BI(test_data: input_t1): + EthosU55PipelineBI[input_t1]( + Log(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=True, + ).run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_log_u85_BI(test_data: input_t1): + EthosU85PipelineBI[input_t1]( + Log(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=True, + ).run() diff --git a/backends/arm/test/ops/test_logical.py b/backends/arm/test/ops/test_logical.py index a4b66339b0c..139653eea97 100644 --- a/backends/arm/test/ops/test_logical.py +++ b/backends/arm/test/ops/test_logical.py @@ -23,19 +23,19 @@ class LogicalBinary(torch.nn.Module): test_data: dict[input_t2] = { - "rank1": ( + "rank1": lambda: ( torch.tensor([True, True, False, False], dtype=torch.bool), torch.tensor([True, False, True, False], dtype=torch.bool), ), - "rand_rank2": ( + "rand_rank2": lambda: ( torch.randint(0, 2, (10, 10), dtype=torch.bool), torch.randint(0, 2, (10, 10), dtype=torch.bool), ), - "rand_rank3": ( + "rand_rank3": lambda: ( torch.randint(0, 2, (10, 10, 10), dtype=torch.bool), torch.randint(0, 2, (10, 10, 10), dtype=torch.bool), ), - "rand_rank4": ( + "rand_rank4": lambda: ( torch.randint(0, 2, (1, 10, 10, 10), dtype=torch.bool), torch.randint(0, 2, (1, 10, 10, 10), dtype=torch.bool), ), @@ -68,10 +68,10 @@ def forward(self, tensor1: torch.Tensor, tensor2: torch.Tensor): class Not(torch.nn.Module): test_data: dict[input_t1] = { - "rank1": (torch.tensor([True, True, False, False], dtype=torch.bool),), - "rand_rank2": (torch.randint(0, 2, (10, 10), dtype=torch.bool),), - "rand_rank3": (torch.randint(0, 2, (10, 10, 10), dtype=torch.bool),), - "rand_rank4": (torch.randint(0, 2, (1, 10, 10, 10), dtype=torch.bool),), + "rank1": lambda: (torch.tensor([True, True, False, False], dtype=torch.bool),), + "rand_rank2": lambda: (torch.randint(0, 2, (10, 10), dtype=torch.bool),), + "rand_rank3": lambda: (torch.randint(0, 2, (10, 10, 10), dtype=torch.bool),), + "rand_rank4": lambda: (torch.randint(0, 2, (1, 10, 10, 10), dtype=torch.bool),), } aten_op = "torch.ops.aten.logical_not.default" @@ -83,23 +83,31 @@ def forward(self, tensor: torch.Tensor): @common.parametrize("test_data", And().test_data) def test_logical_and_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](And(), test_data, 
And().aten_op, And().exir_op) + pipeline = TosaPipelineMI[input_t2]( + And(), test_data(), And().aten_op, And().exir_op + ) pipeline.run() @common.parametrize("test_data", And().test_data) def test_logical_and_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](And(), test_data, And().aten_op, And().exir_op) + pipeline = TosaPipelineBI[input_t2]( + And(), test_data(), And().aten_op, And().exir_op + ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") pipeline.run() @common.parametrize("test_data", And().test_data) -def test_logical_and_u55_BI(test_data: input_t2): +def test_logical_and_u55_BI_not_delegated(test_data: input_t2): # Tests that we don't delegate these ops since they are not supported on U55. pipeline = OpNotSupportedPipeline[input_t2]( - And(), test_data, "TOSA-0.80+BI+u55", {And().exir_op: 1} + And(), + test_data(), + {And().exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -109,7 +117,7 @@ def test_logical_and_u55_BI(test_data: input_t2): @common.XfailIfNoCorstone320 def test_logical_and_u85_BI(test_data: input_t2): pipeline = EthosU85PipelineBI[input_t2]( - And(), test_data, And().aten_op, And().exir_op, run_on_fvp=True + And(), test_data(), And().aten_op, And().exir_op, run_on_fvp=True ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") @@ -118,23 +126,31 @@ def test_logical_and_u85_BI(test_data: input_t2): @common.parametrize("test_data", Xor().test_data) def test_logical_xor_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](Xor(), test_data, Xor().aten_op, Xor().exir_op) + pipeline = TosaPipelineMI[input_t2]( + Xor(), test_data(), Xor().aten_op, Xor().exir_op + ) pipeline.run() @common.parametrize("test_data", Xor().test_data) def test_logical_xor_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](Xor(), test_data, Xor().aten_op, Xor().exir_op) + pipeline = TosaPipelineBI[input_t2]( + Xor(), test_data(), Xor().aten_op, Xor().exir_op + ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") pipeline.run() @common.parametrize("test_data", Xor().test_data) -def test_logical_xor_u55_BI(test_data: input_t2): +def test_logical_xor_u55_BI_not_delegated(test_data: input_t2): # Tests that we don't delegate these ops since they are not supported on U55. 
pipeline = OpNotSupportedPipeline[input_t2]( - Xor(), test_data, "TOSA-0.80+BI+u55", {Xor().exir_op: 1} + Xor(), + test_data(), + {Xor().exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -144,7 +160,7 @@ def test_logical_xor_u55_BI(test_data: input_t2): @common.XfailIfNoCorstone320 def test_logical_xor_u85_BI(test_data: input_t2): pipeline = EthosU85PipelineBI[input_t2]( - Xor(), test_data, Xor().aten_op, Xor().exir_op, run_on_fvp=True + Xor(), test_data(), Xor().aten_op, Xor().exir_op, run_on_fvp=True ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") @@ -153,33 +169,37 @@ def test_logical_xor_u85_BI(test_data: input_t2): @common.parametrize("test_data", Or().test_data) def test_logical_or_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](Or(), test_data, Or().aten_op, Or().exir_op) + pipeline = TosaPipelineMI[input_t2](Or(), test_data(), Or().aten_op, Or().exir_op) pipeline.run() @common.parametrize("test_data", Or().test_data) def test_logical_or_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](Or(), test_data, Or().aten_op, Or().exir_op) + pipeline = TosaPipelineBI[input_t2](Or(), test_data(), Or().aten_op, Or().exir_op) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") pipeline.run() @common.parametrize("test_data", Or().test_data) -def test_logical_or_u55_BI(test_data: input_t2): +def test_logical_or_u55_BI_not_delegated(test_data: input_t2): # Tests that we don't delegate these ops since they are not supported on U55. pipeline = OpNotSupportedPipeline[input_t2]( - Or(), test_data, "TOSA-0.80+BI+u55", {Or().exir_op: 1} + Or(), + test_data(), + {Or().exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @common.parametrize("test_data", Or().test_data) @pytest.mark.xfail(reason="MLETORCH-706: Support ScalarType::Bool in EthosUBackend.") -@common.XfailIfNoCorstone320 # TODO: Refactor to use XfailIfNoCorstone320 once MLETORCH-706 is done +@common.XfailIfNoCorstone320 def test_logical_or_u85_BI(test_data: input_t2): pipeline = EthosU85PipelineBI[input_t2]( - Or(), test_data, Or().aten_op, Or().exir_op, run_on_fvp=True + Or(), test_data(), Or().aten_op, Or().exir_op, run_on_fvp=True ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") @@ -188,23 +208,31 @@ def test_logical_or_u85_BI(test_data: input_t2): @common.parametrize("test_data", Not().test_data) def test_logical_not_tosa_MI(test_data: input_t2): - pipeline = TosaPipelineMI[input_t2](Not(), test_data, Not().aten_op, Not().exir_op) + pipeline = TosaPipelineMI[input_t2]( + Not(), test_data(), Not().aten_op, Not().exir_op + ) pipeline.run() @common.parametrize("test_data", Not().test_data) def test_logical_not_tosa_BI(test_data: input_t2): - pipeline = TosaPipelineBI[input_t2](Not(), test_data, Not().aten_op, Not().exir_op) + pipeline = TosaPipelineBI[input_t2]( + Not(), test_data(), Not().aten_op, Not().exir_op + ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") pipeline.run() @common.parametrize("test_data", Not().test_data) -def test_logical_not_u55_BI(test_data: input_t2): +def test_logical_not_u55_BI_not_delegated(test_data: input_t2): # Tests that we don't delegate these ops since they are not supported on U55. 
pipeline = OpNotSupportedPipeline[input_t2]( - Not(), test_data, "TOSA-0.80+BI+u55", {Not().exir_op: 1} + Not(), + test_data(), + {Not().exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -214,7 +242,7 @@ def test_logical_not_u55_BI(test_data: input_t2): @common.XfailIfNoCorstone320 def test_logical_not_u85_BI(test_data: input_t2): pipeline = EthosU85PipelineBI[input_t2]( - Not(), test_data, Not().aten_op, Not().exir_op, run_on_fvp=True + Not(), test_data(), Not().aten_op, Not().exir_op, run_on_fvp=True ) pipeline.pop_stage("quantize") pipeline.pop_stage("check.quant_nodes") diff --git a/backends/arm/test/ops/test_logsoftmax.py b/backends/arm/test/ops/test_logsoftmax.py index 7068ee77e01..50132ba8211 100644 --- a/backends/arm/test/ops/test_logsoftmax.py +++ b/backends/arm/test/ops/test_logsoftmax.py @@ -5,6 +5,8 @@ from typing import Tuple +import pytest + import torch from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.test_pipeline import ( @@ -29,20 +31,20 @@ def forward(self, x): return self.log_softmax(x) test_data = { - "ones": ((torch.ones(10, 10),), 1), - "ones_neg_dim": ((torch.ones(1, 3, 4),), -1), - "randn_neg_dim": ((torch.randn(1, 5, 8, 7),), -3), - "zeros": ((torch.zeros(1, 8, 5, 2),), 0), - "zeros_neg_dim": ((torch.zeros(1, 7, 8, 9),), -4), - "rand": ((torch.rand(1, 2, 5, 8),), 2), - "rand_neg_dim": ((torch.rand(1, 10, 8, 10),), -2), - "randn_mult_batches": ((torch.randn(2, 10, 10, 10),), 3), + "ones": lambda: ((torch.ones(10, 10),), 1), + "ones_neg_dim": lambda: ((torch.ones(1, 3, 4),), -1), + "randn_neg_dim": lambda: ((torch.randn(1, 5, 8, 7),), -3), + "zeros": lambda: ((torch.zeros(1, 8, 5, 2),), 0), + "zeros_neg_dim": lambda: ((torch.zeros(1, 7, 8, 9),), -4), + "rand": lambda: ((torch.rand(1, 2, 5, 8),), 2), + "rand_neg_dim": lambda: ((torch.rand(1, 10, 8, 10),), -2), + "randn_mult_batches": lambda: ((torch.randn(2, 10, 10, 10),), 3), } @common.parametrize("test_data", LogSoftmax.test_data) def test_log_softmax_tosa_MI(test_data): - data, dim = test_data + data, dim = test_data() pipeline = TosaPipelineMI[input_t1](LogSoftmax(dim), data, []) pipeline.add_stage_after( "to_edge_transform_and_lower", pipeline.tester.check_not, [exir_op] @@ -51,9 +53,10 @@ def test_log_softmax_tosa_MI(test_data): pipeline.run() +@pytest.mark.flaky(reruns=5) @common.parametrize("test_data", LogSoftmax.test_data) def test_log_softmax_tosa_BI(test_data): - data, dim = test_data + data, dim = test_data() pipeline = TosaPipelineBI[input_t1](LogSoftmax(dim), data, []) pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op]) pipeline.change_args("run_method_and_compare_outputs", qtol=1) @@ -69,8 +72,13 @@ def test_log_softmax_tosa_BI(test_data): ) @common.XfailIfNoCorstone300() def test_log_softmax_u55_BI(test_data): - data, dim = test_data - pipeline = EthosU55PipelineBI[input_t1](LogSoftmax(dim), data, [], run_on_fvp=True) + data, dim = test_data() + pipeline = EthosU55PipelineBI[input_t1]( + LogSoftmax(dim), + data, + [], + run_on_fvp=True, + ) pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op]) pipeline.change_args("run_method_and_compare_outputs", qtol=1) pipeline.run() @@ -85,8 +93,13 @@ def test_log_softmax_u55_BI(test_data): ) @common.XfailIfNoCorstone320 def test_log_softmax_u85_BI(test_data): - data, dim = test_data - pipeline = EthosU85PipelineBI[input_t1](LogSoftmax(dim), data, [], run_on_fvp=True) + data, dim = test_data() + pipeline = EthosU85PipelineBI[input_t1]( + LogSoftmax(dim), + data, 
+ [], + run_on_fvp=True, + ) pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op]) pipeline.change_args("run_method_and_compare_outputs", qtol=1) pipeline.run() diff --git a/backends/arm/test/ops/test_lshift.py b/backends/arm/test/ops/test_lshift.py index f6ddabf6612..e74e80deeed 100644 --- a/backends/arm/test/ops/test_lshift.py +++ b/backends/arm/test/ops/test_lshift.py @@ -4,6 +4,7 @@ # LICENSE file in the root directory of this source tree. import torch +from executorch.backends.arm.test import common from executorch.backends.arm.test.common import ( XfailIfNoCorstone300, XfailIfNoCorstone320, @@ -14,7 +15,6 @@ TosaPipelineBI, TosaPipelineMI, ) -from parameterized import parameterized scalar_input_t = tuple[torch.Tensor, int] @@ -23,11 +23,20 @@ class LshiftScalar(torch.nn.Module): torch_op_MI = "torch.ops.aten.__lshift__.Scalar" torch_op_BI = "torch.ops.aten.bitwise_left_shift.Tensor" exir_op = "executorch_exir_dialects_edge__ops_aten_bitwise_left_shift_Tensor" - test_data = [ - ((torch.randint(-8, 8, (1, 12, 3, 4), dtype=torch.int8), 1),), - ((torch.randint(-100, 100, (1, 5, 3, 4), dtype=torch.int16), 5),), - ((torch.randint(-100, 100, (1, 5, 3, 4), dtype=torch.int32), 2),), - ] + test_data = { + "randint_neg_8_int8": ( + torch.randint(-8, 8, (1, 12, 3, 4), dtype=torch.int8), + 1, + ), + "randint_neg_100_int16": ( + torch.randint(-100, 100, (1, 5, 3, 4), dtype=torch.int16), + 5, + ), + "randint_neg_100_int32": ( + torch.randint(-100, 100, (1, 5, 3, 4), dtype=torch.int32), + 2, + ), + } def forward(self, x: torch.Tensor, shift: int): return x << shift @@ -39,33 +48,27 @@ def forward(self, x: torch.Tensor, shift: int): class LshiftTensor(torch.nn.Module): torch_op = "torch.ops.aten.bitwise_left_shift.Tensor" exir_op = "executorch_exir_dialects_edge__ops_aten_bitwise_left_shift_Tensor" - test_data = [ - ( - ( - torch.randint(-8, 8, (3, 3), dtype=torch.int8), - torch.randint(0, 4, (3, 3), dtype=torch.int8), - ), + test_data = { + "randint_neg_8_tensor_int8": ( + torch.randint(-8, 8, (3, 3), dtype=torch.int8), + torch.randint(0, 4, (3, 3), dtype=torch.int8), ), - ( - ( - torch.randint(-1024, 1024, (3, 3, 3), dtype=torch.int16), - torch.randint(0, 5, (3, 3, 3), dtype=torch.int16), - ), + "randint_neg_1024_tensor_int16": ( + torch.randint(-1024, 1024, (3, 3, 3), dtype=torch.int16), + torch.randint(0, 5, (3, 3, 3), dtype=torch.int16), ), - ( - ( - torch.randint(0, 127, (1, 2, 3, 3), dtype=torch.int32), - torch.randint(0, 5, (1, 2, 3, 3), dtype=torch.int32), - ), + "randint_0_tensor_int16": ( + torch.randint(0, 127, (1, 2, 3, 3), dtype=torch.int32), + torch.randint(0, 5, (1, 2, 3, 3), dtype=torch.int32), ), - ] + } def forward(self, x: torch.Tensor, shift: torch.Tensor): return x.bitwise_left_shift(shift) -@parameterized.expand(LshiftScalar.test_data) -def test_lshift_scalar_tosa_MI(test_data): +@common.parametrize("test_data", LshiftScalar.test_data) +def test_lshift_scalar_tosa_MI_scalar(test_data): TosaPipelineMI[scalar_input_t]( LshiftScalar(), test_data, @@ -74,18 +77,21 @@ def test_lshift_scalar_tosa_MI(test_data): ).run() -@parameterized.expand(LshiftScalar.test_data) -def test_lshift_scalar_tosa_BI(test_data): +@common.parametrize("test_data", LshiftScalar.test_data) +def test_bitwise_left_shift_tensor_tosa_BI_scalar(test_data): pipeline = TosaPipelineBI[scalar_input_t]( - LshiftScalar(), test_data, LshiftScalar.torch_op_BI, LshiftScalar.exir_op + LshiftScalar(), + test_data, + LshiftScalar.torch_op_BI, + LshiftScalar.exir_op, ) 
pipeline.pop_stage("check.quant_nodes") pipeline.run() -@parameterized.expand(LshiftScalar.test_data) +@common.parametrize("test_data", LshiftScalar.test_data) @XfailIfNoCorstone300 -def test_lshift_scalar_tosa_u55(test_data): +def test_bitwise_left_shift_tensor_u55_BI_scalar(test_data): pipeline = EthosU55PipelineBI[scalar_input_t]( LshiftScalar(), test_data, @@ -97,9 +103,9 @@ def test_lshift_scalar_tosa_u55(test_data): pipeline.run() -@parameterized.expand(LshiftScalar.test_data) +@common.parametrize("test_data", LshiftScalar.test_data) @XfailIfNoCorstone320 -def test_lshift_scalar_tosa_u85(test_data): +def test_bitwise_left_shift_tensor_u85_BI_scalar(test_data): pipeline = EthosU85PipelineBI[scalar_input_t]( LshiftScalar(), test_data, @@ -111,8 +117,8 @@ def test_lshift_scalar_tosa_u85(test_data): pipeline.run() -@parameterized.expand(LshiftTensor.test_data) -def test_lshift_tensor_tosa_MI(test_data): +@common.parametrize("test_data", LshiftTensor.test_data) +def test_lshift_scalar_tosa_MI(test_data): TosaPipelineMI[scalar_input_t]( LshiftTensor(), test_data, @@ -121,18 +127,21 @@ def test_lshift_tensor_tosa_MI(test_data): ).run() -@parameterized.expand(LshiftTensor.test_data) -def test_lshift_tensor_tosa_BI(test_data): +@common.parametrize("test_data", LshiftTensor.test_data) +def test_bitwise_left_shift_tensor_tosa_BI(test_data): pipeline = TosaPipelineBI[scalar_input_t]( - LshiftTensor(), test_data, LshiftTensor.torch_op, LshiftTensor.exir_op + LshiftTensor(), + test_data, + LshiftTensor.torch_op, + LshiftTensor.exir_op, ) pipeline.pop_stage("check.quant_nodes") pipeline.run() -@parameterized.expand(LshiftTensor.test_data) +@common.parametrize("test_data", LshiftTensor.test_data) @XfailIfNoCorstone300 -def test_lshift_tensor_tosa_u55(test_data): +def test_bitwise_left_shift_tensor_u55_BI(test_data): pipeline = EthosU55PipelineBI[scalar_input_t]( LshiftTensor(), test_data, @@ -144,9 +153,9 @@ def test_lshift_tensor_tosa_u55(test_data): pipeline.run() -@parameterized.expand(LshiftTensor.test_data) +@common.parametrize("test_data", LshiftTensor.test_data) @XfailIfNoCorstone320 -def test_lshift_tensor_tosa_u85(test_data): +def test_bitwise_left_shift_tensor_u85_BI(test_data): pipeline = EthosU85PipelineBI[scalar_input_t]( LshiftTensor(), test_data, diff --git a/backends/arm/test/ops/test_lt.py b/backends/arm/test/ops/test_lt.py index f5664b7895d..92298ca70fa 100644 --- a/backends/arm/test/ops/test_lt.py +++ b/backends/arm/test/ops/test_lt.py @@ -63,24 +63,27 @@ def get_inputs(self): op_lt_scalar_rank4_randn = LessThan(torch.randn(3, 2, 2, 2), 0.3) test_data_tensor = { - "lt_tensor_rank1_ones": op_lt_tensor_rank1_ones, - "lt_tensor_rank2_rand": op_lt_tensor_rank2_rand, - "lt_tensor_rank3_randn": op_lt_tensor_rank3_randn, - "lt_tensor_rank4_randn": op_lt_tensor_rank4_randn, + "lt_tensor_rank1_ones": lambda: op_lt_tensor_rank1_ones, + "lt_tensor_rank2_rand": lambda: op_lt_tensor_rank2_rand, + "lt_tensor_rank3_randn": lambda: op_lt_tensor_rank3_randn, + "lt_tensor_rank4_randn": lambda: op_lt_tensor_rank4_randn, } test_data_scalar = { - "lt_scalar_rank1_ones": op_lt_scalar_rank1_ones, - "lt_scalar_rank2_rand": op_lt_scalar_rank2_rand, - "lt_scalar_rank3_randn": op_lt_scalar_rank3_randn, - "lt_scalar_rank4_randn": op_lt_scalar_rank4_randn, + "lt_scalar_rank1_ones": lambda: op_lt_scalar_rank1_ones, + "lt_scalar_rank2_rand": lambda: op_lt_scalar_rank2_rand, + "lt_scalar_rank3_randn": lambda: op_lt_scalar_rank3_randn, + "lt_scalar_rank4_randn": lambda: op_lt_scalar_rank4_randn, } 
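The pipelines being customized in these diffs expose named stages: pop_stage drops one (the shift tests above pop "check.quant_nodes" because their integer inputs are never quantized), add_stage_after splices in an extra check, and change_args overrides a stage's stored arguments. A toy stand-in for that named-stage mechanic, to show the idea only; this is not the real executorch test-pipeline implementation:

    from typing import Any, Callable

    class ToyPipeline:
        """Toy stand-in: an ordered list of named stages that can be edited."""

        def __init__(self) -> None:
            self.stages: list[tuple[str, Callable[..., Any], tuple]] = []

        def add_stage(self, name: str, fn: Callable[..., Any], *args: Any) -> None:
            self.stages.append((name, fn, args))

        def pop_stage(self, name: str) -> None:
            # drop a stage by name, e.g. "check.quant_nodes"
            self.stages = [s for s in self.stages if s[0] != name]

        def add_stage_after(self, name: str, fn: Callable[..., Any], *args: Any) -> None:
            # splice an extra stage in right after an existing one
            i = next(i for i, s in enumerate(self.stages) if s[0] == name)
            self.stages.insert(i + 1, (fn.__name__, fn, args))

        def change_args(self, name: str, *args: Any) -> None:
            # override the stored arguments of a named stage
            self.stages = [
                (n, f, args) if n == name else (n, f, a) for n, f, a in self.stages
            ]

        def run(self) -> None:
            for _name, fn, args in self.stages:
                fn(*args)
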
@common.parametrize("test_module", test_data_tensor) def test_lt_tensor_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, test_module.get_inputs(), LessThan.aten_op_tensor, LessThan.exir_op + test_module(), + test_module().get_inputs(), + LessThan.aten_op_tensor, + LessThan.exir_op, ) pipeline.run() @@ -88,7 +91,10 @@ def test_lt_tensor_tosa_MI(test_module): @common.parametrize("test_module", test_data_scalar) def test_lt_scalar_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, test_module.get_inputs(), LessThan.aten_op_scalar, LessThan.exir_op + test_module(), + test_module().get_inputs(), + LessThan.aten_op_scalar, + LessThan.exir_op, ) pipeline.run() @@ -96,7 +102,10 @@ def test_lt_scalar_tosa_MI(test_module): @common.parametrize("test_module", test_data_tensor) def test_lt_tensor_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, test_module.get_inputs(), LessThan.aten_op_tensor, LessThan.exir_op + test_module(), + test_module().get_inputs(), + LessThan.aten_op_tensor, + LessThan.exir_op, ) pipeline.run() @@ -104,34 +113,39 @@ def test_lt_tensor_tosa_BI(test_module): @common.parametrize("test_module", test_data_scalar) def test_lt_scalar_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, test_module.get_inputs(), LessThan.aten_op_tensor, LessThan.exir_op + test_module(), + test_module().get_inputs(), + LessThan.aten_op_tensor, + LessThan.exir_op, ) pipeline.run() @common.parametrize("test_module", test_data_tensor) @common.XfailIfNoCorstone300 -def test_lt_tensor_u55_BI(test_module): +def test_lt_tensor_u55_BI_not_delegated(test_module): # LessThan is not supported on U55. pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", + test_module(), + test_module().get_inputs(), {LessThan.exir_op: 1}, + quantize=True, + u55_subset=True, ) pipeline.run() @common.parametrize("test_module", test_data_scalar) @common.XfailIfNoCorstone300 -def test_lt_scalar_u55_BI(test_module): +def test_lt_scalar_u55_BI_not_delegated(test_module): # LessThan is not supported on U55. pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", + test_module(), + test_module().get_inputs(), {LessThan.exir_op: 1}, n_expected_delegates=1, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -146,8 +160,8 @@ def test_lt_scalar_u55_BI(test_module): @common.XfailIfNoCorstone320 def test_lt_tensor_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), LessThan.aten_op_tensor, LessThan.exir_op, run_on_fvp=True, @@ -165,8 +179,8 @@ def test_lt_tensor_u85_BI(test_module): @common.XfailIfNoCorstone320 def test_lt_scalar_u85_BI(test_module): pipeline = EthosU85PipelineBI[input_t]( - test_module, - test_module.get_inputs(), + test_module(), + test_module().get_inputs(), LessThan.aten_op_tensor, LessThan.exir_op, run_on_fvp=True, diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py index 4db8c62bd88..a1fd3ea30ec 100644 --- a/backends/arm/test/ops/test_max_pool.py +++ b/backends/arm/test/ops/test_max_pool.py @@ -5,280 +5,183 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
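Throughout these files, @pytest.mark.corstone_fvp plus runtime is_option_enabled() checks give way to common.XfailIfNoCorstone300/320 decorators with run_on_fvp=True, so FVP-dependent tests fail (expectedly) instead of silently skipping the output comparison. One plausible shape for such a decorator, sketched with plain pytest; the real decorators live in executorch.backends.arm.test.common, and the FVP binary names here are assumptions:

    import shutil

    import pytest

    def xfail_if_missing(binary: str):
        """xfail a test when the required Corstone FVP binary is not on PATH."""
        return pytest.mark.xfail(
            condition=shutil.which(binary) is None,
            raises=Exception,
            reason=f"{binary} not installed",
        )

    # Assumed binary names for the Corstone-300 (Ethos-U55) and Corstone-320 FVPs.
    XfailIfNoCorstone300 = xfail_if_missing("FVP_Corstone_SSE-300_Ethos-U55")
    XfailIfNoCorstone320 = xfail_if_missing("FVP_Corstone_SSE-320")
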
-import unittest from typing import Tuple -import pytest - import torch -from executorch.backends.arm.quantizer import ( - EthosUQuantizer, - get_symmetric_quantization_config, - TOSAQuantizer, -) -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.backends.arm.tosa_specification import TosaSpecification - -from executorch.backends.xnnpack.test.tester.tester import Quantize -from executorch.exir.backend.backend_details import CompileSpec -from parameterized import parameterized - - -test_data_suite = [ - # (test_name, test_data, [kernel_size, stride, padding]) - ("zeros", torch.zeros(1, 1, 4, 8), [2, 2, 1]), - ("ones", torch.ones(1, 16, 50, 32), [4, 2, 0]), - ("rand", torch.rand(1, 16, 52, 16), [4, 3, 0]), - ("non_divisible", torch.rand(1, 16, 112, 112), [3, 2, 1]), -] - -test_data_suite_mult_batches = [ - ("randn", torch.randn(5, 16, 50, 32), [4, 2, 0]), -] - - -class TestMaxPool2d(unittest.TestCase): - """Tests MaxPool2d.""" - - class MaxPool2d(torch.nn.Module): - def __init__( - self, - kernel_size: int | Tuple[int, int], - stride: int | Tuple[int, int], - padding: int | Tuple[int, int], - ): - super().__init__() - self.max_pool_2d = torch.nn.MaxPool2d( - kernel_size=kernel_size, stride=stride, padding=padding - ) - - def forward(self, x): - return self.max_pool_2d(x) - - def _test_maxpool2d_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+MI", - ), - ) - .export() - .check(["torch.ops.aten.max_pool2d.default"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"]) - .check_not( - [ - "executorch_exir_dialects_edge__ops_aten_max_pool2d_with_indices_default" - ] - ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - def _test_maxpool2d_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI") - compile_spec = common.get_tosa_compile_spec(tosa_spec) - quantizer = TOSAQuantizer(tosa_spec).set_io(get_symmetric_quantization_config()) - ( - ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.max_pool2d.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"]) - .check_not( - [ - "executorch_exir_dialects_edge__ops_aten_max_pool2d_with_indices_default" - ] - ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - def _test_maxpool2d_tosa_ethos_BI_pipeline( - self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[torch.tensor], - ): - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() - ) - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.max_pool2d.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - 
.check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - - return tester +from executorch.backends.arm.test import common - @parameterized.expand(test_data_suite) - def test_maxpool2d_tosa_MI( - self, - test_name: str, - test_data: torch.Tensor, - model_params: int | Tuple[int, int], - ): - self._test_maxpool2d_tosa_MI_pipeline( - self.MaxPool2d(*model_params), (test_data,) - ) - - @parameterized.expand(test_data_suite) - def test_maxpool2d_tosa_BI( - self, - test_name: str, - test_data: torch.Tensor, - model_params: int | Tuple[int, int], - ): - self._test_maxpool2d_tosa_BI_pipeline( - self.MaxPool2d(*model_params), (test_data,) - ) +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_maxpool2d_tosa_u55_BI( - self, - test_name: str, - test_data: torch.Tensor, - model_params: int | Tuple[int, int], - ): - tester = self._test_maxpool2d_tosa_ethos_BI_pipeline( - self.MaxPool2d(*model_params), - common.get_u55_compile_spec(), - (test_data,), - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_maxpool2d_tosa_u85_BI( - self, - test_name: str, - test_data: torch.Tensor, - model_params: int | Tuple[int, int], - ): - tester = self._test_maxpool2d_tosa_ethos_BI_pipeline( - self.MaxPool2d(*model_params), - common.get_u85_compile_spec(), - (test_data,), - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) +test_data_suite = { + # (test_name, test_data, [kernel_size, stride, padding]) + "zeros": lambda: (torch.zeros(1, 1, 4, 8), [2, 2, 1]), + "ones": lambda: (torch.ones(1, 16, 50, 32), [4, 2, 0]), + "rand": lambda: (torch.rand(1, 16, 52, 16), [4, 3, 0]), + "non_divisible": lambda: (torch.rand(1, 16, 112, 112), [3, 2, 1]), +} - @parameterized.expand(test_data_suite_mult_batches) - def test_maxpool2d_tosa_MI_mult_batches( - self, - test_name: str, - test_data: torch.Tensor, - model_params: int | Tuple[int, int], - ): - self._test_maxpool2d_tosa_MI_pipeline( - self.MaxPool2d(*model_params), (test_data,) - ) +test_data_suite_mult_batches = { + "randn": lambda: (torch.randn(5, 16, 50, 32), [4, 2, 0]), +} - @parameterized.expand(test_data_suite_mult_batches) - def test_maxpool2d_tosa_BI_mult_batches( - self, - test_name: str, - test_data: torch.Tensor, - model_params: int | Tuple[int, int], - ): - self._test_maxpool2d_tosa_BI_pipeline( - self.MaxPool2d(*model_params), (test_data,) - ) - @parameterized.expand(test_data_suite_mult_batches) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP # TODO: MLETORCH-433 - def test_maxpool2d_tosa_u85_BI_mult_batches( - self, - test_name: str, - test_data: torch.Tensor, - model_params: int | Tuple[int, int], - ): - tester = self._test_maxpool2d_tosa_ethos_BI_pipeline( - self.MaxPool2d(*model_params), - common.get_u85_compile_spec(), - (test_data,), - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) +aten_op = "torch.ops.aten.max_pool2d.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_max_pool2d_default" - @parameterized.expand(test_data_suite_mult_batches) - 
@pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP # TODO: MLETORCH-433 - def test_maxpool2d_tosa_u55_BI_mult_batches( - self, - test_name: str, - test_data: torch.Tensor, - model_params: int | Tuple[int, int], - ): - tester = self._test_maxpool2d_tosa_ethos_BI_pipeline( - self.MaxPool2d(*model_params), - common.get_u55_compile_spec(), - (test_data,), - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=(test_data,)) +input_t1 = Tuple[torch.Tensor] - reject_data_suite = [ - (MaxPool2d(1, 4, 0), torch.rand(1, 10, 10, 10)), - (MaxPool2d((1, 257), 1, 0), torch.rand(1, 16, 5, 300)), - (MaxPool2d((800, 90), 1, 0), torch.rand(1, 16, 850, 100)), - ] - @parameterized.expand(reject_data_suite) - def test_reject_maxpool2d_u55_BI( +class MaxPool2d(torch.nn.Module): + def __init__( self, - module: torch.nn.Module, - test_data: torch.tensor, + kernel_size: int | Tuple[int, int], + stride: int | Tuple[int, int], + padding: int | Tuple[int, int], ): - compile_spec = common.get_u55_compile_spec() - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() + super().__init__() + self.max_pool_2d = torch.nn.MaxPool2d( + kernel_size=kernel_size, stride=stride, padding=padding ) - ( - ArmTester( - module, - example_inputs=(test_data,), - compile_spec=compile_spec, - ) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.max_pool2d.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check( - [ - "executorch_exir_dialects_edge__ops_aten_max_pool2d_with_indices_default" - ] - ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 0}) - ) + def forward(self, x): + return self.max_pool_2d(x) + + +@common.parametrize("test_data", test_data_suite) +def test_max_pool2d_tosa_MI(test_data: torch.Tensor): + test_data, model_params = test_data() + pipeline = TosaPipelineMI[input_t1]( + MaxPool2d(*model_params), (test_data,), aten_op, exir_op + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_max_pool2d_tosa_BI(test_data: torch.Tensor): + test_data, model_params = test_data() + pipeline = TosaPipelineBI[input_t1]( + MaxPool2d(*model_params), + (test_data,), + aten_op, + exir_op, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 +def test_max_pool2d_u55_BI(test_data: torch.Tensor): + test_data, model_params = test_data() + EthosU55PipelineBI[input_t1]( + MaxPool2d(*model_params), + (test_data,), + aten_op, + exir_ops=[], + symmetric_io_quantization=True, + run_on_fvp=True, + ).run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_max_pool2d_u85_BI(test_data: torch.Tensor): + test_data, model_params = test_data() + EthosU85PipelineBI[input_t1]( + MaxPool2d(*model_params), + (test_data,), + aten_op, + exir_ops=[], + symmetric_io_quantization=True, + run_on_fvp=True, + ).run() + + +@common.parametrize("test_data", test_data_suite_mult_batches) +def test_max_pool2d_tosa_MI_mult_batches(test_data: torch.Tensor): + test_data, model_params = test_data() + pipeline = TosaPipelineMI[input_t1]( + MaxPool2d(*model_params), + (test_data,), + aten_op, + exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite_mult_batches) +def test_max_pool2d_tosa_BI_mult_batches(test_data: torch.Tensor): + test_data, model_params = test_data() + 
pipeline = TosaPipelineBI[input_t1](
+        MaxPool2d(*model_params),
+        (test_data,),
+        aten_op,
+        exir_op,
+        symmetric_io_quantization=True,
+    )
+    pipeline.run()
+
+
+x_fail = {"randn": "MLETORCH-986: Numerical issues with multi batches."}
+
+
+@common.parametrize("test_data", test_data_suite_mult_batches, x_fail)
+@common.XfailIfNoCorstone300
+def test_max_pool2d_u55_BI_mult_batches(test_data: torch.Tensor):
+    test_data, model_params = test_data()
+    EthosU55PipelineBI[input_t1](
+        MaxPool2d(*model_params),
+        (test_data,),
+        aten_op,
+        exir_ops=[],
+        run_on_fvp=True,
+        symmetric_io_quantization=True,
+        use_to_edge_transform_and_lower=True,
+    ).run()
+
+
+@common.parametrize("test_data", test_data_suite_mult_batches, x_fail)
+@common.XfailIfNoCorstone320
+def test_max_pool2d_u85_BI_mult_batches(test_data: torch.Tensor):
+    test_data, model_params = test_data()
+    EthosU85PipelineBI[input_t1](
+        MaxPool2d(*model_params),
+        (test_data,),
+        aten_op,
+        exir_op,
+        run_on_fvp=True,
+        symmetric_io_quantization=True,
+        use_to_edge_transform_and_lower=True,
+    ).run()
+
+
+reject_data_suite = {
+    "reject_1": lambda: (MaxPool2d(1, 4, 0), torch.rand(1, 10, 10, 10)),
+    "reject_2": lambda: (MaxPool2d((1, 257), 1, 0), torch.rand(1, 16, 5, 300)),
+    "reject_3": lambda: (MaxPool2d((800, 90), 1, 0), torch.rand(1, 16, 850, 100)),
+}
+
+
+@common.parametrize("test_data", reject_data_suite)
+@common.XfailIfNoCorstone300
+def test_max_pool2d_u55_BI_failure_set(test_data: Tuple):
+    module, test_data = test_data()
+    pipeline = EthosU55PipelineBI[input_t1](
+        module,
+        (test_data,),
+        aten_op,
+        exir_op,
+        run_on_fvp=False,
+        symmetric_io_quantization=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.pop_stage("check_count.exir")
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_maximum.py b/backends/arm/test/ops/test_maximum.py
index a255496d517..adcc7dc9cab 100644
--- a/backends/arm/test/ops/test_maximum.py
+++ b/backends/arm/test/ops/test_maximum.py
@@ -1,127 +1,75 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
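# A note on the pattern used across these rewritten suites: the old
# parameterized.expand lists become dicts of zero-argument lambdas, which
# common.parametrize expands into named pytest cases, with an optional
# xfail dict (like x_fail above) marking known-bad cases instead of
# deleting them. A minimal sketch of how such a helper can be built on
# plain pytest -- illustrative only, not the actual Arm test-utils code:

import pytest


def parametrize(arg_name, test_suite, xfails=None):
    # Factories are passed through unevaluated; each test calls
    # test_data() itself, so tensors are built per test rather than at
    # collection time.
    xfails = xfails or {}
    params = [
        pytest.param(
            fn,
            id=name,
            marks=pytest.mark.xfail(reason=xfails[name]) if name in xfails else (),
        )
        for name, fn in test_suite.items()
    ]
    return pytest.mark.parametrize(arg_name, params)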
-import unittest from typing import Tuple import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - - -class TestMaximum(unittest.TestCase): - """Tests a single maximum op""" - - class Maximum(torch.nn.Module): - test_parameters = [ - ( - torch.FloatTensor([1, 2, 3, 5, 7]), - (torch.FloatTensor([2, 1, 2, 1, 10])), - ), - (torch.ones(1, 10, 4, 6), 2 * torch.ones(1, 10, 4, 6)), - (torch.randn(1, 1, 4, 4), torch.ones(1, 1, 4, 1)), - (torch.randn(1, 3, 4, 4), torch.randn(1, 3, 4, 4)), - (10000 * torch.randn(1, 1, 4, 4), torch.randn(1, 1, 4, 1)), - ] - - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.maximum(x, y) - - def _test_maximum_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.maximum.default": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_maximum_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.maximum.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_maximum_ethos_BI_pipeline( - self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[torch.Tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .to_edge() - .partition() - .to_executorch() - .serialize() - ) - - return tester - - @parameterized.expand(Maximum.test_parameters) - def test_maximum_tosa_MI(self, operand1: torch.Tensor, operand2: torch.Tensor): - test_data = (operand1, operand2) - self._test_maximum_tosa_MI_pipeline(self.Maximum(), test_data) - - @parameterized.expand(Maximum.test_parameters) - def test_maximum_tosa_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): - test_data = (operand1, operand2) - self._test_maximum_tosa_BI_pipeline(self.Maximum(), test_data) - - @parameterized.expand(Maximum.test_parameters) - def test_maximum_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): - test_data = (operand1, operand2) - tester = self._test_maximum_ethos_BI_pipeline( - self.Maximum(), common.get_u55_compile_spec(), test_data - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(Maximum.test_parameters) - def test_maximum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): - test_data = (operand1, operand2) - tester = self._test_maximum_ethos_BI_pipeline( - self.Maximum(), common.get_u85_compile_spec(), test_data - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) +from executorch.backends.arm.test import 
common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +test_t = tuple[torch.Tensor, torch.Tensor] +aten_op = "torch.ops.aten.maximum.default" + + +class Maximum(torch.nn.Module): + test_parameters = { + "float_tensor": lambda: ( + torch.FloatTensor([1, 2, 3, 5, 7]), + (torch.FloatTensor([2, 1, 2, 1, 10])), + ), + "ones": lambda: (torch.ones(1, 10, 4, 6), 2 * torch.ones(1, 10, 4, 6)), + "rand_diff": lambda: (torch.randn(1, 1, 4, 4), torch.ones(1, 1, 4, 1)), + "rand_same": lambda: (torch.randn(1, 3, 4, 4), torch.randn(1, 3, 4, 4)), + "rand_large": lambda: ( + 10000 * torch.randn(1, 1, 4, 4), + torch.randn(1, 1, 4, 1), + ), + } + + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.maximum(x, y) + + +@common.parametrize("test_data", Maximum.test_parameters) +def test_maximum_tosa_MI(test_data: Tuple): + TosaPipelineMI[test_t](Maximum(), test_data(), aten_op).run() + + +@common.parametrize("test_data", Maximum.test_parameters) +def test_maximum_tosa_BI(test_data: Tuple): + TosaPipelineBI[test_t](Maximum(), test_data(), aten_op).run() + + +@common.parametrize("test_data", Maximum.test_parameters) +@common.XfailIfNoCorstone300 +def test_maximum_u55_BI(test_data: Tuple): + EthosU55PipelineBI[test_t]( + Maximum(), + test_data(), + aten_op, + run_on_fvp=True, + ).run() + + +@common.parametrize("test_data", Maximum.test_parameters) +@common.XfailIfNoCorstone320 +def test_maximum_u85_BI(test_data: Tuple): + EthosU85PipelineBI[test_t]( + Maximum(), + test_data(), + aten_op, + run_on_fvp=True, + ).run() diff --git a/backends/arm/test/ops/test_mean_dim.py b/backends/arm/test/ops/test_mean_dim.py index 2351b0f9e9c..43063058805 100644 --- a/backends/arm/test/ops/test_mean_dim.py +++ b/backends/arm/test/ops/test_mean_dim.py @@ -20,10 +20,10 @@ class AdaptiveAveragePool2d(torch.nn.Module): test_data_suite = { # (test_name, test_data) - "zeros": (torch.zeros(1, 1280, 7, 7),), - "ones": (torch.ones(1, 1280, 7, 7),), - "rand": (torch.rand(1, 1280, 7, 7),), - "randn": (torch.randn(1, 1280, 7, 7),), + "zeros": lambda: (torch.zeros(1, 1280, 7, 7),), + "ones": lambda: (torch.ones(1, 1280, 7, 7),), + "rand": lambda: (torch.rand(1, 1280, 7, 7),), + "randn": lambda: (torch.randn(1, 1280, 7, 7),), } aten_op = "torch.ops.aten.adaptive_avg_pool2d.default" exir_op = "executorch_exir_dialects_edge__ops_aten_mean_dim" @@ -40,7 +40,7 @@ def forward(self, x): def test_adaptive_avg_pool2d_tosa_MI(test_data): TosaPipelineMI[input_t]( AdaptiveAveragePool2d(), - test_data, + test_data(), AdaptiveAveragePool2d.aten_op, AdaptiveAveragePool2d.exir_op, ).run() @@ -50,38 +50,18 @@ def test_adaptive_avg_pool2d_tosa_MI(test_data): def test_adaptive_avg_pool2d_tosa_BI(test_data): TosaPipelineBI[input_t]( AdaptiveAveragePool2d(), - test_data, + test_data(), AdaptiveAveragePool2d.aten_op, AdaptiveAveragePool2d.exir_op, ).run() @common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite) -def test_adaptive_avg_pool2d_u55(test_data): - EthosU55PipelineBI[input_t]( - AdaptiveAveragePool2d(), - test_data, - AdaptiveAveragePool2d.aten_op, - AdaptiveAveragePool2d.exir_op, - ).run() - - -@common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite) -def test_adaptive_avg_pool2d_u85(test_data): - EthosU85PipelineBI[input_t]( - AdaptiveAveragePool2d(), - test_data, - AdaptiveAveragePool2d.aten_op, - AdaptiveAveragePool2d.exir_op, - ).run() - - -@common.parametrize("test_data", 
AdaptiveAveragePool2d.test_data_suite) -@common.SkipIfNoCorstone300 -def test_adaptive_avg_pool2d_u55_on_fvp(test_data): +@common.XfailIfNoCorstone300 +def test_adaptive_avg_pool2d_u55_BI(test_data): EthosU55PipelineBI[input_t]( AdaptiveAveragePool2d(), - test_data, + test_data(), AdaptiveAveragePool2d.aten_op, AdaptiveAveragePool2d.exir_op, run_on_fvp=True, @@ -89,11 +69,11 @@ def test_adaptive_avg_pool2d_u55_on_fvp(test_data): @common.parametrize("test_data", AdaptiveAveragePool2d.test_data_suite) -@common.SkipIfNoCorstone320 -def test_adaptive_avg_pool2d_u85_on_fvp(test_data): +@common.XfailIfNoCorstone320 +def test_adaptive_avg_pool2d_u85_BI(test_data): EthosU85PipelineBI[input_t]( AdaptiveAveragePool2d(), - test_data, + test_data(), AdaptiveAveragePool2d.aten_op, AdaptiveAveragePool2d.exir_op, run_on_fvp=True, @@ -102,14 +82,14 @@ def test_adaptive_avg_pool2d_u85_on_fvp(test_data): class MeanDim(torch.nn.Module): test_data_suite: dict[str, tuple] = { - "zeros": (torch.zeros(1, 1280, 7, 7), -1, True), - "ones": (torch.ones(1, 1280, 7, 7), (-1, 2), False), - "rand": ( + "zeros": lambda: (torch.zeros(1, 1280, 7, 7), -1, True), + "ones": lambda: (torch.ones(1, 1280, 7, 7), (-1, 2), False), + "rand": lambda: ( torch.rand(1, 1280, 7, 7), (-1), True, ), - "randn": ( + "randn": lambda: ( torch.randn(1, 1280, 7, 7), (-1, -2, -3), False, @@ -128,20 +108,22 @@ def forward(self, x: torch.Tensor): @common.parametrize("test_data", MeanDim.test_data_suite) -def test_mean_tosa_MI(test_data): +def test_mean_dim_tosa_MI(test_data): + test_data, dim, keep_dim = test_data() TosaPipelineMI[input_t]( - MeanDim(test_data[1], test_data[2]), - (test_data[0],), + MeanDim(dim, keep_dim), + (test_data,), MeanDim.torch_op, MeanDim.exir_op, ).run() @common.parametrize("test_data", MeanDim.test_data_suite) -def test_mean_tosa_BI(test_data): +def test_mean_dim_tosa_BI(test_data): + test_data, dim, keep_dim = test_data() pipeline = TosaPipelineBI[input_t]( - MeanDim(test_data[1], test_data[2]), - (test_data[0],), + MeanDim(dim, keep_dim), + (test_data,), "torch.ops.aten.sum.dim_IntList", # Just check for sum op included in the mean decomposition ) pipeline.change_args("run_method_and_compare_outputs", qtol=1) @@ -150,10 +132,11 @@ def test_mean_tosa_BI(test_data): @common.parametrize("test_data", MeanDim.test_data_suite) @common.XfailIfNoCorstone300 -def test_mean_u55_BI(test_data): +def test_mean_dim_u55_BI(test_data): + test_data, dim, keep_dim = test_data() pipeline = EthosU55PipelineBI[input_t]( - MeanDim(test_data[1], test_data[2]), - (test_data[0],), + MeanDim(dim, keep_dim), + (test_data,), "torch.ops.aten.sum.dim_IntList", # Just check for sum op included in the mean decomposition run_on_fvp=True, ) @@ -163,10 +146,11 @@ def test_mean_u55_BI(test_data): @common.parametrize("test_data", MeanDim.test_data_suite) @common.XfailIfNoCorstone320 -def test_mean_u85_BI(test_data): +def test_mean_dim_u85_BI(test_data): + test_data, dim, keep_dim = test_data() pipeline = EthosU85PipelineBI[input_t]( - MeanDim(test_data[1], test_data[2]), - (test_data[0],), + MeanDim(dim, keep_dim), + (test_data,), "torch.ops.aten.sum.dim_IntList", # Just check for sum op included in the mean decomposition run_on_fvp=True, ) diff --git a/backends/arm/test/ops/test_minimum.py b/backends/arm/test/ops/test_minimum.py index 04693a46435..27922cda5e0 100644 --- a/backends/arm/test/ops/test_minimum.py +++ b/backends/arm/test/ops/test_minimum.py @@ -1,130 +1,75 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. 
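# The MeanDim BI tests above assert only on aten.sum.dim_IntList because the
# quantized flow decomposes mean.dim into a sum followed by a multiply with
# 1/N. The identity being relied on, as a standalone sanity check (this is
# not the backend's actual lowering pass):

import torch

x = torch.rand(1, 1280, 7, 7)
dims = (-1, -2)
n = x.shape[-1] * x.shape[-2]  # elements reduced per output value
assert torch.allclose(x.mean(dim=dims), x.sum(dim=dims) / n)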
-# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest from typing import Tuple import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - - -class TestMinimum(unittest.TestCase): - """Tests a single minimum op""" - - class Minimum(torch.nn.Module): - test_parameters = [ - ( - torch.FloatTensor([1, 2, 3, 5, 7]), - (torch.FloatTensor([2, 1, 2, 1, 10])), - ), - (torch.ones(1, 10, 4, 6), 2 * torch.ones(1, 10, 4, 6)), - (torch.randn(1, 1, 4, 4), torch.ones(1, 1, 4, 1)), - (torch.randn(1, 3, 4, 4), torch.randn(1, 3, 4, 4)), - (10000 * torch.randn(1, 1, 4, 4), torch.randn(1, 1, 4, 1)), - ] - - def __init__(self): - super().__init__() - - def forward(self, x, y): - return torch.minimum(x, y) - - def _test_minimum_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.minimum.default": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_minimum_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.minimum.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_minimum_ethos_BI_pipeline( - self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[torch.Tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .to_edge() - .partition() - .to_executorch() - .serialize() - ) - - return tester - - @parameterized.expand(Minimum.test_parameters) - def test_minimum_tosa_MI(self, operand1: torch.Tensor, operand2: torch.Tensor): - test_data = (operand1, operand2) - self._test_minimum_tosa_MI_pipeline(self.Minimum(), test_data) - - @parameterized.expand(Minimum.test_parameters) - def test_minimum_tosa_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): - test_data = (operand1, operand2) - self._test_minimum_tosa_BI_pipeline(self.Minimum(), test_data) - - @parameterized.expand(Minimum.test_parameters) - def test_minimum_u55_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): - test_data = (operand1, operand2) - tester = self._test_minimum_ethos_BI_pipeline( - self.Minimum(), common.get_u55_compile_spec(), test_data - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(Minimum.test_parameters) - def test_minimum_u85_BI(self, operand1: torch.Tensor, operand2: torch.Tensor): - test_data = (operand1, operand2) 
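# The quantized (BI) comparisons in these suites pass qtol=1, i.e. the
# delegate output may differ from the fp32 reference by one quantization
# step. Assuming a per-tensor symmetric int8 scheme (an assumption made
# here only to size that step), the slack for the large-magnitude case is:

import torch

x = 10000 * torch.randn(1, 1, 4, 4)  # "rand_large"-style input from this suite
step = x.abs().max().item() / 127    # one int8 level, symmetric quantization
print(f"qtol=1 tolerates an absolute error of about {step:.2f}")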
- tester = self._test_minimum_ethos_BI_pipeline( - self.Minimum(), common.get_u85_compile_spec(), test_data - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs( - qtol=1, - inputs=test_data, - ) +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +test_t = tuple[torch.Tensor, torch.Tensor] +aten_op = "torch.ops.aten.minimum.default" + + +class Minimum(torch.nn.Module): + test_parameters = { + "float_tensor": lambda: ( + torch.FloatTensor([1, 2, 3, 5, 7]), + (torch.FloatTensor([2, 1, 2, 1, 10])), + ), + "ones": lambda: (torch.ones(1, 10, 4, 6), 2 * torch.ones(1, 10, 4, 6)), + "rand_diff": lambda: (torch.randn(1, 1, 4, 4), torch.ones(1, 1, 4, 1)), + "rand_same": lambda: (torch.randn(1, 3, 4, 4), torch.randn(1, 3, 4, 4)), + "rand_large": lambda: ( + 10000 * torch.randn(1, 1, 4, 4), + torch.randn(1, 1, 4, 1), + ), + } + + def __init__(self): + super().__init__() + + def forward(self, x, y): + return torch.minimum(x, y) + + +@common.parametrize("test_data", Minimum.test_parameters) +def test_minimum_tosa_MI(test_data: Tuple): + TosaPipelineMI[test_t](Minimum(), test_data(), aten_op).run() + + +@common.parametrize("test_data", Minimum.test_parameters) +def test_minimum_tosa_BI(test_data: Tuple): + TosaPipelineBI[test_t](Minimum(), test_data(), aten_op).run() + + +@common.parametrize("test_data", Minimum.test_parameters) +@common.XfailIfNoCorstone300 +def test_minimum_u55_BI(test_data: Tuple): + EthosU55PipelineBI[test_t]( + Minimum(), + test_data(), + aten_op, + run_on_fvp=True, + ).run() + + +@common.parametrize("test_data", Minimum.test_parameters) +@common.XfailIfNoCorstone320 +def test_minimum_u85_BI(test_data: Tuple): + EthosU85PipelineBI[test_t]( + Minimum(), + test_data(), + aten_op, + run_on_fvp=True, + ).run() diff --git a/backends/arm/test/ops/test_mm.py b/backends/arm/test/ops/test_mm.py index a4503280db9..a5a3b4b98b9 100644 --- a/backends/arm/test/ops/test_mm.py +++ b/backends/arm/test/ops/test_mm.py @@ -4,7 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
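# torch.mm is strictly a 2-D matrix multiply, which is why every generator
# in the suite below yields a pair of rank-2 tensors with matching inner
# dimensions; batched or broadcast inputs would need torch.matmul instead:

import torch

a, b = torch.rand(3, 5), torch.rand(5, 2)
assert torch.mm(a, b).shape == (3, 2)  # (3, 5) x (5, 2) -> (3, 2)
# torch.mm(torch.rand(1, 3, 5), b) would raise a RuntimeError (not a matrix)
assert torch.matmul(torch.rand(4, 3, 5), torch.rand(5, 2)).shape == (4, 3, 2)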
-from typing import Callable +from typing import Tuple import pytest import torch @@ -15,19 +15,18 @@ TosaPipelineBI, TosaPipelineMI, ) -from parameterized import parameterized test_t = tuple[torch.Tensor, torch.Tensor] class MM(torch.nn.Module): - test_data_generators = [ - lambda: (torch.rand(3, 5), torch.rand(5, 2)), - lambda: (torch.rand(1, 1), torch.rand(1, 1)), - lambda: (torch.ones(55, 3), torch.ones(3, 44)), - lambda: (10000 * torch.randn(1, 10), torch.randn(10, 5)), - lambda: (-10 * torch.randn(32, 64), 5 + 5 * torch.randn(64, 32)), - ] + test_data_generators = { + "rand_2d": lambda: (torch.rand(3, 5), torch.rand(5, 2)), + "rand_same": lambda: (torch.rand(1, 1), torch.rand(1, 1)), + "ones": lambda: (torch.ones(55, 3), torch.ones(3, 44)), + "randn_large": lambda: (10000 * torch.randn(1, 10), torch.randn(10, 5)), + "rand_neg": lambda: (-10 * torch.randn(32, 64), 5 + 5 * torch.randn(64, 32)), + } aten_op = "torch.ops.aten.mm.default" exir_op = "executorch_exir_dialects_edge__ops_aten_mm_default" @@ -35,43 +34,35 @@ def forward(self, x, y): return torch.mm(x, y) -@parameterized.expand(MM.test_data_generators) -def test_mm_tosa_MI(test_data_generator: Callable[[], tuple]): - test_data = test_data_generator() - TosaPipelineMI[test_t](MM(), test_data, MM.aten_op).run() +@common.parametrize("test_data", MM.test_data_generators) +def test_mm_tosa_MI(test_data: Tuple): + TosaPipelineMI[test_t](MM(), test_data(), MM.aten_op).run() -@parameterized.expand(MM.test_data_generators) -def test_mm_tosa_BI(test_data_generator: Callable[[], tuple]): - test_data = test_data_generator() - TosaPipelineBI[test_t](MM(), test_data, MM.aten_op, MM.exir_op).run() +@common.parametrize("test_data", MM.test_data_generators) +def test_mm_tosa_BI(test_data: Tuple): + TosaPipelineBI[test_t](MM(), test_data(), MM.aten_op, MM.exir_op).run() -@parameterized.expand(MM.test_data_generators) -def test_mm_tosa_u55(test_data_generator: Callable[[], tuple]): - test_data = test_data_generator() - EthosU55PipelineBI[test_t](MM(), test_data, MM.aten_op).run() - - -@parameterized.expand(MM.test_data_generators) +@common.parametrize("test_data", MM.test_data_generators) +@common.XfailIfNoCorstone300 @pytest.mark.flaky # Investigate flakiness (MLETORCH-870) -def test_mm_tosa_u85(test_data_generator: Callable[[], tuple]): - test_data = test_data_generator() - EthosU85PipelineBI[test_t](MM(), test_data, MM.aten_op, MM.exir_op).run() - - -@parameterized.expand(MM.test_data_generators) -@common.SkipIfNoCorstone300 -def test_mm_tosa_u55_on_fvp(test_data_generator: Callable[[], tuple]): - test_data = test_data_generator() - EthosU55PipelineBI[test_t](MM(), test_data, MM.aten_op, run_on_fvp=True).run() +def test_mm_u55_BI(test_data: Tuple): + EthosU55PipelineBI[test_t]( + MM(), + test_data(), + MM.aten_op, + run_on_fvp=True, + ).run() -@parameterized.expand(MM.test_data_generators) -@common.SkipIfNoCorstone320 -@pytest.mark.flaky # Investigate flakiness (MLETORCH-870) -def test_mm_tosa_u85_on_fvp(test_data_generator: Callable[[], tuple]): - test_data = test_data_generator() +@common.parametrize("test_data", MM.test_data_generators) +@common.XfailIfNoCorstone320 +def test_mm_u85_BI(test_data: Tuple): EthosU85PipelineBI[test_t]( - MM(), test_data, MM.aten_op, MM.exir_op, run_on_fvp=True + MM(), + test_data(), + MM.aten_op, + MM.exir_op, + run_on_fvp=True, ).run() diff --git a/backends/arm/test/ops/test_mul.py b/backends/arm/test/ops/test_mul.py index 739864a4982..f960f348a87 100644 --- a/backends/arm/test/ops/test_mul.py +++ 
b/backends/arm/test/ops/test_mul.py @@ -1,226 +1,155 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest -import pytest +from typing import Tuple import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.backend_details import CompileSpec -from parameterized import parameterized -test_data_suite = [ +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +input_t1 = Tuple[torch.Tensor, torch.Tensor] # Input x +aten_op = "torch.ops.aten.mul.Tensor" + +test_data_suite = { # (test_name, input, other,) See torch.mul() for info - ( - "op_mul_rank1_rand", + "op_mul_rank1_rand": lambda: ( torch.rand(5) * 3.7, torch.rand(5) * 1.5, ), - ( - "op_mul_rank2_rand", + "op_mul_rank2_rand": lambda: ( torch.rand(4, 5), torch.rand(1, 5), ), - ( - "op_mul_rank3_randn", + "op_mul_rank3_randn": lambda: ( torch.randn(10, 5, 2), torch.randn(10, 5, 2), ), - ( - "op_mul_rank4_randn", + "op_mul_rank4_randn": lambda: ( torch.randn(1, 10, 25, 20), torch.randn(1, 10, 25, 20), ), - ( - "op_mul_rank4_ones_mul_negative", + "op_mul_rank4_ones_mul_negative": lambda: ( torch.ones(1, 10, 25, 20), (-1) * torch.ones(1, 10, 25, 20), ), - ( - "op_mul_rank4_negative_large_rand", + "op_mul_rank4_negative_large_rand": lambda: ( (-200) * torch.rand(1, 10, 25, 20), torch.rand(1, 1, 1, 20), ), - ( - "op_mul_rank4_large_randn", + "op_mul_rank4_large_randn": lambda: ( 200 * torch.randn(1, 10, 25, 20), torch.rand(1, 10, 25, 1), ), -] +} -test_data_suite_2 = [ +test_data_suite_2 = { # (test_name, input, other,) See torch.mul() for info - ( - "op_mul_rank2_rand", + "op_mul_rank2_rand": lambda: ( torch.rand(4, 5), torch.rand(5), ), - ( - "op_mul_rank3_randn", + "op_mul_rank3_randn": lambda: ( torch.randn(10, 5, 2), torch.randn(5, 2), ), - ( - "op_mul_rank4_randn", + "op_mul_rank4_randn": lambda: ( torch.randn(1, 10, 25, 20), torch.randn(1, 25, 20), ), - ( - "op_mul_rank4_randn_2", + "op_mul_rank4_randn_2": lambda: ( torch.randn(1, 25, 1), torch.randn(1, 3, 25, 10), ), -] - - -class TestMul(unittest.TestCase): - class Mul(torch.nn.Module): - - def forward( - self, - input_: torch.Tensor, - other_: torch.Tensor, - ): - return input_ * other_ - - def _test_mul_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: tuple[torch.Tensor, torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+MI", - ), - ) - .export() - .check_count({"torch.ops.aten.mul.Tensor": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_mul_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: tuple[torch.Tensor, torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec( - "TOSA-0.80+BI", - ), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.mul.Tensor": 1}) - 
.check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1.0) - ) - - def _test_mul_ethosu_BI_pipeline( - self, - compile_spec: CompileSpec, - module: torch.nn.Module, - test_data: tuple[torch.Tensor, torch.Tensor], - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.mul.Tensor": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite) - def test_mul_tosa_MI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_mul_tosa_MI_pipeline(self.Mul(), test_data) - - @parameterized.expand(test_data_suite_2) - def test_mul_diff_input_ranks_tosa_MI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_mul_tosa_MI_pipeline(self.Mul(), test_data) +} - @parameterized.expand(test_data_suite_2) - def test_mul_diff_input_ranks_tosa_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_mul_tosa_BI_pipeline(self.Mul(), test_data) - @parameterized.expand(test_data_suite) - def test_mul_tosa_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - - test_data = (input_, other_) - self._test_mul_tosa_BI_pipeline(self.Mul(), test_data) +class Mul(torch.nn.Module): - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_mul_u55_BI( - self, - test_name: str, - input_: torch.Tensor, - other_: torch.Tensor, - ): - test_data = (input_, other_) - self._test_mul_ethosu_BI_pipeline( - common.get_u55_compile_spec(), self.Mul(), test_data - ) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_mul_u85_BI( + def forward( self, - test_name: str, input_: torch.Tensor, other_: torch.Tensor, ): - test_data = (input_, other_) - self._test_mul_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.Mul(), test_data - ) + return input_ * other_ + + +@common.parametrize("test_data", test_data_suite) +def test_mul_tensor_tosa_MI(test_data: torch.Tensor): + pipeline = TosaPipelineMI[input_t1]( + Mul(), + test_data(), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite_2) +def test_mul_tensor_tosa_MI_diff_input_ranks(test_data: torch.Tensor): + pipeline = TosaPipelineMI[input_t1]( + Mul(), + test_data(), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite_2) +def test_mul_tensor_tosa_BI_diff_input_ranks(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t1]( + Mul(), + test_data(), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_mul_tensor_tosa_BI(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t1]( + Mul(), + test_data(), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 +def test_mul_tensor_u55_BI(test_data: torch.Tensor): + pipeline = 
EthosU55PipelineBI[input_t1]( + Mul(), + test_data(), + aten_op, + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_mul_tensor_u85_BI(test_data: torch.Tensor): + pipeline = EthosU85PipelineBI[input_t1]( + Mul(), + test_data(), + aten_op, + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_ne.py b/backends/arm/test/ops/test_ne.py index 7ab75827cad..2ceacdb31b9 100644 --- a/backends/arm/test/ops/test_ne.py +++ b/backends/arm/test/ops/test_ne.py @@ -126,11 +126,12 @@ def test_ne_tensor_u55_BI(test_module): pipeline = OpNotSupportedPipeline[input_t]( test_module, test_module.get_inputs(), - "TOSA-0.80+BI+u55", { NotEqual.decomposed_exir_ops[0]: 1, NotEqual.decomposed_exir_ops[1]: 1, }, + quantize=True, + u55_subset=True, ) pipeline.run() @@ -143,11 +144,12 @@ def test_ne_scalar_u55_BI(test_module): pipeline = OpNotSupportedPipeline[input_t]( test_module, test_module.get_inputs(), - "TOSA-0.80+BI+u55", { NotEqual.decomposed_exir_ops[0]: 1, NotEqual.decomposed_exir_ops[1]: 1, }, + quantize=True, + u55_subset=True, n_expected_delegates=1, ) pipeline.run() diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py index 50db1231b41..3bbfdb69903 100644 --- a/backends/arm/test/ops/test_permute.py +++ b/backends/arm/test/ops/test_permute.py @@ -5,185 +5,105 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest from typing import Tuple -import pytest - import torch -from executorch.backends.arm.quantizer import ( - EthosUQuantizer, - get_symmetric_quantization_config, - TOSAQuantizer, +from executorch.backends.arm.test import common + +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, ) -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.backends.arm.test.tester.test_pipeline import OpNotSupportedPipeline -from executorch.backends.arm.tosa_specification import TosaSpecification -from executorch.backends.xnnpack.test.tester.tester import Quantize -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized from torchvision.ops import Permute -test_data_suite = [ +input_t1 = Tuple[torch.Tensor] # Input x + +aten_op = "torch.ops.aten.permute.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_permute_default" + +test_data_suite = { # (test_name,test_data,dims) - ("rank_2", torch.rand(10, 10), [1, 0]), - ("rank_3", torch.rand(10, 10, 10), [2, 0, 1]), - ("rank_3", torch.rand(10, 10, 10), [1, 2, 0]), - ("rank_4", torch.rand(1, 5, 1, 10), [0, 2, 3, 1]), - ("rank_4", torch.rand(1, 2, 5, 10), [1, 0, 2, 3]), - ("rank_4", torch.rand(1, 10, 10, 5), [2, 0, 1, 3]), -] - - -class TestPermute(unittest.TestCase): - """Tests Permute Operator.""" - - class Permute(torch.nn.Module): - - def __init__(self, dims: list[int]): - super().__init__() - - self.permute = Permute(dims=dims) - - def forward(self, x): - return self.permute(x) - - def _test_permute_tosa_MI_pipeline( - self, - module: torch.nn.Module, - test_data: Tuple[torch.tensor], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - 
.check(["torch.ops.aten.permute.default"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_permute_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI") - compile_spec = common.get_tosa_compile_spec(tosa_spec) - quantizer = TOSAQuantizer(tosa_spec).set_io(get_symmetric_quantization_config()) - ( - ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.permute.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_permute_ethos_BI_pipeline( - self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: Tuple[torch.Tensor], - ): - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() - ) - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.permute.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite) - def test_permute_tosa_MI( - self, test_name: str, test_data: torch.Tensor, dims: list[int] - ): - self._test_permute_tosa_MI_pipeline(self.Permute(dims=dims), (test_data,)) - self._test_permute_tosa_MI_pipeline(self.Permute(dims=dims), (test_data,)) - - @parameterized.expand(test_data_suite) - def test_permute_tosa_BI( - self, test_name: str, test_data: torch.Tensor, dims: list[int] - ): - self._test_permute_tosa_BI_pipeline(self.Permute(dims=dims), (test_data,)) - - # Expected to fail as TOSA.Transpose is not supported by Ethos-U55. - @parameterized.expand(test_data_suite[0:1]) - @pytest.mark.corstone_fvp - def test_permute_u55_BI( - self, test_name: str, test_data: torch.Tensor, dims: list[int] - ): - self._test_permute_ethos_BI_pipeline( - self.Permute(dims=dims), common.get_u55_compile_spec(), (test_data,) - ) - - @parameterized.expand(test_data_suite[:-2]) - @pytest.mark.corstone_fvp - def test_permute_u85_BI( - self, test_name: str, test_data: torch.Tensor, dims: list[int] - ): - self._test_permute_ethos_BI_pipeline( - self.Permute(dims=dims), common.get_u85_compile_spec(), (test_data,) - ) - - # Fails since on FVP since N > 1 is not supported. 
MLETORCH-517 - @parameterized.expand(test_data_suite[-2:]) - @pytest.mark.corstone_fvp - @conftest.expectedFailureOnFVP - def test_permute_u85_BI_xfails( - self, test_name: str, test_data: torch.Tensor, dims: list[int] - ): - self._test_permute_ethos_BI_pipeline( - self.Permute(dims=dims), common.get_u85_compile_spec(), (test_data,) - ) - - -reject_data_suite = { - "int8_r3_axes_product": ([1, 700, 1000], [2, 1, 0], torch.int8), - "int8_r5_axes_product": ([1, 1, 1, 700, 1000], [0, 1, 2, 3, 4], torch.int8), - "int8_r4_NH_too_large": ([700, 100, 1, 1], [0, 1, 3, 2], torch.int8), - "int32_r5_no_support": ([2, 2, 2, 2, 2], [3, 4, 2, 1, 0], torch.int32), + "rank_2": lambda: (torch.rand(10, 10), [1, 0]), + "rank_3": lambda: (torch.rand(10, 10, 10), [2, 0, 1]), + "rank_3_2": lambda: (torch.rand(10, 10, 10), [1, 2, 0]), + "rank_4": lambda: (torch.rand(1, 5, 1, 10), [0, 2, 3, 1]), + "rank_4_2": lambda: (torch.rand(1, 2, 5, 10), [1, 0, 2, 3]), + "rank_4_3": lambda: (torch.rand(1, 10, 10, 5), [2, 0, 1, 3]), +} + + +class SimplePermute(torch.nn.Module): + + def __init__(self, dims: list[int]): + super().__init__() + + self.permute = Permute(dims=dims) + + def forward(self, x): + return self.permute(x) + + +@common.parametrize("test_data", test_data_suite) +def test_permute_tosa_MI(test_data: torch.Tensor): + test_data, dims = test_data() + pipeline = TosaPipelineMI[input_t1]( + SimplePermute(dims=dims), + (test_data,), + aten_op, + exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_permute_tosa_BI(test_data: torch.Tensor): + test_data, dims = test_data() + pipeline = TosaPipelineBI[input_t1]( + SimplePermute(dims=dims), + (test_data,), + aten_op, + exir_op, + symmetric_io_quantization=True, + ) + pipeline.run() + + +x_fails = { + "rank_4_2": "AssertionError: Output 0 does not match reference output.", + "rank_4_3": "AssertionError: Output 0 does not match reference output.", } -input_t = tuple[torch.Tensor] - - -@common.parametrize("test_data", reject_data_suite) -def test_permute_u55_BI_not_delegated(test_data): - # Tests that we don't delegate these ops since they are not supported on U55. - shape, permutation, dtype = test_data - data = ((torch.rand(shape) * 10).to(dtype),) - pipeline = OpNotSupportedPipeline[input_t]( - TestPermute.Permute(dims=permutation), - data, - "TOSA-0.80+BI+u55", - {"executorch_exir_dialects_edge__ops_aten_permute_copy_default": 1}, + + +@common.parametrize("test_data", test_data_suite, x_fails) +@common.XfailIfNoCorstone300 +def test_permute_u55_BI(test_data): + test_data, dims = test_data() + pipeline = EthosU55PipelineBI[input_t1]( + SimplePermute(dims=dims), + (test_data,), + aten_op, + exir_ops="executorch_exir_dialects_edge__ops_aten_permute_copy_default", + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() + + +# Fails since on FVP since N > 1 is not supported. 
MLETORCH-517
+@common.parametrize("test_data", test_data_suite, x_fails)
+@common.XfailIfNoCorstone320
+def test_permute_u85_BI(test_data: torch.Tensor):
+    test_data, dims = test_data()
+    pipeline = EthosU85PipelineBI[input_t1](
+        SimplePermute(dims=dims),
+        (test_data,),
+        aten_op,
+        exir_ops="executorch_exir_dialects_edge__ops_aten_permute_copy_default",
+        run_on_fvp=True,
+        symmetric_io_quantization=True,
     )
     pipeline.run()
diff --git a/backends/arm/test/ops/test_pow.py b/backends/arm/test/ops/test_pow.py
index 618acf50fc2..98b23870f21 100644
--- a/backends/arm/test/ops/test_pow.py
+++ b/backends/arm/test/ops/test_pow.py
@@ -81,8 +81,14 @@ def forward(self, x: torch.Tensor):
         return torch.pow(x, self.exp)
 
 
-@common.parametrize("test_data", Pow_TensorTensor.test_data)
-def test_pow_tensor_tensor_MI(test_data: Pow_TensorTensor.input_t):
+x_fail = {
+    "zero_base_zero_exp": "TOSA constraints: If x == 0 and y <= 0, the result is undefined.",
+    "neg_base_zero_exp": "TOSA constraints: If x == 0 and y <= 0, the result is undefined.",
+}
+
+
+@common.parametrize("test_data", Pow_TensorTensor.test_data, x_fail, strict=False)
+def test_pow_tensor_tensor_tosa_MI(test_data: Pow_TensorTensor.input_t):
     pipeline = TosaPipelineMI[Pow_TensorTensor.input_t](
         Pow_TensorTensor(),
         test_data(),
@@ -92,8 +98,18 @@ def test_pow_tensor_tensor_MI(test_data: Pow_TensorTensor.input_t):
     pipeline.run()
 
 
-@common.parametrize("test_data", Pow_TensorScalar.test_data)
-def test_pow_tensor_scalar_MI(test_data: Pow_TensorScalar.input_t):
+x_fail = {
+    "exp_minus_three": "TOSA constraints: If x == 0 and y <= 0, the result is undefined.",
+    "exp_minus_one": "TOSA constraints: If x == 0 and y <= 0, the result is undefined.",
+    "exp_zero": "TOSA constraints: If x == 0 and y <= 0, the result is undefined.",
+    "exp_one": "TOSA constraints: If x == 0 and y <= 0, the result is undefined.",
+    "exp_two": "TOSA constraints: If x == 0 and y <= 0, the result is undefined.",
+    "non_neg_base_exp_pos_decimal": "TOSA constraints: If x == 0 and y <= 0, the result is undefined.",
+}
+
+
+@common.parametrize("test_data", Pow_TensorScalar.test_data, x_fail, strict=False)
+def test_pow_tensor_scalar_tosa_MI(test_data: Pow_TensorScalar.input_t):
     base, exp = test_data()
     pipeline = TosaPipelineMI[Pow_TensorScalar.input_t](
         Pow_TensorScalar(exp),
@@ -104,8 +120,8 @@ def test_pow_tensor_scalar_MI(test_data: Pow_TensorScalar.input_t):
     pipeline.run()
 
 
-@common.parametrize("test_data", Pow_TensorScalar.test_data)
-def test_pow_tensor_scalar_BI(test_data: Pow_TensorScalar.input_t):
+@common.parametrize("test_data", Pow_TensorScalar.test_data, x_fail, strict=False)
+def test_pow_tensor_scalar_tosa_BI(test_data: Pow_TensorScalar.input_t):
     base, exp = test_data()
     pipeline = TosaPipelineBI[Pow_TensorScalar.input_t](
         Pow_TensorScalar(exp),
diff --git a/backends/arm/test/ops/test_reciprocal.py b/backends/arm/test/ops/test_reciprocal.py
index b3233d02a92..92a33346015 100644
--- a/backends/arm/test/ops/test_reciprocal.py
+++ b/backends/arm/test/ops/test_reciprocal.py
@@ -1,120 +1,91 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-# All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
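# Why the x_fail entries above are expected to fail: TOSA leaves pow
# undefined when the base is 0 and the exponent is <= 0, so a comparison
# against the floating-point reference is meaningless there. What the fp32
# reference itself produces in those corner cases:

import torch

print(torch.pow(torch.tensor(0.0), -1.0))  # tensor(inf): 1/0 diverges
print(torch.pow(torch.tensor(0.0), 0.0))   # tensor(1.): a convention, not a limit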
-import unittest -import pytest +from typing import Tuple import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from parameterized import parameterized - -test_data_t = tuple[str, torch.Tensor] -test_data_suite: list[test_data_t] = [ - ( - "op_reciprocal_rank1_ones", - torch.ones(5), - ), - ( - "op_reciprocal_rank1_rand", - torch.rand(5) * 5, - ), - ("op_reciprocal_rank1_negative_ones", torch.ones(5) * (-1)), - ("op_reciprocal_rank4_ones", torch.ones(1, 10, 25, 20)), - ("op_reciprocal_rank4_negative_ones", (-1) * torch.ones(1, 10, 25, 20)), - ("op_reciprocal_rank4_ones_reciprocal_negative", torch.ones(1, 10, 25, 20)), - ("op_reciprocal_rank4_large_rand", 200 * torch.rand(1, 10, 25, 20)), - ("op_reciprocal_rank4_negative_large_rand", (-200) * torch.rand(1, 10, 25, 20)), - ("op_reciprocal_rank4_large_randn", 200 * torch.randn(1, 10, 25, 20) + 1), -] - - -class TestReciprocal(unittest.TestCase): - """Tests reciprocal""" - - class Reciprocal(torch.nn.Module): - - def forward(self, input_: torch.Tensor): - return input_.reciprocal() - - def _test_reciprocal_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.reciprocal.default": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_reciprocal_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.reciprocal.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_reciprocal_u55_BI_pipeline( - self, module: torch.nn.Module, test_data: tuple[torch.Tensor] - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_u55_compile_spec(), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.reciprocal.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(qtol=1, inputs=test_data) - - @parameterized.expand(test_data_suite) - def test_reciprocal_tosa_MI(self, test_name: str, input_: torch.Tensor): - test_data = (input_,) - self._test_reciprocal_tosa_MI_pipeline(self.Reciprocal(), test_data) - - @parameterized.expand(test_data_suite) - def test_reciprocal_tosa_BI(self, test_name: str, input_: torch.Tensor): - - test_data = (input_,) - self._test_reciprocal_tosa_BI_pipeline(self.Reciprocal(), test_data) - - @parameterized.expand(test_data_suite) - @pytest.mark.corstone_fvp - def test_reciprocal_u55_BI(self, test_name: str, input_: torch.Tensor): - test_data = (input_,) - self._test_reciprocal_u55_BI_pipeline(self.Reciprocal(), test_data) + +from executorch.backends.arm.test import common + +from 
executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +input_t1 = Tuple[torch.Tensor] # Input x, Input y +aten_op = "torch.ops.aten.reciprocal.default" + +test_data_suite = { + "op_reciprocal_rank1_ones": lambda: torch.ones(5), + "op_reciprocal_rank1_rand": lambda: torch.rand(5) * 5, + "op_reciprocal_rank1_negative_ones": lambda: torch.ones(5) * (-1), + "op_reciprocal_rank4_ones": lambda: torch.ones(1, 10, 25, 20), + "op_reciprocal_rank4_negative_ones": lambda: (-1) * torch.ones(1, 10, 25, 20), + "op_reciprocal_rank4_ones_reciprocal_negative": lambda: torch.ones(1, 10, 25, 20), + "op_reciprocal_rank4_large_rand": lambda: 200 * torch.rand(1, 10, 25, 20), + "op_reciprocal_rank4_negative_large_rand": lambda: (-200) + * torch.rand(1, 10, 25, 20), + "op_reciprocal_rank4_large_randn": lambda: 200 * torch.randn(1, 10, 25, 20) + 1, +} + + +class Reciprocal(torch.nn.Module): + + def forward(self, input_: torch.Tensor): + return input_.reciprocal() + + +@common.parametrize("test_data", test_data_suite) +def test_reciprocal_tosa_MI(test_data: torch.Tensor): + pipeline = TosaPipelineMI[input_t1]( + Reciprocal(), + (test_data(),), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_reciprocal_tosa_BI(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t1]( + Reciprocal(), + (test_data(),), + aten_op, + exir_op=[], + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone300 +def test_reciprocal_u55_BI(test_data: torch.Tensor): + pipeline = EthosU55PipelineBI[input_t1]( + Reciprocal(), + (test_data(),), + aten_op, + exir_ops=[], + run_on_fvp=False, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +@common.XfailIfNoCorstone320 +def test_reciprocal_u85_BI(test_data: torch.Tensor): + pipeline = EthosU85PipelineBI[input_t1]( + Reciprocal(), + (test_data(),), + aten_op, + exir_ops=[], + run_on_fvp=False, + symmetric_io_quantization=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_relu.py b/backends/arm/test/ops/test_relu.py index 3fc64c89be1..e27a65e76da 100644 --- a/backends/arm/test/ops/test_relu.py +++ b/backends/arm/test/ops/test_relu.py @@ -1,134 +1,91 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024-2025 Arm Limited and/or its affiliates. # All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
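# symmetric_io_quantization=True, used by most BI pipelines in this patch,
# pins the zero point of the model's input/output quantization to 0. A
# sketch of the resulting per-tensor int8 parameters -- an assumed scheme
# for illustration, not the quantizer's actual implementation:

import torch


def symmetric_int8_params(x: torch.Tensor) -> tuple[float, int]:
    scale = x.abs().max().item() / 127.0
    return scale, 0  # zero_point is fixed at 0 by construction


x = 200 * torch.rand(1, 10, 25, 20)  # matches op_reciprocal_rank4_large_rand
scale, zero_point = symmetric_int8_params(x)
q = torch.clamp(torch.round(x / scale), -128, 127).to(torch.int8)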
-import unittest from typing import Tuple import torch -from executorch.backends.arm.quantizer import ( - EthosUQuantizer, - get_symmetric_quantization_config, - TOSAQuantizer, -) from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.backends.arm.tosa_specification import TosaSpecification -from executorch.backends.xnnpack.test.tester.tester import Quantize -from executorch.exir.backend.backend_details import CompileSpec -from parameterized import parameterized +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) +input_t1 = Tuple[torch.Tensor] # Input x -test_data_suite = [ +aten_op = "torch.ops.aten.relu.default" +exir_op = "executorch_exir_dialects_edge__ops_aten_relu_default" + +test_data_suite = { # (test_name, test_data) - ("zeros", torch.zeros(1, 10, 10, 10)), - ("ones", torch.ones(10, 10, 10)), - ("rand", torch.rand(10, 10) - 0.5), - ("randn_pos", torch.randn(10) + 10), - ("randn_neg", torch.randn(10) - 10), - ("ramp", torch.arange(-16, 16, 0.2)), -] - - -class TestRelu(unittest.TestCase): - class Relu(torch.nn.Module): - def __init__(self): - super().__init__() - self.relu = torch.nn.ReLU() - - def forward(self, x): - return self.relu(x) - - def _test_relu_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check(["torch.ops.aten.relu.default"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_relu_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI") - compile_spec = common.get_tosa_compile_spec(tosa_spec) - quantizer = TOSAQuantizer(tosa_spec).set_io(get_symmetric_quantization_config()) - ( - ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.relu.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_relu_ethosu_BI_pipeline( - self, - compile_spec: CompileSpec, - module: torch.nn.Module, - test_data: Tuple[torch.tensor], - ): - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() - ) - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .check_count({"torch.ops.aten.relu.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_relu_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - @parameterized.expand(test_data_suite) - def test_relu_tosa_MI( - self, - test_name: str, - test_data: torch.Tensor, - ): - 
self._test_relu_tosa_MI_pipeline(self.Relu(), (test_data,)) - - @parameterized.expand(test_data_suite) - def test_relu_tosa_BI(self, test_name: str, test_data: torch.Tensor): - self._test_relu_tosa_BI_pipeline(self.Relu(), (test_data,)) - - @parameterized.expand(test_data_suite) - def test_relu_u55_BI(self, test_name: str, test_data: torch.Tensor): - self._test_relu_ethosu_BI_pipeline( - common.get_u55_compile_spec(), self.Relu(), (test_data,) - ) - - @parameterized.expand(test_data_suite) - def test_relu_u85_BI(self, test_name: str, test_data: torch.Tensor): - self._test_relu_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.Relu(), (test_data,) - ) + "zeros": lambda: torch.zeros(1, 10, 10, 10), + "ones": lambda: torch.ones(10, 10, 10), + "rand": lambda: torch.rand(10, 10) - 0.5, + "randn_pos": lambda: torch.randn(10) + 10, + "randn_neg": lambda: torch.randn(10) - 10, + "ramp": lambda: torch.arange(-16, 16, 0.2), +} + + +class Relu(torch.nn.Module): + def __init__(self): + super().__init__() + self.relu = torch.nn.ReLU() + + def forward(self, x): + return self.relu(x) + + +@common.parametrize("test_data", test_data_suite) +def test_relu_tosa_MI(test_data: torch.Tensor): + pipeline = TosaPipelineMI[input_t1]( + Relu(), + (test_data(),), + aten_op, + exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_relu_tosa_BI(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t1]( + Relu(), + (test_data(),), + aten_op, + exir_op, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_relu_u55_BI(test_data: torch.Tensor): + pipeline = EthosU55PipelineBI[input_t1]( + Relu(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=False, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_relu_u85_BI(test_data: torch.Tensor): + pipeline = EthosU85PipelineBI[input_t1]( + Relu(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=False, + symmetric_io_quantization=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py index da2770cfafe..3a7a37196ec 100644 --- a/backends/arm/test/ops/test_repeat.py +++ b/backends/arm/test/ops/test_repeat.py @@ -1,5 +1,4 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
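# Tensor.repeat, exercised by the rewritten suite below, tiles the input
# along each dimension and implicitly prepends singleton dims when the
# multiples tuple is longer than the input's rank:

import torch

x = torch.randn(3)
assert x.repeat(2).shape == (6,)       # same rank, tiled
assert x.repeat(2, 2).shape == (2, 6)  # treated as (1, 3) -> (2, 6)
assert torch.randn(3, 3).repeat(2, 2, 2).shape == (2, 6, 6)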
@@ -8,115 +7,83 @@
 # Tests the repeat op which copies the data of the input tensor (possibly with new data format)
 #
-import unittest
+
 from typing import Sequence, Tuple

 import torch

-from executorch.backends.arm.quantizer import (
-    EthosUQuantizer,
-    get_symmetric_quantization_config,
-    TOSAQuantizer,
-)
 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.backends.arm.tosa_specification import TosaSpecification
-
-from executorch.backends.xnnpack.test.tester.tester import Quantize
-from executorch.exir.backend.backend_details import CompileSpec
-from parameterized import parameterized
-
-
-class TestSimpleRepeat(unittest.TestCase):
-    """Tests Tensor.repeat for different ranks and dimensions."""
-
-    class Repeat(torch.nn.Module):
-        # (input tensor, multiples)
-        test_parameters = [
-            (torch.randn(3), (2,)),
-            (torch.randn(3, 4), (2, 1)),
-            (torch.randn(1, 1, 2, 2), (1, 2, 3, 4)),
-            (torch.randn(3), (2, 2)),
-            (torch.randn(3), (1, 2, 3)),
-            (torch.randn((3, 3)), (2, 2, 2)),
-            (torch.randn((3, 3, 3)), (2, 1, 2, 4)),
-        ]
-
-        def forward(self, x: torch.Tensor, multiples: Sequence):
-            return x.repeat(multiples)
-
-    def _test_repeat_tosa_MI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
-            )
-            .export()
-            .check_count({"torch.ops.aten.repeat.default": 1})
-            .to_edge()
-            .partition()
-            .check_not(["torch.ops.aten.repeat.default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    def _test_repeat_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
-        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
-        compile_spec = common.get_tosa_compile_spec(tosa_spec)
-        quantizer = TOSAQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
-        (
-            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
-            .export()
-            .check_count({"torch.ops.aten.repeat.default": 1})
-            .to_edge()
-            .partition()
-            .check_not(["torch.ops.aten.repeat.default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data, qtol=1)
-        )
-
-    def _test_repeat_ethosu_pipeline(
-        self, compile_spec: CompileSpec, module: torch.nn.Module, test_data: Tuple
-    ):
-        quantizer = EthosUQuantizer(compile_spec).set_io(
-            get_symmetric_quantization_config()
-        )
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=compile_spec,
-            )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
-            .export()
-            .check_count({"torch.ops.aten.repeat.default": 1})
-            .to_edge()
-            .partition()
-            .check_not(["torch.ops.aten.repeat.default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-        )
-
-    @parameterized.expand(Repeat.test_parameters)
-    def test_repeat_tosa_MI(self, test_input, multiples):
-        self._test_repeat_tosa_MI_pipeline(self.Repeat(), (test_input, multiples))
-
-    @parameterized.expand(Repeat.test_parameters)
-    def test_repeat_tosa_BI(self, test_input, multiples):
-        self._test_repeat_tosa_BI_pipeline(self.Repeat(), (test_input, multiples))
-
-    @parameterized.expand(Repeat.test_parameters)
-    def test_repeat_u55_BI(self, test_input, multiples):
-        self._test_repeat_ethosu_pipeline(
-            common.get_u55_compile_spec(), self.Repeat(), (test_input, multiples)
-        )
-
-    @parameterized.expand(Repeat.test_parameters)
-    def test_repeat_u85_BI(self, test_input, multiples):
-        self._test_repeat_ethosu_pipeline(
-            common.get_u85_compile_spec(), self.Repeat(), (test_input, multiples)
-        )
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+input_t1 = Tuple[torch.Tensor, torch.Tensor]  # Input x, Input y
+aten_op = "torch.ops.aten.repeat.default"
+
+
+"""Tests Tensor.repeat for different ranks and dimensions."""
+
+
+class Repeat(torch.nn.Module):
+    # (input tensor, multiples)
+    test_parameters = {
+        "1_x_1": lambda: (torch.randn(3), (2,)),
+        "2_x_2": lambda: (torch.randn(3, 4), (2, 1)),
+        "4_x_4": lambda: (torch.randn(1, 1, 2, 2), (1, 2, 3, 4)),
+        "1_x_2": lambda: (torch.randn(3), (2, 2)),
+        "1_x_3": lambda: (torch.randn(3), (1, 2, 3)),
+        "2_x_3": lambda: (torch.randn((3, 3)), (2, 2, 2)),
+        "1_x_4": lambda: (torch.randn((3, 3, 3)), (2, 1, 2, 4)),
+    }
+
+    def forward(self, x: torch.Tensor, multiples: Sequence):
+        return x.repeat(multiples)
+
+
+@common.parametrize("test_data", Repeat.test_parameters)
+def test_repeat_tosa_MI(test_data: Tuple):
+    pipeline = TosaPipelineMI[input_t1](
+        Repeat(),
+        test_data(),
+        aten_op,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Repeat.test_parameters)
+def test_repeat_tosa_BI(test_data: Tuple):
+    pipeline = TosaPipelineBI[input_t1](
+        Repeat(),
+        test_data(),
+        aten_op,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Repeat.test_parameters)
+def test_repeat_u55_BI(test_data: Tuple):
+    pipeline = EthosU55PipelineBI[input_t1](
+        Repeat(),
+        test_data(),
+        aten_op,
+        exir_ops=[],
+        run_on_fvp=False,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Repeat.test_parameters)
+def test_repeat_u85_BI(test_data: Tuple):
+    pipeline = EthosU85PipelineBI[input_t1](
+        Repeat(),
+        test_data(),
+        aten_op,
+        exir_ops=[],
+        run_on_fvp=False,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_rshift.py b/backends/arm/test/ops/test_rshift.py
index 52c05d48038..2e11cee5183 100644
--- a/backends/arm/test/ops/test_rshift.py
+++ b/backends/arm/test/ops/test_rshift.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 import torch
+from executorch.backends.arm.test import common
 from executorch.backends.arm.test.common import (
     XfailIfNoCorstone300,
     XfailIfNoCorstone320,
 )
@@ -14,7 +15,6 @@
     TosaPipelineBI,
     TosaPipelineMI,
 )
-from parameterized import parameterized

 scalar_input_t = tuple[torch.Tensor, int]

@@ -23,11 +23,20 @@ class RshiftScalar(torch.nn.Module):
     torch_op_MI = "torch.ops.aten.__rshift__.Scalar"
     torch_op_BI = "torch.ops.aten.bitwise_right_shift.Tensor"
     exir_op = "executorch_exir_dialects_edge__ops_aten_bitwise_right_shift_Tensor"
-    test_data = [
-        ((torch.randint(-100, 100, (1, 12, 3, 4), dtype=torch.int8), 1),),
-        ((torch.randint(-100, 100, (1, 5, 3, 4), dtype=torch.int16), 5),),
-        ((torch.randint(-100, 100, (1, 5, 3, 4), dtype=torch.int32), 2),),
-    ]
+    test_data = {
+        "randint_neg_100_int8": lambda: (
+            torch.randint(-100, 100, (1, 12, 3, 4), dtype=torch.int8),
+            1,
+        ),
+        "randint_neg_100_int16": lambda: (
+            torch.randint(-100, 100, (1, 5, 3, 4), dtype=torch.int16),
+            5,
+        ),
+        "randint_neg_100_int32": lambda: (
+            torch.randint(-100, 100, (1, 5, 3, 4), dtype=torch.int32),
+            2,
+        ),
+    }

     def forward(self, x: torch.Tensor, shift: int):
         return x >> shift
@@ -39,53 +48,53 @@ def forward(self, x: torch.Tensor, shift: int):
 class RshiftTensor(torch.nn.Module):
     torch_op = "torch.ops.aten.bitwise_right_shift.Tensor"
     exir_op = "executorch_exir_dialects_edge__ops_aten_bitwise_right_shift_Tensor"
-    test_data = [
-        (
-            (
-                torch.randint(-128, 127, (3, 3), dtype=torch.int8),
-                torch.randint(0, 5, (3, 3), dtype=torch.int8),
-            ),
-        ),
-        (
-            (
-                torch.randint(-1024, 1024, (3, 3, 3), dtype=torch.int16),
-                torch.randint(0, 5, (3, 3, 3), dtype=torch.int16),
-            ),
-        ),
-        (
-            (
-                torch.randint(0, 127, (1, 2, 3, 3), dtype=torch.int32),
-                torch.randint(0, 5, (1, 2, 3, 3), dtype=torch.int32),
-            ),
-        ),
-    ]
+    test_data = {
+        "randint_neg_128_int8": lambda: (
+            torch.randint(-128, 127, (3, 3), dtype=torch.int8),
+            torch.randint(0, 5, (3, 3), dtype=torch.int8),
+        ),
+        "randint_neg_1024_int16": lambda: (
+            torch.randint(-1024, 1024, (3, 3, 3), dtype=torch.int16),
+            torch.randint(0, 5, (3, 3, 3), dtype=torch.int16),
+        ),
+        "randint_0_127_int32": lambda: (
+            torch.randint(0, 127, (1, 2, 3, 3), dtype=torch.int32),
+            torch.randint(0, 5, (1, 2, 3, 3), dtype=torch.int32),
+        ),
+    }

     def forward(self, x: torch.Tensor, shift: torch.Tensor):
         return x.bitwise_right_shift(shift)


-@parameterized.expand(RshiftScalar.test_data)
-def test_rshift_scalar_tosa_MI(test_data):
+@common.parametrize("test_data", RshiftScalar.test_data)
+def test_rshift_scalar_tosa_MI_scalar(test_data):
     TosaPipelineMI[scalar_input_t](
-        RshiftScalar(), test_data, RshiftScalar.torch_op_MI, RshiftScalar.exir_op
+        RshiftScalar(),
+        test_data(),
+        RshiftScalar.torch_op_MI,
+        RshiftScalar.exir_op,
     ).run()


-@parameterized.expand(RshiftScalar.test_data)
-def test_rshift_scalar_tosa_BI(test_data):
+@common.parametrize("test_data", RshiftScalar.test_data)
+def test_bitwise_right_shift_tensor_tosa_BI_scalar(test_data):
     pipeline = TosaPipelineBI[scalar_input_t](
-        RshiftScalar(), test_data, RshiftScalar.torch_op_BI, RshiftScalar.exir_op
+        RshiftScalar(),
+        test_data(),
+        RshiftScalar.torch_op_BI,
+        RshiftScalar.exir_op,
     )
     pipeline.pop_stage("check.quant_nodes")
     pipeline.run()


-@parameterized.expand(RshiftScalar.test_data)
+@common.parametrize("test_data", RshiftScalar.test_data)
 @XfailIfNoCorstone300
-def test_rshift_scalar_tosa_u55(test_data):
+def test_bitwise_right_shift_tensor_u55_BI_scalar(test_data):
     pipeline = EthosU55PipelineBI[scalar_input_t](
         RshiftScalar(),
-        test_data,
+        test_data(),
         RshiftScalar.torch_op_BI,
         RshiftScalar.exir_op,
         run_on_fvp=True,
@@ -93,16 +102,16 @@ def test_rshift_scalar_tosa_u55(test_data):
     pipeline.pop_stage("check.quant_nodes")

     # Forced rounding in U55 HW causes off-by-one errors.
-    pipeline.change_args("run_method_and_compare_outputs", inputs=test_data, atol=1)
+    pipeline.change_args("run_method_and_compare_outputs", inputs=test_data(), atol=1)
     pipeline.run()


-@parameterized.expand(RshiftScalar.test_data)
+@common.parametrize("test_data", RshiftScalar.test_data)
 @XfailIfNoCorstone320
-def test_rshift_scalar_tosa_u85(test_data):
+def test_bitwise_right_shift_tensor_u85_BI_scalar(test_data):
     pipeline = EthosU85PipelineBI[scalar_input_t](
         RshiftScalar(),
-        test_data,
+        test_data(),
         RshiftScalar.torch_op_BI,
         RshiftScalar.exir_op,
         run_on_fvp=True,
@@ -111,28 +120,34 @@ def test_rshift_scalar_tosa_u85(test_data):
     pipeline.run()


-@parameterized.expand(RshiftTensor.test_data)
-def test_rshift_tensor_tosa_MI(test_data):
+@common.parametrize("test_data", RshiftTensor.test_data)
+def test_rshift_scalar_tosa_MI(test_data):
     TosaPipelineMI[scalar_input_t](
-        RshiftTensor(), test_data, RshiftTensor.torch_op, RshiftTensor.exir_op
+        RshiftTensor(),
+        test_data(),
+        RshiftTensor.torch_op,
+        RshiftTensor.exir_op,
     ).run()


-@parameterized.expand(RshiftTensor.test_data)
-def test_rshift_tensor_tosa_BI(test_data):
+@common.parametrize("test_data", RshiftTensor.test_data)
+def test_bitwise_right_shift_tensor_tosa_BI(test_data):
     pipeline = TosaPipelineBI[scalar_input_t](
-        RshiftTensor(), test_data, RshiftTensor.torch_op, RshiftTensor.exir_op
+        RshiftTensor(),
+        test_data(),
+        RshiftTensor.torch_op,
+        RshiftTensor.exir_op,
     )
     pipeline.pop_stage("check.quant_nodes")
     pipeline.run()


-@parameterized.expand(RshiftTensor.test_data)
+@common.parametrize("test_data", RshiftTensor.test_data)
 @XfailIfNoCorstone300
-def test_rshift_tensor_tosa_u55(test_data):
+def test_bitwise_right_shift_tensor_u55_BI(test_data):
     pipeline = EthosU55PipelineBI[scalar_input_t](
         RshiftTensor(),
-        test_data,
+        test_data(),
         RshiftTensor.torch_op,
         RshiftTensor.exir_op,
         run_on_fvp=True,
@@ -140,16 +155,16 @@ def test_rshift_tensor_tosa_u55(test_data):
     pipeline.pop_stage("check.quant_nodes")

     # Forced rounding in U55 HW causes off-by-one errors.
-    pipeline.change_args("run_method_and_compare_outputs", inputs=test_data, atol=1)
+    pipeline.change_args("run_method_and_compare_outputs", inputs=test_data(), atol=1)
     pipeline.run()


-@parameterized.expand(RshiftTensor.test_data)
+@common.parametrize("test_data", RshiftTensor.test_data)
 @XfailIfNoCorstone320
-def test_rshift_tensor_tosa_u85(test_data):
+def test_bitwise_right_shift_tensor_u85_BI(test_data):
     pipeline = EthosU85PipelineBI[scalar_input_t](
         RshiftTensor(),
-        test_data,
+        test_data(),
         RshiftTensor.torch_op,
         RshiftTensor.exir_op,
         run_on_fvp=True,
diff --git a/backends/arm/test/ops/test_rsqrt.py b/backends/arm/test/ops/test_rsqrt.py
index 2bf5fc371c8..0a9e95d890e 100644
--- a/backends/arm/test/ops/test_rsqrt.py
+++ b/backends/arm/test/ops/test_rsqrt.py
@@ -1,5 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-# All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -7,101 +6,78 @@
 # Tests the rsqrt op.
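Note: two pipeline-tweaking idioms recur in the rshift conversion above. pop_stage drops a named stage (here the quantization-node check, since the shift ops are not quantized), and change_args rebinds the keyword arguments of a staged call before run() executes it. A condensed sketch of the idiom, assuming those stage semantics (names and arguments are taken from test_rshift.py above):

def _example_u55_rshift_run():
    # RshiftScalar, EthosU55PipelineBI and scalar_input_t are defined above.
    data = RshiftScalar.test_data["randint_neg_100_int8"]()  # call the factory
    pipeline = EthosU55PipelineBI[scalar_input_t](
        RshiftScalar(),
        data,
        RshiftScalar.torch_op_BI,
        RshiftScalar.exir_op,
        run_on_fvp=True,
    )
    pipeline.pop_stage("check.quant_nodes")  # shifts produce no quant nodes
    # U55 hardware forces rounding on right-shifts, so tolerate off-by-one.
    pipeline.change_args("run_method_and_compare_outputs", inputs=data, atol=1)
    pipeline.run()
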
 #
-import unittest
+from typing import Tuple

 import torch
+
 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir.backend.compile_spec_schema import CompileSpec
-from parameterized import parameterized
-
-
-class TestRsqrt(unittest.TestCase):
-    class Rsqrt(torch.nn.Module):
-        test_parameters = [
-            (torch.ones(1, 10, 10, 10),),
-            (torch.rand(1, 10, 10, 10),),
-            (torch.rand(1, 5, 10, 20),),
-            (torch.rand(5, 10, 20),),
-        ]
-
-        def forward(self, x: torch.Tensor):
-            return x.rsqrt()
-
-    def _test_rsqrt_tosa_MI_pipeline(
-        self, module: torch.nn.Module, test_data: tuple[torch.Tensor]
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
-            )
-            .export()
-            .check_count({"torch.ops.aten.rsqrt.default": 1})
-            .to_edge()
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    def _test_rsqrt_tosa_BI_pipeline(
-        self, module: torch.nn.Module, test_data: tuple[torch.Tensor]
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
-            .quantize()
-            .export()
-            .check_count({"torch.ops.aten.rsqrt.default": 1})
-            .to_edge()
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    def _test_rsqrt_ethosu_BI_pipeline(
-        self,
-        compile_spec: CompileSpec,
-        module: torch.nn.Module,
-        test_data: tuple[torch.Tensor],
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=compile_spec,
-            )
-            .quantize()
-            .export()
-            .check_count({"torch.ops.aten.rsqrt.default": 1})
-            .to_edge()
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-        )
-
-    @parameterized.expand(Rsqrt.test_parameters)
-    def test_rsqrt_tosa_MI(self, test_tensor: torch.Tensor):
-        self._test_rsqrt_tosa_MI_pipeline(self.Rsqrt(), (test_tensor,))
-
-    @parameterized.expand(Rsqrt.test_parameters)
-    def test_rsqrt_tosa_BI(self, test_tensor: torch.Tensor):
-        self._test_rsqrt_tosa_BI_pipeline(self.Rsqrt(), (test_tensor,))
-
-    @parameterized.expand(Rsqrt.test_parameters)
-    def test_rsqrt_u55_BI(self, test_tensor: torch.Tensor):
-        self._test_rsqrt_ethosu_BI_pipeline(
-            common.get_u55_compile_spec(), self.Rsqrt(), (test_tensor,)
-        )
-
-    @parameterized.expand(Rsqrt.test_parameters)
-    def test_rsqrt_u85_BI(self, test_tensor: torch.Tensor):
-        self._test_rsqrt_ethosu_BI_pipeline(
-            common.get_u85_compile_spec(), self.Rsqrt(), (test_tensor,)
-        )
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+
+aten_op = "torch.ops.aten.rsqrt.default"
+input_t1 = Tuple[torch.Tensor]  # Input x
+
+
+class Rsqrt(torch.nn.Module):
+    test_parameters = {
+        "ones_4d": lambda: (torch.ones(1, 10, 10, 10),),
+        "rand_4d_1": lambda: (torch.rand(1, 10, 10, 10),),
+        "rand_4d_2": lambda: (torch.rand(1, 5, 10, 20),),
+        "rand_3d": lambda: (torch.rand(5, 10, 20),),
+    }
+
+    def forward(self, x: torch.Tensor):
+        return x.rsqrt()
+
+
+@common.parametrize("test_tensor", Rsqrt.test_parameters)
+def test_rsqrt_tosa_MI(test_tensor: torch.Tensor):
+    pipeline = TosaPipelineMI[input_t1](
+        Rsqrt(),
+        test_tensor(),
+        aten_op,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_tensor", Rsqrt.test_parameters)
+def test_rsqrt_tosa_BI(test_tensor: torch.Tensor):
+    pipeline = TosaPipelineBI[input_t1](
+        Rsqrt(),
+        test_tensor(),
+        aten_op,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_tensor", Rsqrt.test_parameters)
+@common.XfailIfNoCorstone300
+def test_rsqrt_u55_BI(test_tensor: torch.Tensor):
+    pipeline = EthosU55PipelineBI[input_t1](
+        Rsqrt(),
+        test_tensor(),
+        aten_op,
+        exir_ops=[],
+        run_on_fvp=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_tensor", Rsqrt.test_parameters)
+@common.XfailIfNoCorstone320
+def test_rsqrt_u85_BI(test_tensor: torch.Tensor):
+    pipeline = EthosU85PipelineBI[input_t1](
+        Rsqrt(),
+        test_tensor(),
+        aten_op,
+        exir_ops=[],
+        run_on_fvp=True,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_scalar_tensor.py b/backends/arm/test/ops/test_scalar_tensor.py
index ad9d385c1d1..7b050f7787e 100644
--- a/backends/arm/test/ops/test_scalar_tensor.py
+++ b/backends/arm/test/ops/test_scalar_tensor.py
@@ -14,22 +14,22 @@
 )

 float_test_data_suite = {
-    "scalar_tensor_float_1": (3.7, torch.float32, torch.rand((1, 2, 3, 4))),
-    "scalar_tensor_float_2": (66, torch.float32, torch.rand((1, 2, 3))),
+    "scalar_tensor_float_1": lambda: (3.7, torch.float32, torch.rand((1, 2, 3, 4))),
+    "scalar_tensor_float_2": lambda: (66, torch.float32, torch.rand((1, 2, 3))),
 }

 int_test_data_suite = {
-    "scalar_tensor_int32": (
+    "scalar_tensor_int32": lambda: (
         33,
         torch.int32,
         torch.randint(0, 10, (1, 2), dtype=torch.int32),
     ),
-    "scalar_tensor_int8": (
+    "scalar_tensor_int8": lambda: (
         8,
         torch.int8,
         torch.rand(1, 2, 3),
     ),
-    "scalar_tensor_int16": (
+    "scalar_tensor_int16": lambda: (
         16 * 16 * 16,
         torch.int16,
         torch.rand((1,)).unsqueeze(0),  # Rank 0 inputs not supported
@@ -49,17 +49,29 @@ def forward(self, x: torch.Tensor):
         return torch.scalar_tensor(self.scalar, dtype=self.dtype) + x


-@common.parametrize("test_data", int_test_data_suite | float_test_data_suite)
+@common.parametrize(
+    "test_data",
+    int_test_data_suite | float_test_data_suite,
+)
 def test_scalar_tensor_tosa_MI(test_data):  # Note TOSA MI supports all types
-    scalar, dtype, data = test_data
-    TosaPipelineMI(ScalarTensor(scalar, dtype), tuple(data), ScalarTensor.aten_op).run()
+    scalar, dtype, data = test_data()
+    TosaPipelineMI(
+        ScalarTensor(scalar, dtype),
+        tuple(data),
+        ScalarTensor.aten_op,
+    ).run()


-@common.parametrize("test_data", int_test_data_suite | float_test_data_suite)
+@common.parametrize(
+    "test_data",
+    int_test_data_suite | float_test_data_suite,
+)
 def test_scalar_tensor_tosa_BI(test_data):
-    scalar, dtype, data = test_data
+    scalar, dtype, data = test_data()
     pipeline: TosaPipelineBI = TosaPipelineBI(
-        ScalarTensor(scalar, dtype), tuple(data), ScalarTensor.aten_op
+        ScalarTensor(scalar, dtype),
+        tuple(data),
+        ScalarTensor.aten_op,
     )
     pipeline.pop_stage("check.quant_nodes")
     pipeline.run()
@@ -67,8 +79,8 @@ def test_scalar_tensor_tosa_BI(test_data):

 @common.parametrize("test_data", float_test_data_suite)
 @common.XfailIfNoCorstone300
-def test_scalar_tensor_tosa_u55(test_data):
-    scalar, dtype, data = test_data
+def test_scalar_tensor_u55_BI(test_data):
+    scalar, dtype, data = test_data()
     EthosU55PipelineBI(
         ScalarTensor(scalar, dtype),
         tuple(data),
@@ -80,8 +92,8 @@ def test_scalar_tensor_tosa_u55(test_data):

 @common.parametrize("test_data", float_test_data_suite)
 @common.XfailIfNoCorstone320
-def test_scalar_tensor_tosa_u85(test_data):
-    scalar, dtype, data = test_data
+def test_scalar_tensor_u85_BI(test_data):
+    scalar, dtype, data = test_data()
     EthosU85PipelineBI(
         ScalarTensor(scalar, dtype),
         tuple(data),
diff --git a/backends/arm/test/ops/test_scalars.py b/backends/arm/test/ops/test_scalars.py
index 97af070120b..a4748e93fdb 100644
--- a/backends/arm/test/ops/test_scalars.py
+++ b/backends/arm/test/ops/test_scalars.py
@@ -3,13 +3,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-import unittest
 from typing import Tuple

-import common
 import torch

+from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
     TosaPipelineBI,
     TosaPipelineMI,
@@ -32,90 +31,103 @@
 input_t1 = Tuple[torch.Tensor, torch.scalar_tensor]  # Input x, Input y


-class TestScalars(unittest.TestCase):
-    """Tests various scalar cases"""
+"""Tests various scalar cases"""

-    class Add(torch.nn.Module):
-        def forward(self, x, y):
-            return x + y

-    class Sub(torch.nn.Module):
-        def forward(self, x, y):
-            return x - y
+class Add(torch.nn.Module):
+    def forward(self, x, y):
+        return x + y

-    class Div(torch.nn.Module):
-        def forward(self, x, y):
-            return x / y

-    class Mul(torch.nn.Module):
-        def forward(self, x, y):
-            return x * y
+class Sub(torch.nn.Module):
+    def forward(self, x, y):
+        return x - y

-    class MulScalar(torch.nn.Module):
-        def forward(self, x, y):
-            return torch.ops.aten.mul.Scalar(x, y)

-    class DivScalar(torch.nn.Module):
-        def forward(self, x, y):
-            return torch.ops.aten.div.Scalar(x, y)
+class Div(torch.nn.Module):
+    def forward(self, x, y):
+        return x / y

-    class AddScalar(torch.nn.Module):
-        def forward(self, x, y):
-            return torch.ops.aten.add.Scalar(x, y)

-    class SubScalar(torch.nn.Module):
-        def forward(self, x, y):
-            return torch.ops.aten.sub.Scalar(x, y)
+class Mul(torch.nn.Module):
+    def forward(self, x, y):
+        return x * y

-    class AddInplace(torch.nn.Module):
-        def forward(self, x, y):
-            x += y
-            return x

-    class SubInplace(torch.nn.Module):
-        def forward(self, x, y):
-            x -= y
-            return x
+class MulScalar(torch.nn.Module):
+    def forward(self, x, y):
+        return torch.ops.aten.mul.Scalar(x, y)

-    class DivInplace(torch.nn.Module):
-        def forward(self, x, y):
-            x /= y
-            return x

-    class MulInplace(torch.nn.Module):
-        def forward(self, x, y):
-            x *= y
-            return x
+class DivScalar(torch.nn.Module):
+    def forward(self, x, y):
+        return torch.ops.aten.div.Scalar(x, y)

-    class AddConst(torch.nn.Module):
-        def forward(self, x):
-            x = 1.0 + x
-            return x

-    class ShiftInplaceSub(torch.nn.Module):
-        def forward(self, x):
-            x = x >> 4
-            x -= 10
-            return x
+class AddScalar(torch.nn.Module):
+    def forward(self, x, y):
+        return torch.ops.aten.add.Scalar(x, y)
+
+
+class SubScalar(torch.nn.Module):
+    def forward(self, x, y):
+        return torch.ops.aten.sub.Scalar(x, y)
+
+
+class AddInplace(torch.nn.Module):
+    def forward(self, x, y):
+        x += y
+        return x
+
+
+class SubInplace(torch.nn.Module):
+    def forward(self, x, y):
+        x -= y
+        return x
+
+
+class DivInplace(torch.nn.Module):
+    def forward(self, x, y):
+        x /= y
+        return x
+
+
+class MulInplace(torch.nn.Module):
+    def forward(self, x, y):
+        x *= y
+        return x
+
+
+class AddConst(torch.nn.Module):
+    def forward(self, x):
+        x = 1.0 + x
+        return x
+
+
+class ShiftInplaceSub(torch.nn.Module):
+    def forward(self, x):
+        x = x >> 4
+        x -= 10
+        return x


 # Inplace ops end with '_' (from aten naming)
 ops = [
-    ("Add", TestScalars.Add()),
-    ("Sub", TestScalars.Sub()),
-    ("Mul", TestScalars.Mul()),
-    ("Div", TestScalars.Div()),
-    ("Add_", TestScalars.AddInplace()),
-    ("Sub_", TestScalars.SubInplace()),
-    ("Mul_", TestScalars.MulInplace()),
-    ("Div_", TestScalars.DivInplace()),
-    ("MulScalar", TestScalars.MulScalar()),
-    ("DivScalar", TestScalars.DivScalar()),
-    ("AddScalar", TestScalars.AddScalar()),
-    ("SubScalar", TestScalars.SubScalar()),
+    ("Add", Add()),
+    ("Sub", Sub()),
+    ("Mul", Mul()),
+    ("Div", Div()),
+    ("Add_", AddInplace()),
+    ("Sub_", SubInplace()),
+    ("Mul_", MulInplace()),
+    ("Div_", DivInplace()),
+    ("MulScalar", MulScalar()),
+    ("DivScalar", DivScalar()),
+    ("AddScalar", AddScalar()),
+    ("SubScalar", SubScalar()),
 ]

-const_ops = [("Add", TestScalars.AddConst())]
+const_ops = [("Add", AddConst())]

 dtypes = [("int", 3), ("float", 3.0)]
 sizes = [("r1", (1)), ("r4", (2, 4, 5, 3))]
@@ -198,16 +210,18 @@ def _test_add_tosa_BI_pipeline(
 }


-@common.parametrize("tensor_scalar_tests", tensor_scalar_tests, MI_xfails)
-def test_MI(tensor_scalar_tests: list):
+@common.parametrize(
+    "tensor_scalar_tests",
+    tensor_scalar_tests,
+    MI_xfails,
+)
+def test_tosa_MI(tensor_scalar_tests: list):
     op, x, y = tensor_scalar_tests
     _test_add_tosa_MI_pipeline(op, (x, y))


 def _test_passes_tosa_BI_pipeline(module: torch.nn.Module, test_data: tuple):
-    pipeline = TransformAnnotationPassPipeline[input_t1](
-        module, test_data, tosa_version="TOSA-0.80+BI"
-    )
+    pipeline = TransformAnnotationPassPipeline[input_t1](module, test_data)
     pipeline.run()


@@ -221,39 +235,47 @@ def _test_passes_tosa_BI_pipeline(module: torch.nn.Module, test_data: tuple):

 @common.parametrize(
-    "tensor_scalar_tests", tensor_scalar_tests, passes_xfails, strict=False
+    "tensor_scalar_tests",
+    tensor_scalar_tests,
+    passes_xfails,
+    strict=False,
 )
-def test_passes_BI(tensor_scalar_tests: list):
+def test_scalars_tosa_BI_passes(tensor_scalar_tests: list):
     op, x, y = tensor_scalar_tests
     _test_passes_tosa_BI_pipeline(op, (x, y))


 # op(Scalar float, tensor) works if the scalar is constant.
 @common.parametrize("tensor_const_tests", tensor_const_tests)
-def test_MI_const(tensor_const_tests: list):
+def test_scalars_tosa_MI(tensor_const_tests: list):
     op, x = tensor_const_tests
     _test_add_tosa_MI_pipeline(op, (x,))


 @common.parametrize("tensor_scalar_tests", tensor_scalar_tests)
-def test_BI(tensor_scalar_tests: list):
+def test_scalars_tosa_BI(tensor_scalar_tests: list):
     op, x, y = tensor_scalar_tests
     _test_add_tosa_BI_pipeline(op, (x, y))


 # op(Scalar float, tensor) works if the scalar is constant.
 @common.parametrize("tensor_const_tests", tensor_const_tests)
-def test_BI_const(tensor_const_tests: list):
+def test_scalars_tosa_BI_const(tensor_const_tests: list):
     op, x = tensor_const_tests
     _test_add_tosa_BI_pipeline(op, (x,))


 def test_shift_sub_inplace_tosa_MI():
-    _test_add_tosa_MI_pipeline(TestScalars.ShiftInplaceSub(), (torch.IntTensor(5),))
+    _test_add_tosa_MI_pipeline(
+        ShiftInplaceSub(),
+        (torch.IntTensor(5),),
+    )


 # Do not check for quant nodes in the graph for rshift.
 def test_shift_sub_inplace_tosa_BI():
     _test_add_tosa_BI_pipeline(
-        TestScalars.ShiftInplaceSub(), (torch.IntTensor(5),), check_quant_nodes=False
+        ShiftInplaceSub(),
+        (torch.IntTensor(5),),
+        check_quant_nodes=False,
     )
diff --git a/backends/arm/test/ops/test_select.py b/backends/arm/test/ops/test_select.py
index fbeb4ebf9e7..a0b72942d44 100644
--- a/backends/arm/test/ops/test_select.py
+++ b/backends/arm/test/ops/test_select.py
@@ -1,182 +1,157 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # Copyright 2024-2025 Arm Limited and/or its affiliates.
-# All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-import unittest
+
+from typing import Tuple

 import torch
 from executorch.backends.arm.test import common
-from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir.backend.compile_spec_schema import CompileSpec
-from parameterized import parameterized
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)

-test_data_t = tuple[torch.Tensor, int, int]
+input_t1 = Tuple[torch.Tensor, int, int]

-test_data_suite: list[tuple[test_data_t]] = [
+test_data_suite = {
     # (test_data, dim, index)
-    ((torch.zeros(5, 3, 20), -1, 0),),
-    ((torch.rand(5, 3, 20), 0, -1),),
-    ((torch.zeros(5, 3, 20), 0, 4),),
-    ((torch.ones(10, 10, 10), 0, 2),),
-    ((torch.rand(5, 3, 20, 2), 0, 2),),
-    ((torch.rand(10, 10) - 0.5, 0, 0),),
-    ((torch.randn(10) + 10, 0, 1),),
-    ((torch.randn(10) - 10, 0, 2),),
-    ((torch.arange(-16, 16, 0.2), 0, 1),),
-]
-
-
-class TestSelect(unittest.TestCase):
-    class SelectCopy(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-
-        def forward(self, x, dim: int, index: int):
-            return torch.select_copy(x, dim=dim, index=index)
-
-    class SelectInt(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-
-        def forward(self, x, dim: int, index: int):
-            return torch.select(x, dim=dim, index=index)
-
-    def _test_select_tosa_MI_pipeline(
-        self,
-        module: torch.nn.Module,
-        test_data: test_data_t,
-        export_target: str,
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
-            )
-            .export()
-            .check([export_target])
-            .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    def _test_select_tosa_BI_pipeline(
-        self,
-        module: torch.nn.Module,
-        test_data: test_data_t,
-        export_target: str,
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
-            .quantize()
-            .export()
-            .check([export_target])
-            .check(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-            .run_method_and_compare_outputs(inputs=test_data)
-        )
-
-    def _test_select_ethos_BI_pipeline(
-        self,
-        compile_spec: list[CompileSpec],
-        module: torch.nn.Module,
-        test_data: test_data_t,
-        export_target: str,
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=compile_spec,
-            )
-            .quantize()
-            .export()
-            .check([export_target])
-            .check(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-        )
-
-    def _test_select_tosa_u55_BI_pipeline(
-        self, module: torch.nn.Module, test_data: test_data_t, export_target: str
-    ):
-        self._test_select_ethos_BI_pipeline(
-            common.get_u55_compile_spec(),
-            module,
-            test_data,
-            export_target,
-        )
-
-    def _test_select_tosa_u85_BI_pipeline(
-        self, module: torch.nn.Module, test_data: test_data_t, export_target: str
-    ):
-        self._test_select_ethos_BI_pipeline(
-            common.get_u85_compile_spec(),
-            module,
-            test_data,
-            export_target,
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_select_copy_tosa_MI(self, test_data: test_data_t):
-        self._test_select_tosa_MI_pipeline(
-            self.SelectCopy(), test_data, export_target="torch.ops.aten.select_copy.int"
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_select_int_tosa_MI(self, test_data: test_data_t):
-        self._test_select_tosa_MI_pipeline(
-            self.SelectInt(), test_data, export_target="torch.ops.aten.select.int"
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_select_copy_tosa_BI(self, test_data: test_data_t):
-        self._test_select_tosa_BI_pipeline(
-            self.SelectCopy(), test_data, export_target="torch.ops.aten.select_copy.int"
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_select_int_tosa_BI(self, test_data: test_data_t):
-        self._test_select_tosa_BI_pipeline(
-            self.SelectInt(), test_data, export_target="torch.ops.aten.select.int"
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_select_copy_tosa_u55_BI(self, test_data: test_data_t):
-        self._test_select_tosa_u55_BI_pipeline(
-            self.SelectCopy(), test_data, export_target="torch.ops.aten.select_copy.int"
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_select_int_tosa_u55_BI(self, test_data: test_data_t):
-        self._test_select_tosa_u55_BI_pipeline(
-            self.SelectInt(), test_data, export_target="torch.ops.aten.select.int"
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_select_copy_tosa_u85_BI(self, test_data: test_data_t):
-        self._test_select_tosa_u85_BI_pipeline(
-            self.SelectCopy(), test_data, export_target="torch.ops.aten.select_copy.int"
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_select_int_tosa_u85_BI(self, test_data: test_data_t):
-        self._test_select_tosa_u85_BI_pipeline(
-            self.SelectInt(), test_data, export_target="torch.ops.aten.select.int"
-        )
+    "select3d_neg_1_dim_0_index": lambda: (torch.zeros(5, 3, 20), -1, 0),
+    "select3d_0_dim_neg_1_index": lambda: (torch.rand(5, 3, 20), 0, -1),
+    "select3d_0_dim_4_index": lambda: (torch.zeros(5, 3, 20), 0, 4),
+    "select3d_0_dim_2_index": lambda: (torch.ones(10, 10, 10), 0, 2),
+    "select4d_0_dim_2_index": lambda: (torch.rand(5, 3, 20, 2), 0, 2),
+    "select2d_0_dim_0_index": lambda: (torch.rand(10, 10) - 0.5, 0, 0),
+    "select1d_0_dim_1_index": lambda: (torch.randn(10) + 10, 0, 1),
+    "select1d_0_dim_0_index": lambda: (torch.randn(10) - 10, 0, 2),
+    "select3d_0_dim_1_index": lambda: (torch.arange(-16, 16, 0.2), 0, 1),
+}
+
+aten_op_copy = "torch.ops.aten.select_copy.int"
+aten_op_int = "torch.ops.aten.select.int"
+
+
+class SelectCopy(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x, dim: int, index: int):
+        return torch.select_copy(x, dim=dim, index=index)
+
+
+class SelectInt(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x, dim: int, index: int):
+        return torch.select(x, dim=dim, index=index)
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_select_int_tosa_MI_copy(test_data: Tuple):
+    pipeline = TosaPipelineMI[input_t1](
+        SelectCopy(),
+        test_data(),
+        aten_op=aten_op_copy,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_select_int_tosa_MI(test_data: Tuple):
+    pipeline = TosaPipelineMI[input_t1](
+        SelectInt(),
+        test_data(),
+        aten_op=aten_op_int,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_select_int_tosa_BI_copy(test_data: Tuple):
+    pipeline = TosaPipelineBI[input_t1](
+        SelectCopy(),
+        test_data(),
+        aten_op=aten_op_copy,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_select_int_tosa_BI(test_data: Tuple):
+    pipeline = TosaPipelineBI[input_t1](
+        SelectInt(),
+        test_data(),
+        aten_op=aten_op_int,
+        exir_op=[],
+    )
+    pipeline.run()
+
+
+x_fails = {
+    "select4d_0_dim_2_index": "AssertionError: Output 0 does not match reference output."
+}
+
+
+@common.parametrize("test_data", test_data_suite, x_fails)
+@common.XfailIfNoCorstone300
+def test_select_int_u55_BI_copy(test_data: Tuple):
+    pipeline = EthosU55PipelineBI[input_t1](
+        SelectCopy(),
+        test_data(),
+        aten_op_copy,
+        exir_ops=[],
+        run_on_fvp=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite, x_fails)
+@common.XfailIfNoCorstone300
+def test_select_int_u55_BI(test_data: Tuple):
+    pipeline = EthosU55PipelineBI[input_t1](
+        SelectInt(),
+        test_data(),
+        aten_op_int,
+        exir_ops=[],
+        run_on_fvp=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite, x_fails)
+@common.XfailIfNoCorstone320
+def test_select_int_u85_BI_copy(test_data: Tuple):
+    pipeline = EthosU85PipelineBI[input_t1](
+        SelectCopy(),
+        test_data(),
+        aten_op_copy,
+        exir_ops=[],
+        run_on_fvp=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite, x_fails)
+@common.XfailIfNoCorstone320
+def test_select_int_u85_BI(test_data: Tuple):
+    pipeline = EthosU85PipelineBI[input_t1](
+        SelectInt(),
+        test_data(),
+        aten_op_int,
+        exir_ops=[],
+        run_on_fvp=True,
+        use_to_edge_transform_and_lower=True,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/ops/test_sigmoid.py b/backends/arm/test/ops/test_sigmoid.py
index 43b4abd2039..b5ee68b987b 100644
--- a/backends/arm/test/ops/test_sigmoid.py
+++ b/backends/arm/test/ops/test_sigmoid.py
@@ -5,189 +5,158 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-import unittest
 from typing import Tuple

-import pytest
-
 import torch

 from executorch.backends.arm.test import common, conftest
-from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.exir.backend.compile_spec_schema import CompileSpec
-from parameterized import parameterized
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+aten_op = "torch.ops.aten.sigmoid.default"  # Used for checking that we do not have softmax in the graph after decompose
+exir_op = "executorch_exir_dialects_edge__ops_aten_sigmoid_default"
+input_t1 = Tuple[torch.Tensor]  # Input x
+
+test_data_suite = {
+    # (test_name, test_data)
+    "zeros": lambda: torch.zeros(10, 10, 10, 10),
+    "ones": lambda: torch.ones(10, 10, 10),
+    "rand": lambda: torch.rand(10, 10) - 0.5,
+    "randn_pos": lambda: torch.randn(10) + 10,
+    "randn_neg": lambda: torch.randn(10) - 10,
+    "ramp": lambda: torch.arange(-16, 16, 0.2),
+}

-test_data_suite = [
-    # (test_name, test_data)
-    ("zeros", torch.zeros(10, 10, 10, 10)),
-    ("ones", torch.ones(10, 10, 10)),
-    ("rand", torch.rand(10, 10) - 0.5),
-    ("randn_pos", torch.randn(10) + 10),
-    ("randn_neg", torch.randn(10) - 10),
-    ("ramp", torch.arange(-16, 16, 0.2)),
-]
-
-
-class TestSigmoid(unittest.TestCase):
-    class Sigmoid(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.sigmoid = torch.nn.Sigmoid()
-
-        def forward(self, x):
-            return self.sigmoid(x)
-
-    class AddSigmoid(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.sigmoid = torch.nn.Sigmoid()
-
-        def forward(self, x):
-            return self.sigmoid(x + x)
-
-    class SigmoidAdd(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.sigmoid = torch.nn.Sigmoid()
-
-        def forward(self, x):
-            return x + self.sigmoid(x)
-
-    class SigmoidAddSigmoid(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.sigmoid = torch.nn.Sigmoid()
-
-        def forward(self, x, y):
-            return self.sigmoid((self.sigmoid(y) + self.sigmoid(x)))
-
-    def _test_sigmoid_tosa_MI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
-    ):
-        tester = (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"),
-            )
-            .export()
-            .check(["torch.ops.aten.sigmoid.default"])
-            .check_not(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_not(["executorch_exir_dialects_edge__ops_aten_sigmoid_default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-        )
-
-        if conftest.is_option_enabled("tosa_ref_model"):
-            tester.run_method_and_compare_outputs(inputs=test_data)
-
-    def _test_sigmoid_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
-        tester = (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
-            .quantize()
-            .export()
-            .check(["torch.ops.aten.sigmoid.default"])
-            .check(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_not(["executorch_exir_dialects_edge__ops_aten_sigmoid_default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-        )
-
-        if conftest.is_option_enabled("tosa_ref_model"):
-            tester.run_method_and_compare_outputs(inputs=test_data)
-
-    def _test_sigmoid_tosa_ethos_BI_pipeline(
-        self,
-        compile_spec: list[CompileSpec],
-        module: torch.nn.Module,
-        test_data: Tuple[torch.tensor],
-    ):
-        (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=compile_spec,
-            )
-            .quantize()
-            .export()
-            .check_count({"torch.ops.aten.sigmoid.default": 1})
-            .check(["torch.ops.quantized_decomposed"])
-            .to_edge()
-            .partition()
-            .check_not(["executorch_exir_dialects_edge__ops_aten_sigmoid_default"])
-            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
-            .to_executorch()
-        )
-
-    def _test_sigmoid_tosa_u55_BI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
-    ):
-        self._test_sigmoid_tosa_ethos_BI_pipeline(
-            common.get_u55_compile_spec(), module, test_data
-        )
-
-    def _test_sigmoid_tosa_u85_BI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
-    ):
-        self._test_sigmoid_tosa_ethos_BI_pipeline(
-            common.get_u85_compile_spec(), module, test_data
-        )
-
-    @parameterized.expand(test_data_suite)
-    @pytest.mark.tosa_ref_model
-    def test_sigmoid_tosa_MI(
-        self,
-        test_name: str,
-        test_data: torch.Tensor,
-    ):
-        self._test_sigmoid_tosa_MI_pipeline(self.Sigmoid(), (test_data,))
-
-    @parameterized.expand(test_data_suite)
-    @pytest.mark.tosa_ref_model
-    def test_sigmoid_tosa_BI(self, test_name: str, test_data: torch.Tensor):
-        self._test_sigmoid_tosa_BI_pipeline(self.Sigmoid(), (test_data,))
-
-    @pytest.mark.tosa_ref_model
-    def test_add_sigmoid_tosa_MI(self):
-        self._test_sigmoid_tosa_MI_pipeline(self.AddSigmoid(), (test_data_suite[0][1],))
-
-    @pytest.mark.tosa_ref_model
-    def test_add_sigmoid_tosa_BI(self):
-        self._test_sigmoid_tosa_BI_pipeline(self.AddSigmoid(), (test_data_suite[5][1],))
-
-    @pytest.mark.tosa_ref_model
-    def test_sigmoid_add_tosa_MI(self):
-        self._test_sigmoid_tosa_MI_pipeline(self.SigmoidAdd(), (test_data_suite[0][1],))
-
-    @pytest.mark.tosa_ref_model
-    def test_sigmoid_add_tosa_BI(self):
-        self._test_sigmoid_tosa_BI_pipeline(self.SigmoidAdd(), (test_data_suite[0][1],))
-
-    @pytest.mark.tosa_ref_model
-    def test_sigmoid_add_sigmoid_tosa_MI(self):
-        self._test_sigmoid_tosa_MI_pipeline(
-            self.SigmoidAddSigmoid(), (test_data_suite[4][1], test_data_suite[3][1])
-        )
-
-    @pytest.mark.tosa_ref_model
-    def test_sigmoid_add_sigmoid_tosa_BI(self):
-        self._test_sigmoid_tosa_BI_pipeline(
-            self.SigmoidAddSigmoid(), (test_data_suite[4][1], test_data_suite[3][1])
-        )
-
-    @parameterized.expand(test_data_suite)
-    def test_sigmoid_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor):
-        self._test_sigmoid_tosa_u55_BI_pipeline(self.Sigmoid(), (test_data,))
-
-    @parameterized.expand(test_data_suite)
-    def test_sigmoid_tosa_u85_BI(self, test_name: str, test_data: torch.Tensor):
-        self._test_sigmoid_tosa_u85_BI_pipeline(self.Sigmoid(), (test_data,))
+class Sigmoid(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.sigmoid = torch.nn.Sigmoid()
+
+    def forward(self, x):
+        return self.sigmoid(x)
+
+
+class AddSigmoid(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.sigmoid = torch.nn.Sigmoid()
+
+    def forward(self, x):
+        return self.sigmoid(x + x)
+
+
+class SigmoidAdd(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.sigmoid = torch.nn.Sigmoid()
+
+    def forward(self, x):
+        return x + self.sigmoid(x)
+
+
+class SigmoidAddSigmoid(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.sigmoid = torch.nn.Sigmoid()
+
+    def forward(self, x, y):
+        return self.sigmoid((self.sigmoid(y) + self.sigmoid(x)))
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_sigmoid_tosa_MI(test_data: torch.Tensor):
+    TosaPipelineMI[input_t1](Sigmoid(), (test_data(),), aten_op, exir_op).run()
+
+
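Note: a word on the TosaPipelineMI[input_t1] notation used throughout. The subscription is plain typing.Generic machinery and does not change runtime behaviour; it only records the input type for readers and type checkers. A minimal sketch of the assumed shape (hypothetical skeleton; the real classes live in test_pipeline.py and do considerably more):

from typing import Generic, Tuple, TypeVar

import torch

T = TypeVar("T")

class ExamplePipeline(Generic[T]):
    # The real pipelines stage export/quantize/partition steps and execute
    # them in order when run() is called.
    def __init__(self, module: torch.nn.Module, inputs: T, aten_op: str, exir_op=None, **kwargs):
        self.module = module
        self.inputs = inputs

    def run(self) -> None:
        pass

# Subscripting only records the type parameter:
ExamplePipeline[Tuple[torch.Tensor]](torch.nn.ReLU(), (torch.ones(1),), "aten::relu").run()
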
+@common.parametrize("test_data", test_data_suite) +def test_sigmoid_tosa_BI(test_data: torch.Tensor): + TosaPipelineBI[input_t1](Sigmoid(), (test_data(),), aten_op, exir_op).run() + + +def test_sigmoid_tosa_MI_add(): + TosaPipelineMI[input_t1]( + AddSigmoid(), + (test_data_suite["zeros"](),), + aten_op, + exir_op, + tosa_version=conftest.get_option("tosa_version"), + ).run() + + +def test_sigmoid_tosa_BI_add(): + TosaPipelineBI[input_t1]( + AddSigmoid(), + (test_data_suite["ramp"](),), + aten_op, + exir_op, + tosa_version=conftest.get_option("tosa_version"), + ).run() + + +def test_sigmoid_tosa_MI_add_2(): + TosaPipelineMI[input_t1]( + SigmoidAdd(), + (test_data_suite["zeros"](),), + aten_op, + exir_op, + tosa_version=conftest.get_option("tosa_version"), + ).run() + + +def test_sigmoid_tosa_BI_add_2(): + TosaPipelineBI[input_t1]( + SigmoidAdd(), + (test_data_suite["zeros"](),), + aten_op, + exir_op, + tosa_version=conftest.get_option("tosa_version"), + ).run() + + +def test_sigmoid_tosa_MI_add_3(): + TosaPipelineMI[input_t1]( + SigmoidAddSigmoid(), + (test_data_suite["randn_neg"](), test_data_suite["randn_pos"]()), + aten_op, + exir_op, + tosa_version=conftest.get_option("tosa_version"), + ).run() + + +def test_sigmoid_tosa_BI_3(): + TosaPipelineBI[input_t1]( + SigmoidAddSigmoid(), + (test_data_suite["randn_neg"](), test_data_suite["randn_pos"]()), + aten_op, + exir_op, + tosa_version=conftest.get_option("tosa_version"), + ).run() + + +@common.parametrize("test_data", test_data_suite) +def test_sigmoid_u55_BI(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + Sigmoid(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=False, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_sigmoid_u85_BI(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + Sigmoid(), + (test_data(),), + aten_op, + exir_op, + run_on_fvp=False, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_sigmoid_16bit.py b/backends/arm/test/ops/test_sigmoid_16bit.py index 3cd11699a0a..ddec8c61eb9 100644 --- a/backends/arm/test/ops/test_sigmoid_16bit.py +++ b/backends/arm/test/ops/test_sigmoid_16bit.py @@ -11,12 +11,13 @@ TOSAQuantizer, ) from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.test_pipeline import ( EthosU85PipelineBI, OpNotSupportedPipeline, TosaPipelineBI, ) +from executorch.backends.arm.tosa_specification import TosaSpecification from executorch.backends.xnnpack.test.tester import Quantize from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.quantizer import QuantizationSpec @@ -37,9 +38,18 @@ def _get_16_bit_quant_config(): return qconfig -def get_16bit_sigmoid_quantizer(tosa_str: str): - tosa_spec = common.TosaSpecification.create_from_string(tosa_str) - quantizer = TOSAQuantizer(tosa_spec) +def get_16bit_sigmoid_quantizer(u55_config=False): + tosa_version = conftest.get_option("tosa_version") + tosa_profiles = { + "0.80": TosaSpecification.create_from_string( + "TOSA-0.80+BI" + ("+u55" if u55_config else "") + ), + "1.0": TosaSpecification.create_from_string( + "TOSA-1.0+INT" + ("+u55" if u55_config else "") + ), + } + + quantizer = TOSAQuantizer(tosa_profiles[tosa_version]) quantizer.set_global(get_symmetric_quantization_config()) quantizer.set_module_type( torch.nn.modules.activation.Sigmoid, _get_16_bit_quant_config() @@ -86,7 
+96,7 @@ def test_sigmoid_tosa_BI(test_data): pipeline = TosaPipelineBI( Sigmoid(), (test_data(),), Sigmoid.aten_op, Sigmoid.exir_op ) - pipeline.change_args("quantize", get_16bit_sigmoid_quantizer("TOSA-0.80+BI")) + pipeline.change_args("quantize", get_16bit_sigmoid_quantizer()) pipeline.run() @@ -96,26 +106,41 @@ def test_sigmoid_tosa_BI(test_data): xfails={ "ramp": "AssertionError: Output 0 does not match reference output. MLETORCH-787" }, + strict=False, ) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 -def test_sigmoid_add_sigmoid_tosa_BI(test_data): +def test_sigmoid_tosa_BI_add_sigmoid(test_data): pipeline = TosaPipelineBI( - SigmoidAddSigmoid(), (test_data(),), Sigmoid.aten_op, Sigmoid.exir_op + SigmoidAddSigmoid(), + (test_data(),), + Sigmoid.aten_op, + Sigmoid.exir_op, ) - pipeline.change_args("quantize", get_16bit_sigmoid_quantizer("TOSA-0.80+BI")) pipeline.run() +xfails = { + "ones": "AssertionError: Output 0 does not match reference output. MLETORCH-787", + "rand": "AssertionError: Output 0 does not match reference output. MLETORCH-787", + "rand_4d": "AssertionError: Output 0 does not match reference output. MLETORCH-787", + "ramp": "AssertionError: Output 0 does not match reference output. MLETORCH-787", +} + + @common.parametrize( "test_data", test_data_suite, ) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 -def test_sigmoid_tosa_u55(test_data): +def test_sigmoid_u55_BI(test_data): pipeline = OpNotSupportedPipeline( - Sigmoid(), (test_data(),), "TOSA-0.80+BI+u55", {Sigmoid.exir_op: 1} + Sigmoid(), + (test_data(),), + {Sigmoid.exir_op: 1}, + quantize=True, + u55_subset=True, ) - pipeline.change_args("quantize", get_16bit_sigmoid_quantizer("TOSA-0.80+BI+u55")) + pipeline.change_args("quantize", get_16bit_sigmoid_quantizer(True)) pipeline.run() @@ -124,26 +149,31 @@ def test_sigmoid_tosa_u55(test_data): test_data_suite, ) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 -def test_sigmoid_add_sigmoid_tosa_u55(test_data): +def test_sigmoid_u55_BI_add_sigmoid(test_data): pipeline = OpNotSupportedPipeline( SigmoidAddSigmoid(), (test_data(),), - "TOSA-0.80+BI+u55", {Sigmoid.exir_op: 3}, n_expected_delegates=1, + quantize=True, + u55_subset=True, ) - pipeline.change_args("quantize", get_16bit_sigmoid_quantizer("TOSA-0.80+BI+u55")) + pipeline.change_args("quantize", get_16bit_sigmoid_quantizer(True)) pipeline.run() @common.parametrize("test_data", test_data_suite) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 @common.XfailIfNoCorstone320 -def test_sigmoid_tosa_u85(test_data): +def test_sigmoid_u85_BI(test_data): pipeline = EthosU85PipelineBI( - Sigmoid(), (test_data(),), Sigmoid.aten_op, Sigmoid.exir_op, run_on_fvp=True + Sigmoid(), + (test_data(),), + Sigmoid.aten_op, + Sigmoid.exir_op, + run_on_fvp=True, ) - pipeline.change_args("quantize", get_16bit_sigmoid_quantizer("TOSA-0.80+BI")) + pipeline.change_args("quantize", get_16bit_sigmoid_quantizer()) pipeline.run() @@ -156,7 +186,7 @@ def test_sigmoid_tosa_u85(test_data): ) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 @common.XfailIfNoCorstone320 -def test_sigmoid_add_sigmoid_tosa_u85(test_data): +def test_sigmoid_u85_BI_add_sigmoid(test_data): pipeline = EthosU85PipelineBI( SigmoidAddSigmoid(), (test_data(),), @@ -164,5 +194,5 @@ def test_sigmoid_add_sigmoid_tosa_u85(test_data): Sigmoid.exir_op, run_on_fvp=True, ) - pipeline.change_args("quantize", get_16bit_sigmoid_quantizer("TOSA-0.80+BI")) + 
pipeline.change_args("quantize", get_16bit_sigmoid_quantizer()) pipeline.run() diff --git a/backends/arm/test/ops/test_sigmoid_32bit.py b/backends/arm/test/ops/test_sigmoid_32bit.py index fbfc263a6d0..a0fe077da5f 100644 --- a/backends/arm/test/ops/test_sigmoid_32bit.py +++ b/backends/arm/test/ops/test_sigmoid_32bit.py @@ -7,12 +7,13 @@ import torch from executorch.backends.arm.quantizer import TOSAQuantizer from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.test_pipeline import ( EthosU85PipelineBI, OpNotSupportedPipeline, TosaPipelineBI, ) +from executorch.backends.arm.tosa_specification import TosaSpecification from executorch.backends.xnnpack.test.tester import Quantize from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.quantizer import QuantizationSpec @@ -53,9 +54,18 @@ def _get_32_bit_quant_config(): return qconfig -def get_32bit_sigmoid_quantizer(tosa_str: str): - tosa_spec = common.TosaSpecification.create_from_string(tosa_str) - quantizer = TOSAQuantizer(tosa_spec) +def get_32bit_sigmoid_quantizer(u55_config=False): + tosa_version = conftest.get_option("tosa_version") + tosa_profiles = { + "0.80": TosaSpecification.create_from_string( + "TOSA-0.80+BI" + ("+u55" if u55_config else "") + ), + "1.0": TosaSpecification.create_from_string( + "TOSA-1.0+INT" + ("+u55" if u55_config else "") + ), + } + + quantizer = TOSAQuantizer(tosa_profiles[tosa_version]) quantizer.set_global(_get_32_bit_quant_config()) quantizer.set_module_type( torch.nn.modules.activation.Sigmoid, _get_16_bit_quant_config() @@ -105,55 +115,65 @@ def test_sigmoid_tosa_BI(test_data): Sigmoid.aten_op, Sigmoid.exir_op, ) - pipeline.change_args("quantize", get_32bit_sigmoid_quantizer("TOSA-0.80+BI")) + pipeline.change_args("quantize", get_32bit_sigmoid_quantizer()) pipeline.run() @common.parametrize("test_data", test_data_suite) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 -def test_sigmoid_add_sigmoid_tosa_BI(test_data): +def test_sigmoid_tosa_BI_add_sigmoid(test_data): pipeline = TosaPipelineBI( SigmoidAddSigmoid(), (test_data(),), Sigmoid.aten_op, Sigmoid.exir_op, ) - pipeline.change_args("quantize", get_32bit_sigmoid_quantizer("TOSA-0.80+BI")) + pipeline.change_args("quantize", get_32bit_sigmoid_quantizer()) pipeline.run() @common.parametrize("test_data", test_data_suite) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 -def test_sigmoid_tosa_u55(test_data): +def test_sigmoid_u55_BI(test_data): pipeline = OpNotSupportedPipeline( - Sigmoid(), (test_data(),), "TOSA-0.80+BI+u55", {Sigmoid.exir_op: 1} + Sigmoid(), + (test_data(),), + {Sigmoid.exir_op: 1}, + quantize=True, + u55_subset=True, ) - pipeline.change_args("quantize", get_32bit_sigmoid_quantizer("TOSA-0.80+BI+u55")) + pipeline.change_args("quantize", get_32bit_sigmoid_quantizer(True)) pipeline.run() @common.parametrize("test_data", test_data_suite) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 -def test_sigmoid_add_sigmoid_tosa_u55(test_data): +def test_sigmoid_u55_BI_add_sigmoid(test_data): pipeline = OpNotSupportedPipeline( SigmoidAddSigmoid(), (test_data(),), - "TOSA-0.80+BI+u55", {Sigmoid.exir_op: 3}, n_expected_delegates=1, + quantize=True, + u55_subset=True, ) - pipeline.change_args("quantize", get_32bit_sigmoid_quantizer("TOSA-0.80+BI+u55")) + 
pipeline.change_args("quantize", get_32bit_sigmoid_quantizer(True)) pipeline.run() @common.parametrize("test_data", test_data_suite) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 @common.XfailIfNoCorstone320 -def test_sigmoid_tosa_u85(test_data): +@pytest.mark.flaky(reruns=5) +def test_sigmoid_u85_BI(test_data): pipeline = EthosU85PipelineBI( - Sigmoid(), (test_data(),), Sigmoid.aten_op, Sigmoid.exir_op, run_on_fvp=True + Sigmoid(), + (test_data(),), + Sigmoid.aten_op, + Sigmoid.exir_op, + run_on_fvp=True, ) - pipeline.change_args("quantize", get_32bit_sigmoid_quantizer("TOSA-0.80+BI")) + pipeline.change_args("quantize", get_32bit_sigmoid_quantizer()) pipeline.run() @@ -162,11 +182,14 @@ def test_sigmoid_tosa_u85(test_data): test_data_suite, xfails={ "ramp": "AssertionError: Output 0 does not match reference output.", + "rand": "AssertionError: Output 0 does not match reference output.", + "rand_4d": "AssertionError: Output 0 does not match reference output.", }, ) @pytest.mark.flaky(reruns=32) # Flaky due to Vela bug: MLBEDSW-10642 @common.XfailIfNoCorstone320 -def test_sigmoid_add_sigmoid_tosa_u85(test_data): +@pytest.mark.flaky(reruns=5) +def test_sigmoid_u85_BI_add_sigmoid(test_data): pipeline = EthosU85PipelineBI( SigmoidAddSigmoid(), (test_data(),), @@ -174,5 +197,5 @@ def test_sigmoid_add_sigmoid_tosa_u85(test_data): Sigmoid.exir_op, run_on_fvp=True, ) - pipeline.change_args("quantize", get_32bit_sigmoid_quantizer("TOSA-0.80+BI")) + pipeline.change_args("quantize", get_32bit_sigmoid_quantizer()) pipeline.run() diff --git a/backends/arm/test/ops/test_silu.py b/backends/arm/test/ops/test_silu.py index 51748b02450..e1736bf10e6 100644 --- a/backends/arm/test/ops/test_silu.py +++ b/backends/arm/test/ops/test_silu.py @@ -30,14 +30,14 @@ def forward( return torch.nn.SiLU(inplace=_inplace)(_input) test_data: list[input_t] = { - "op_silu_rank1_ones": (torch.ones(5),), - "op_silu_rank1_negative_ones": (torch.ones(5) * (-1),), - "op_silu_rank1_rand": (torch.rand(5) * 5,), - "op_silu_rank4_ones": (torch.ones(1, 10, 25, 20),), - "op_silu_rank4_negative_ones": ((-1) * torch.ones(1, 10, 25, 20),), - "op_silu_rank4_large_rand": (200 * torch.rand(1, 10, 25, 20),), - "op_silu_rank4_negative_large_rand": ((-200) * torch.rand(1, 10, 25, 20),), - "op_silu_rank4_large_randn": (200 * torch.randn(1, 10, 25, 20) + 1,), + "op_silu_rank1_ones": lambda: torch.ones(5), + "op_silu_rank1_negative_ones": lambda: torch.ones(5) * (-1), + "op_silu_rank1_rand": lambda: torch.rand(5) * 5, + "op_silu_rank4_ones": lambda: torch.ones(1, 10, 25, 20), + "op_silu_rank4_negative_ones": lambda: (-1) * torch.ones(1, 10, 25, 20), + "op_silu_rank4_large_rand": lambda: 200 * torch.rand(1, 10, 25, 20), + "op_silu_rank4_negative_large_rand": lambda: (-200) * torch.rand(1, 10, 25, 20), + "op_silu_rank4_large_randn": lambda: 200 * torch.randn(1, 10, 25, 20) + 1, } aten_op_MI = "torch.ops.aten.silu.default" @@ -47,28 +47,28 @@ def forward( @common.parametrize("test_data", Silu.test_data) def test_silu_tosa_MI(test_data: input_t): - silu_data = (test_data[0], False) + silu_data = (test_data(), False) pipeline = TosaPipelineMI[input_t](Silu(), silu_data, Silu.aten_op_MI) pipeline.run() @common.parametrize("test_data", Silu.test_data) def test_silu_tosa_MI_inplace(test_data: input_t): - silu_data = (test_data[0], True) + silu_data = (test_data(), True) pipeline = TosaPipelineMI[input_t](Silu(), silu_data, Silu.aten_op_inplace_MI) pipeline.run() @common.parametrize("test_data", Silu.test_data) def 
test_silu_tosa_BI(test_data: input_t): - silu_data = (test_data[0], False) + silu_data = (test_data(), False) pipeline = TosaPipelineBI[input_t](Silu(), silu_data, Silu.aten_op_BI) pipeline.run() @common.parametrize("test_data", Silu.test_data) def test_silu_tosa_BI_inplace(test_data: input_t): - silu_data = (test_data[0], True) + silu_data = (test_data(), True) pipeline = TosaPipelineBI[input_t](Silu(), silu_data, Silu.aten_op_BI) pipeline.run() @@ -76,7 +76,7 @@ def test_silu_tosa_BI_inplace(test_data: input_t): @common.parametrize("test_data", Silu.test_data) @common.XfailIfNoCorstone300 def test_silu_u55_BI(test_data: input_t): - silu_data = (test_data[0], False) + silu_data = (test_data(), False) pipeline = EthosU55PipelineBI[input_t]( Silu(), silu_data, Silu.aten_op_BI, run_on_fvp=True ) @@ -86,7 +86,7 @@ def test_silu_u55_BI(test_data: input_t): @common.parametrize("test_data", Silu.test_data) @common.XfailIfNoCorstone300 def test_silu_u55_BI_inplace(test_data: input_t): - silu_data = (test_data[0], True) + silu_data = (test_data(), True) pipeline = EthosU55PipelineBI[input_t]( Silu(), silu_data, Silu.aten_op_BI, run_on_fvp=True ) @@ -96,7 +96,7 @@ def test_silu_u55_BI_inplace(test_data: input_t): @common.parametrize("test_data", Silu.test_data) @common.XfailIfNoCorstone320 def test_silu_u85_BI(test_data: input_t): - silu_data = (test_data[0], False) + silu_data = (test_data(), False) pipeline = EthosU85PipelineBI[input_t]( Silu(), silu_data, Silu.aten_op_BI, run_on_fvp=True ) @@ -106,7 +106,7 @@ def test_silu_u85_BI(test_data: input_t): @common.parametrize("test_data", Silu.test_data) @common.XfailIfNoCorstone320 def test_silu_u85_BI_inplace(test_data: input_t): - silu_data = (test_data[0], True) + silu_data = (test_data(), True) pipeline = EthosU85PipelineBI[input_t]( Silu(), silu_data, Silu.aten_op_BI, run_on_fvp=True ) diff --git a/backends/arm/test/ops/test_slice.py b/backends/arm/test/ops/test_slice.py index 91ef51cc2a2..6ae12c41657 100644 --- a/backends/arm/test/ops/test_slice.py +++ b/backends/arm/test/ops/test_slice.py @@ -4,135 +4,91 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
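For orientation before the rewritten slice tests: the Slice module below indexes a tensor with slice objects built from (start, stop) pairs. A minimal standalone sketch of that pattern, in plain PyTorch, with shapes chosen purely for illustration:

    import torch

    x = torch.ones(10, 10)
    # Build slice objects from (start, stop) pairs, mirroring the Slice module.
    slices = tuple(slice(*i) for i in [(1, 3), (3, None)])
    assert x[slices].shape == (2, 7)  # rows 1:3, columns 3:10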
-import unittest -from typing import Tuple -import pytest +from typing import Tuple import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - -test_data_suite = [ - (torch.ones(10), [(3, -3)]), - (torch.ones(10), [(-8, 3)]), - (torch.ones(10, 10), [(1, 3), (3, None)]), - (torch.ones(10, 10, 10), [(0, 7), (0, None), (0, 8)]), - (torch.ones((1, 12, 10, 10)), [(None, None), (None, 5), (3, 5), (4, 10)]), -] - - -class TestSimpleSlice(unittest.TestCase): - - class Slice(torch.nn.Module): - def forward(self, x: torch.Tensor, s: list[tuple[int, int]]): - slices = [slice(*i) for i in s] - return x[slices] - - def _test_slice_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: torch.Tensor - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check(["torch.ops.aten.slice.Tensor"]) - .to_edge() - .check(["executorch_exir_dialects_edge__ops_aten_slice_copy"]) - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - if conftest.is_option_enabled("tosa_ref_model"): - tester.run_method_and_compare_outputs(inputs=test_data) - - def _test_slice_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check(["torch.ops.aten.slice.Tensor"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - if conftest.is_option_enabled("tosa_ref_model"): - tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) - - def _test_slice_ethos_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: Tuple[torch.Tensor], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_u55_compile_spec(), - ) - .quantize() - .export() - .check(["torch.ops.aten.slice.Tensor"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - def _test_slice_u55_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - self._test_slice_ethos_BI_pipeline( - common.get_u55_compile_spec(), module, test_data - ) - - def _test_slice_u85_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - self._test_slice_ethos_BI_pipeline( - common.get_u85_compile_spec(), module, test_data - ) - - @parameterized.expand(test_data_suite) - @pytest.mark.tosa_ref_model - def test_slice_tosa_MI(self, tensor: torch.Tensor, slices: list[tuple[int, int]]): - self._test_slice_tosa_MI_pipeline(self.Slice(), (tensor, slices)) - - @parameterized.expand(test_data_suite) - @pytest.mark.tosa_ref_model - def test_slice_nchw_tosa_BI( - self, tensor: torch.Tensor, slices: list[tuple[int, int]] - ): - self._test_slice_tosa_BI_pipeline(self.Slice(), (tensor, slices)) - - @parameterized.expand(test_data_suite) - @pytest.mark.tosa_ref_model - def test_slice_nhwc_tosa_BI( - self, tensor: torch.Tensor, slices: list[tuple[int, int]] - ): - self._test_slice_tosa_BI_pipeline(self.Slice(), (tensor, slices)) - - @parameterized.expand(test_data_suite) - def test_slice_u55_BI(self, tensor: 
torch.Tensor, slices: list[tuple[int, int]]): - self._test_slice_u55_BI_pipeline(self.Slice(), (tensor, slices)) - - @parameterized.expand(test_data_suite) - def test_slice_u85_BI(self, tensor: torch.Tensor, slices: list[tuple[int, int]]): - self._test_slice_u85_BI_pipeline(self.Slice(), (tensor, slices)) +from executorch.backends.arm.test import common + +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.slice.Tensor" +exir_op = "executorch_exir_dialects_edge__ops_aten_slice_copy" + +input_t1 = Tuple[torch.Tensor] # Input x + +test_data_suite = { + "ones_neg_3": lambda: (torch.ones(10), [(3, -3)]), + "ones_neg_8": lambda: (torch.ones(10), [(-8, 3)]), + "ones_slice_2": lambda: (torch.ones(10, 10), [(1, 3), (3, None)]), + "ones_slice_3": lambda: (torch.ones(10, 10, 10), [(0, 7), (0, None), (0, 8)]), + "ones_slice_4": lambda: ( + torch.ones((1, 12, 10, 10)), + [(None, None), (None, 5), (3, 5), (4, 10)], + ), +} + + +class Slice(torch.nn.Module): + + def forward(self, x: torch.Tensor, s: list[tuple[int, int]]): + slices = [slice(*i) for i in s] + return x[slices] + + +@common.parametrize("test_data", test_data_suite) +def test_slice_tensor_tosa_MI(test_data: torch.Tensor): + pipeline = TosaPipelineMI[input_t1](Slice(), test_data(), aten_op, exir_op) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_slice_tensor_tosa_BI_nchw(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t1]( + Slice(), + test_data(), + aten_op, + exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_slice_tensor_tosa_BI_nhwc(test_data: torch.Tensor): + pipeline = TosaPipelineBI[input_t1]( + Slice(), + test_data(), + aten_op, + exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_slice_tensor_u55_BI(test_data: torch.Tensor): + pipeline = EthosU55PipelineBI[input_t1]( + Slice(), + test_data(), + aten_ops=[], + exir_ops=[], + run_on_fvp=False, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_slice_tensor_u85_BI(test_data: torch.Tensor): + pipeline = EthosU85PipelineBI[input_t1]( + Slice(), + test_data(), + aten_ops=[], + exir_ops=[], + run_on_fvp=False, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_softmax.py b/backends/arm/test/ops/test_softmax.py index dcee5d038f2..5ab616c0eea 100644 --- a/backends/arm/test/ops/test_softmax.py +++ b/backends/arm/test/ops/test_softmax.py @@ -18,7 +18,6 @@ aten_op = "torch.ops.aten.softmax.default" # Used for checking that we do not have softmax in the graph after decompose exir_op = "executorch_exir_dialects_edge__ops_aten__softmax_tensor" - input_t1 = Tuple[torch.Tensor] # Input x @@ -31,20 +30,20 @@ def forward(self, x): return self.softmax(x) test_data = { - "ones": ((torch.ones(10, 10),), 1), - "ones_neg_dim": ((torch.ones(1, 3, 4),), -1), - "randn_neg_dim": ((torch.randn(1, 5, 8, 7),), -3), - "zeros": ((torch.zeros(1, 8, 5, 2),), 0), - "zeros_neg_dim": ((torch.zeros(1, 7, 8, 9),), -4), - "rand": ((torch.rand(1, 2, 5, 8),), 2), - "rand_neg_dim": ((torch.rand(1, 10, 8, 10),), -2), - "randn_mult_batches": ((torch.randn(2, 10, 10, 10),), 3), + "ones": lambda: ((torch.ones(10, 10),), 1), + "ones_neg_dim": lambda: ((torch.ones(1, 3, 4),), -1), + "randn_neg_dim": lambda: ((torch.randn(1, 5, 8, 7),), -3), + "zeros": lambda: ((torch.zeros(1, 8, 5, 2),), 0), + "zeros_neg_dim": lambda: 
((torch.zeros(1, 7, 8, 9),), -4), + "rand": lambda: ((torch.rand(1, 2, 5, 8),), 2), + "rand_neg_dim": lambda: ((torch.rand(1, 10, 8, 10),), -2), + "randn_mult_batches": lambda: ((torch.randn(2, 10, 10, 10),), 3), } @common.parametrize("test_data", Softmax.test_data) def test_softmax_tosa_MI(test_data): - data, dim = test_data + data, dim = test_data() pipeline = TosaPipelineMI[input_t1](Softmax(dim), data, []) pipeline.add_stage_after( "to_edge_transform_and_lower", pipeline.tester.check_not, [exir_op] @@ -54,7 +53,7 @@ def test_softmax_tosa_MI(test_data): @common.parametrize("test_data", Softmax.test_data) def test_softmax_tosa_BI(test_data): - data, dim = test_data + data, dim = test_data() pipeline = TosaPipelineBI[input_t1](Softmax(dim), data, []) pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op]) pipeline.change_args("run_method_and_compare_outputs", qtol=1) @@ -70,7 +69,7 @@ def test_softmax_tosa_BI(test_data): ) @common.XfailIfNoCorstone300 def test_softmax_u55_BI(test_data): - data, dim = test_data + data, dim = test_data() pipeline = EthosU55PipelineBI[input_t1](Softmax(dim), data, [], run_on_fvp=True) pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op]) pipeline.change_args("run_method_and_compare_outputs", qtol=1) @@ -86,7 +85,7 @@ def test_softmax_u55_BI(test_data): ) @common.XfailIfNoCorstone320 def test_softmax_u85_BI(test_data): - data, dim = test_data + data, dim = test_data() pipeline = EthosU85PipelineBI[input_t1](Softmax(dim), data, [], run_on_fvp=True) pipeline.add_stage_after("quantize", pipeline.tester.check_not, [aten_op]) pipeline.change_args("run_method_and_compare_outputs", qtol=1) diff --git a/backends/arm/test/ops/test_split.py b/backends/arm/test/ops/test_split.py index b86e27f1a4c..90458584995 100644 --- a/backends/arm/test/ops/test_split.py +++ b/backends/arm/test/ops/test_split.py @@ -1,141 +1,147 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
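As background for the split tests below: split accepts either a single chunk size or an explicit list of section sizes along one dimension. A minimal sketch, plain PyTorch, sizes chosen only for illustration:

    import torch

    x = torch.rand(10, 15, 10)
    # Explicit section sizes along dim 1; they must sum to x.shape[1] == 15.
    parts = x.split(split_size=[2, 2, 11], dim=1)
    assert [p.shape[1] for p in parts] == [2, 2, 11]
    # An int chunk size instead yields equal chunks, the last possibly smaller.
    assert len(torch.rand(10).split(2, dim=0)) == 5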
-import unittest + +from typing import Tuple import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - -test_data_t = tuple[torch.Tensor, int | list[int], int] - - -class TestSimpleSplit(unittest.TestCase): - class Split(torch.nn.Module): - - test_data: list[tuple[test_data_t]] = [ - ((torch.rand(10), 2, 0),), - ((torch.rand(10, 10), 3, 1),), - ((torch.rand(10, 10), 4, -1),), - ((torch.rand(10, 15, 10), [2, 2, 11], 1),), - ((torch.rand(4, 4, 4, 4), 2, 0),), - ((torch.rand(4, 4, 4, 4), [1, 1, 1, 1], -2),), - ] - - def forward( - self, x: torch.Tensor, split_size_or_sections: int | list[int], dim: int - ): - return x.split(split_size=split_size_or_sections, dim=dim) - - class SplitWithSizes(torch.nn.Module): - def forward(self, x: torch.Tensor, split_sizes: list[int], dim: int): - return x.split_with_sizes(split_sizes=split_sizes, dim=dim) - - class SplitSingleOut(torch.nn.Module): - def forward( - self, x: torch.Tensor, split_size_or_sections: int | list[int], dim: int - ): - return x.split(split_size=split_size_or_sections, dim=dim)[1] - - class SplitTwoOut(torch.nn.Module): - def forward( - self, x: torch.Tensor, split_size_or_sections: int | list[int], dim: int - ): - return x.split(split_size=split_size_or_sections, dim=dim)[1:3] - - def _test_split_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: test_data_t +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +exir_op = "executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default" +input_t1 = Tuple[torch.Tensor] # Input x + + +class Split(torch.nn.Module): + + test_data = { + "split_1d_2_size_0_dim": lambda: (torch.rand(10), 2, 0), + "split_2d_3_size_1_dim": lambda: (torch.rand(10, 10), 3, 1), + "split_2d_2_size_4_dim": lambda: (torch.rand(10, 10), 4, -1), + "split_4d_2_size_2_dim": lambda: (torch.rand(4, 4, 4, 4), 2, 0), + } + + test_data_list = { + "split_3d_2_sizes_dim": lambda: (torch.rand(10, 15, 10), [2, 2, 11], 1), + "split_4d_2_sizes_dim_neg": lambda: (torch.rand(4, 4, 4, 4), [1, 1, 1, 1], -2), + } + + def forward( + self, x: torch.Tensor, split_size_or_sections: int | list[int], dim: int ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .to_edge() - .check( - [ - "executorch_exir_dialects_edge__ops_aten_split_with_sizes_copy_default" - ] - ) - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_split_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: test_data_t + return x.split(split_size=split_size_or_sections, dim=dim) + + +class SplitWithSizes(torch.nn.Module): + def forward(self, x: torch.Tensor, split_sizes: list[int], dim: int): + return x.split_with_sizes(split_sizes=split_sizes, dim=dim) + + +class SplitSingleOut(torch.nn.Module): + def forward( + self, x: torch.Tensor, split_size_or_sections: int | list[int], dim: int ): + return x.split(split_size=split_size_or_sections, dim=dim)[1] + - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .to_edge() - .partition() - 
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_split_ethosu_BI_pipeline( - self, compile_spec: CompileSpec, module: torch.nn.Module, test_data: test_data_t +class SplitTwoOut(torch.nn.Module): + def forward( + self, x: torch.Tensor, split_size_or_sections: int | list[int], dim: int ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - @parameterized.expand(Split.test_data) - def test_split_tosa_MI(self, test_data: test_data_t): - self._test_split_tosa_MI_pipeline(self.Split(), test_data) - - @parameterized.expand([Split.test_data[3], Split.test_data[5]]) - def test_split_with_sizes_tosa_MI(self, test_data: test_data_t): - assert isinstance(test_data[1], list) - self._test_split_tosa_MI_pipeline(self.SplitWithSizes(), test_data) - - @parameterized.expand(Split.test_data) - def test_split_one_out_tosa_MI(self, test_data: test_data_t): - self._test_split_tosa_MI_pipeline(self.SplitSingleOut(), test_data) - - @parameterized.expand(Split.test_data) - def test_split_two_out_tosa_MI(self, test_data: test_data_t): - self._test_split_tosa_MI_pipeline(self.SplitTwoOut(), test_data) - - @parameterized.expand(Split.test_data) - def test_split_tosa_BI(self, test_data: test_data_t): - self._test_split_tosa_BI_pipeline(self.Split(), test_data) - - @parameterized.expand(Split.test_data) - def test_split_u55_BI(self, test_data: test_data_t): - self._test_split_ethosu_BI_pipeline( - common.get_u55_compile_spec(), self.Split(), test_data - ) - - @parameterized.expand(Split.test_data) - def test_split_u85_BI(self, test_data: test_data_t): - self._test_split_ethosu_BI_pipeline( - common.get_u85_compile_spec(), self.Split(), test_data - ) + return x.split(split_size=split_size_or_sections, dim=dim)[1:3] + + +@common.parametrize( + "test_data", + (Split.test_data | Split.test_data_list), +) +def test_split_with_sizes_tosa_MI(test_data: input_t1): + + pipeline = TosaPipelineMI[input_t1]( + Split(), + test_data(), + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize("test_data", Split.test_data_list) +def test_split_with_sizes_tosa_MI_2(test_data: input_t1): + + pipeline = TosaPipelineMI[input_t1]( + SplitWithSizes(), + test_data(), + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize( + "test_data", + (Split.test_data | Split.test_data_list), +) +def test_split_with_sizes_tosa_MI_one_out(test_data: input_t1): + + pipeline = TosaPipelineMI[input_t1]( + SplitSingleOut(), + test_data(), + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize( + "test_data", + (Split.test_data | Split.test_data_list), +) +def test_split_with_sizes_tosa_BI(test_data: input_t1): + + pipeline = TosaPipelineBI[input_t1]( + Split(), + test_data(), + aten_op=[], + exir_op=exir_op, + ) + pipeline.run() + + +@common.parametrize( + "test_data", + (Split.test_data | Split.test_data_list), +) +def test_split_with_sizes_u55_BI(test_data: input_t1): + pipeline = EthosU55PipelineBI[input_t1]( + Split(), + test_data(), + aten_ops=[], + exir_ops=exir_op, + run_on_fvp=False, + ) + pipeline.run() + + +@common.parametrize( + "test_data", + (Split.test_data | Split.test_data_list), +) +def test_split_with_sizes_u85_BI(test_data: input_t1): + + pipeline = 
EthosU85PipelineBI[input_t1]( + Split(), + test_data(), + aten_ops=[], + exir_ops=exir_op, + run_on_fvp=False, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_sqrt.py b/backends/arm/test/ops/test_sqrt.py index 53a1e79c0a8..0c79f534656 100644 --- a/backends/arm/test/ops/test_sqrt.py +++ b/backends/arm/test/ops/test_sqrt.py @@ -31,11 +31,11 @@ def forward(self, x): return torch.sqrt(x) test_data: Dict[str, input_t] = { - "sqrt_tensor_rank1_ones": (torch.ones(10),), - "sqrt_tensor_rank2_random": (torch.rand(5, 10),), - "sqrt_tensor_rank3_ones": (torch.ones(2, 3, 4),), - "sqrt_tensor_rank4_random": (torch.rand(1, 3, 8, 8),), - "sqrt_tensor_rank4_multibatch": (torch.rand(2, 3, 4, 4),), + "sqrt_tensor_rank1_ones": lambda: (torch.ones(10),), + "sqrt_tensor_rank2_random": lambda: (torch.rand(5, 10),), + "sqrt_tensor_rank3_ones": lambda: (torch.ones(2, 3, 4),), + "sqrt_tensor_rank4_random": lambda: (torch.rand(1, 3, 8, 8),), + "sqrt_tensor_rank4_multibatch": lambda: (torch.rand(2, 3, 4, 4),), } @@ -47,7 +47,10 @@ def forward(self, x): @common.parametrize("test_data", Sqrt.test_data) def test_sqrt_tosa_MI(test_data: Sqrt.input_t): pipeline = TosaPipelineMI[Sqrt.input_t]( - Sqrt(), test_data, Sqrt.aten_op_MI, Sqrt.exir_op_MI + Sqrt(), + test_data(), + Sqrt.aten_op_MI, + Sqrt.exir_op_MI, ) pipeline.run() @@ -55,7 +58,10 @@ def test_sqrt_tosa_MI(test_data: Sqrt.input_t): @common.parametrize("test_data", Sqrt.test_data) def test_sqrt_tosa_BI(test_data: Sqrt.input_t): pipeline = TosaPipelineBI[Sqrt.input_t]( - Sqrt(), test_data, Sqrt.aten_op_BI, Sqrt.exir_op_BI + Sqrt(), + test_data(), + Sqrt.aten_op_BI, + Sqrt.exir_op_BI, ) pipeline.run() @@ -64,7 +70,11 @@ def test_sqrt_tosa_BI(test_data: Sqrt.input_t): @common.XfailIfNoCorstone300 def test_sqrt_u55_BI(test_data: Sqrt.input_t): pipeline = EthosU55PipelineBI[Sqrt.input_t]( - Sqrt(), test_data, Sqrt.aten_op_BI, Sqrt.exir_op_BI, run_on_fvp=True + Sqrt(), + test_data(), + Sqrt.aten_op_BI, + Sqrt.exir_op_BI, + run_on_fvp=True, ) pipeline.run() @@ -73,6 +83,10 @@ def test_sqrt_u55_BI(test_data: Sqrt.input_t): @common.XfailIfNoCorstone320 def test_sqrt_u85_BI(test_data: Sqrt.input_t): pipeline = EthosU85PipelineBI[Sqrt.input_t]( - Sqrt(), test_data, Sqrt.aten_op_BI, Sqrt.exir_op_BI, run_on_fvp=True + Sqrt(), + test_data(), + Sqrt.aten_op_BI, + Sqrt.exir_op_BI, + run_on_fvp=True, ) pipeline.run() diff --git a/backends/arm/test/ops/test_squeeze.py b/backends/arm/test/ops/test_squeeze.py index 9f02392e1e2..e5f606c887e 100644 --- a/backends/arm/test/ops/test_squeeze.py +++ b/backends/arm/test/ops/test_squeeze.py @@ -1,5 +1,4 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -8,208 +7,194 @@ # Tests the squeeze op which squeezes a given dimension with size 1 into a lower ranked tensor. 
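A quick illustration of that behavior, using plain PyTorch and shapes matching the test parameters below:

    import torch

    x = torch.randn(1, 5, 1, 5)
    assert x.squeeze().shape == (5, 5)         # all size-1 dims removed
    assert x.squeeze(-2).shape == (1, 5, 5)    # only dim -2 removed
    assert x.squeeze(1).shape == (1, 5, 1, 5)  # dim 1 has size 5, left unchanged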
# -import unittest -from typing import Optional, Tuple + +from typing import Tuple import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester - -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - - -class TestSqueeze(unittest.TestCase): - class SqueezeDim(torch.nn.Module): - test_parameters: list[tuple[torch.Tensor, int]] = [ - (torch.randn(1, 1, 5), -2), - (torch.randn(1, 2, 3, 1), 3), - (torch.randn(1, 5, 1, 5), -2), - ] - - def forward(self, x: torch.Tensor, dim: int): - return x.squeeze(dim) - - class SqueezeDims(torch.nn.Module): - test_parameters: list[tuple[torch.Tensor, tuple[int]]] = [ - (torch.randn(1, 1, 5), (0, 1)), - (torch.randn(1, 5, 5, 1), (0, -1)), - (torch.randn(1, 5, 1, 5), (0, -2)), - ] - - def forward(self, x: torch.Tensor, dims: tuple[int]): - return x.squeeze(dims) - - class Squeeze(torch.nn.Module): - test_parameters: list[tuple[torch.Tensor]] = [ - (torch.randn(1, 1, 5),), - (torch.randn(1, 5, 5, 1),), - (torch.randn(1, 5, 1, 5),), - ] - - def forward(self, x: torch.Tensor): - return x.squeeze() - - def _test_squeeze_tosa_MI_pipeline( - self, - module: torch.nn.Module, - test_data: Tuple[torch.Tensor, Optional[tuple[int]]], - export_target: str, - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({export_target: 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_squeeze_tosa_BI_pipeline( - self, - module: torch.nn.Module, - test_data: Tuple[torch.Tensor, Optional[tuple[int]]], - export_target: str, - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({export_target: 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_squeeze_ethosu_BI_pipeline( - self, - compile_spec: CompileSpec, - module: torch.nn.Module, - test_data: Tuple[torch.Tensor, Optional[tuple[int]]], - export_target: str, - ): - ( - ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) - .quantize() - .export() - .check_count({export_target: 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - @parameterized.expand(Squeeze.test_parameters) - def test_squeeze_tosa_MI( - self, - test_tensor: torch.Tensor, - ): - self._test_squeeze_tosa_MI_pipeline( - self.Squeeze(), (test_tensor,), "torch.ops.aten.squeeze.default" - ) - - @parameterized.expand(Squeeze.test_parameters) - def test_squeeze_tosa_BI( - self, - test_tensor: torch.Tensor, - ): - self._test_squeeze_tosa_BI_pipeline( - self.Squeeze(), (test_tensor,), "torch.ops.aten.squeeze.default" - ) - - @parameterized.expand(Squeeze.test_parameters) - def test_squeeze_u55_BI( - self, - test_tensor: torch.Tensor, - ): - self._test_squeeze_ethosu_BI_pipeline( - common.get_u55_compile_spec(), - self.Squeeze(), - (test_tensor,), - "torch.ops.aten.squeeze.default", - ) - - @parameterized.expand(Squeeze.test_parameters) - def test_squeeze_u85_BI( - self, - test_tensor: torch.Tensor, - ): - self._test_squeeze_ethosu_BI_pipeline( - 
common.get_u85_compile_spec(), - self.Squeeze(), - (test_tensor,), - "torch.ops.aten.squeeze.default", - ) - - @parameterized.expand(SqueezeDim.test_parameters) - def test_squeeze_dim_tosa_MI(self, test_tensor: torch.Tensor, dim: int): - self._test_squeeze_tosa_MI_pipeline( - self.SqueezeDim(), (test_tensor, dim), "torch.ops.aten.squeeze.dim" - ) - - @parameterized.expand(SqueezeDim.test_parameters) - def test_squeeze_dim_tosa_BI(self, test_tensor: torch.Tensor, dim: int): - self._test_squeeze_tosa_BI_pipeline( - self.SqueezeDim(), (test_tensor, dim), "torch.ops.aten.squeeze.dim" - ) - - @parameterized.expand(SqueezeDim.test_parameters) - def test_squeeze_dim_u55_BI(self, test_tensor: torch.Tensor, dim: int): - self._test_squeeze_ethosu_BI_pipeline( - common.get_u55_compile_spec(), - self.SqueezeDim(), - (test_tensor, dim), - "torch.ops.aten.squeeze.dim", - ) - - @parameterized.expand(SqueezeDim.test_parameters) - def test_squeeze_dim_u85_BI(self, test_tensor: torch.Tensor, dim: int): - self._test_squeeze_ethosu_BI_pipeline( - common.get_u85_compile_spec(), - self.SqueezeDim(), - (test_tensor, dim), - "torch.ops.aten.squeeze.dim", - ) - - @parameterized.expand(SqueezeDims.test_parameters) - def test_squeeze_dims_tosa_MI(self, test_tensor: torch.Tensor, dims: tuple[int]): - self._test_squeeze_tosa_MI_pipeline( - self.SqueezeDims(), (test_tensor, dims), "torch.ops.aten.squeeze.dims" - ) - - @parameterized.expand(SqueezeDims.test_parameters) - def test_squeeze_dims_tosa_BI(self, test_tensor: torch.Tensor, dims: tuple[int]): - self._test_squeeze_tosa_BI_pipeline( - self.SqueezeDims(), (test_tensor, dims), "torch.ops.aten.squeeze.dims" - ) - - @parameterized.expand(SqueezeDims.test_parameters) - def test_squeeze_dims_u55_BI(self, test_tensor: torch.Tensor, dims: tuple[int]): - self._test_squeeze_ethosu_BI_pipeline( - common.get_u55_compile_spec(), - self.SqueezeDims(), - (test_tensor, dims), - "torch.ops.aten.squeeze.dims", - ) - - @parameterized.expand(SqueezeDims.test_parameters) - def test_squeeze_dims_u85_BI(self, test_tensor: torch.Tensor, dims: tuple[int]): - self._test_squeeze_ethosu_BI_pipeline( - common.get_u85_compile_spec(), - self.SqueezeDims(), - (test_tensor, dims), - "torch.ops.aten.squeeze.dims", - ) +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +input_t1 = Tuple[torch.Tensor] # Input x + + +class SqueezeDim(torch.nn.Module): + test_parameters = { + "squeeze3d_dim_neg_2": lambda: (torch.randn(1, 1, 5), -2), + "squeeze4d_dim_pos_3": lambda: (torch.randn(1, 2, 3, 1), 3), + "squeeze4d_dim_neg_2": lambda: (torch.randn(1, 5, 1, 5), -2), + } + + def forward(self, x: torch.Tensor, dim: int): + return x.squeeze(dim) + + +class SqueezeDims(torch.nn.Module): + test_parameters = { + "squeeze3d_dims_0_1": lambda: (torch.randn(1, 1, 5), (0, 1)), + "squeeze4d_dims_0_neg_1": lambda: (torch.randn(1, 5, 5, 1), (0, -1)), + "squeeze4d_dims_0_neg_2": lambda: (torch.randn(1, 5, 1, 5), (0, -2)), + } + + def forward(self, x: torch.Tensor, dims: tuple[int]): + return x.squeeze(dims) + + +class Squeeze(torch.nn.Module): + test_parameters = { + "squeeze3d": lambda: (torch.randn(1, 1, 5),), + "squeeze4d_dims": lambda: (torch.randn(1, 5, 5, 1),), + "squeeze3d_dims_mix": lambda: (torch.randn(1, 5, 1, 5),), + } + + def forward(self, x: torch.Tensor): + return x.squeeze() + + +@common.parametrize("test_data", Squeeze.test_parameters) +def test_squeeze_dim_tosa_MI(test_data: Tuple): + pipeline =
TosaPipelineMI[input_t1]( + Squeeze(), + test_data(), + aten_op="torch.ops.aten.squeeze.default", + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", Squeeze.test_parameters) +def test_squeeze_dim_tosa_BI(test_data: Tuple): + pipeline = TosaPipelineBI[input_t1]( + Squeeze(), + test_data(), + aten_op="torch.ops.aten.squeeze.default", + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", Squeeze.test_parameters) +@common.XfailIfNoCorstone300 +def test_squeeze_dim_u55_BI(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + Squeeze(), + test_data(), + aten_ops="torch.ops.aten.squeeze.default", + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Squeeze.test_parameters) +@common.XfailIfNoCorstone320 +def test_squeeze_dim_u85_BI(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + Squeeze(), + test_data(), + aten_ops="torch.ops.aten.squeeze.default", + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", SqueezeDim.test_parameters) +def test_squeeze_dim_tosa_MI_2(test_data: Tuple): + pipeline = TosaPipelineMI[input_t1]( + SqueezeDim(), + test_data(), + aten_op="torch.ops.aten.squeeze.dim", + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", SqueezeDim.test_parameters) +def test_squeeze_dim_tosa_BI_2(test_data: Tuple): + pipeline = TosaPipelineBI[input_t1]( + SqueezeDim(), + test_data(), + aten_op="torch.ops.aten.squeeze.dim", + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", SqueezeDim.test_parameters) +@common.XfailIfNoCorstone300 +def test_squeeze_dim_u55_BI_2(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + SqueezeDim(), + test_data(), + aten_ops="torch.ops.aten.squeeze.dim", + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", SqueezeDim.test_parameters) +@common.XfailIfNoCorstone320 +def test_squeeze_dim_u85_BI_2(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + SqueezeDim(), + test_data(), + aten_ops="torch.ops.aten.squeeze.dim", + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", SqueezeDims.test_parameters) +def test_squeeze_dims_tosa_MI(test_data: Tuple): + pipeline = TosaPipelineMI[input_t1]( + SqueezeDims(), + test_data(), + aten_op="torch.ops.aten.squeeze.dims", + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", SqueezeDims.test_parameters) +def test_squeeze_dims_tosa_BI(test_data: Tuple): + pipeline = TosaPipelineBI[input_t1]( + SqueezeDims(), + test_data(), + aten_op="torch.ops.aten.squeeze.dims", + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", SqueezeDims.test_parameters) +@common.XfailIfNoCorstone300 +def test_squeeze_dims_u55_BI(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + SqueezeDims(), + test_data(), + aten_ops="torch.ops.aten.squeeze.dims", + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", SqueezeDims.test_parameters) +@common.XfailIfNoCorstone320 +def test_squeeze_dims_u85_BI(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + SqueezeDims(), + test_data(), + aten_ops="torch.ops.aten.squeeze.dims", + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_sub.py b/backends/arm/test/ops/test_sub.py index d1849e830c9..f61f3b0583d 100644 --- a/backends/arm/test/ops/test_sub.py +++ b/backends/arm/test/ops/test_sub.py @@ -5,7 +5,6 @@ # 
This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. - from typing import Tuple import torch @@ -22,23 +21,23 @@ # Single-input subtraction (x - x) sub_test_data = { - "ones_1D_5": (torch.ones(5),), - "ones_1D_50": (torch.ones(50),), - "rand_1D_10": (torch.rand(10),), - "rand_2D_5x5": (torch.rand(5, 5),), - "rand_3D_5x5x5": (torch.rand(5, 5, 5),), - "rand_4D_2x3x4x5": (torch.rand(2, 3, 4, 5),), - "zeros": (torch.zeros(10),), + "ones_1D_5": lambda: (torch.ones(5),), + "ones_1D_50": lambda: (torch.ones(50),), + "rand_1D_10": lambda: (torch.rand(10),), + "rand_2D_5x5": lambda: (torch.rand(5, 5),), + "rand_3D_5x5x5": lambda: (torch.rand(5, 5, 5),), + "rand_4D_2x3x4x5": lambda: (torch.rand(2, 3, 4, 5),), + "zeros": lambda: (torch.zeros(10),), } fvp_sub_xfails = {"rand_4D_2x3x4x5": "MLETORCH-517 : Multiple batches not supported"} # Two-input subtraction (x - y) sub2_test_data = { - "rand_2D_4x4": (torch.rand(4, 4), torch.rand(4, 4)), - "rand_3D_4x4x4": (torch.rand(4, 2, 2), torch.rand(4, 2, 2)), - "rand_4D_2x2x4x4": (torch.rand(2, 2, 4, 4), torch.rand(2, 2, 4, 4)), - "zeros": (torch.rand(4, 4), torch.zeros(4, 4)), + "rand_2D_4x4": lambda: (torch.rand(4, 4), torch.rand(4, 4)), + "rand_3D_4x4x4": lambda: (torch.rand(4, 2, 2), torch.rand(4, 2, 2)), + "rand_4D_2x2x4x4": lambda: (torch.rand(2, 2, 4, 4), torch.rand(2, 2, 4, 4)), + "zeros": lambda: (torch.rand(4, 4), torch.zeros(4, 4)), } fvp_sub2_xfails = {"rand_4D_2x2x4x4": "MLETORCH-517 : Multiple batches not supported"} @@ -58,11 +57,11 @@ def forward(self, x: torch.Tensor, y: torch.Tensor): @common.parametrize("test_data", sub_test_data) -def test_sub_tosa_MI(test_data): +def test_sub_tensor_tosa_MI(test_data): """Test Subtraction (TOSA MI)""" pipeline = TosaPipelineMI[input_t1]( Sub(), - test_data, + test_data(), aten_op, exir_op, ) @@ -70,11 +69,11 @@ def test_sub_tosa_MI(test_data): @common.parametrize("test_data", sub2_test_data) -def test_sub_2_tosa_MI(test_data: Tuple[torch.Tensor, torch.Tensor]): +def test_sub_tensor_tosa_MI_2(test_data: Tuple[torch.Tensor, torch.Tensor]): """Test Two-Operand Subtraction (TOSA MI)""" pipeline = TosaPipelineMI[input_t2]( Sub2(), - test_data, + test_data(), aten_op, exir_op, ) @@ -82,11 +81,11 @@ def test_sub_2_tosa_MI(test_data: Tuple[torch.Tensor, torch.Tensor]): @common.parametrize("test_data", sub_test_data) -def test_sub_tosa_BI(test_data): +def test_sub_tensor_tosa_BI(test_data): """Test Subtraction (TOSA BI)""" pipeline = TosaPipelineBI[input_t1]( Sub(), - test_data, + test_data(), aten_op, exir_op, ) @@ -95,11 +94,11 @@ def test_sub_tosa_BI(test_data): @common.parametrize("test_data", sub2_test_data) -def test_sub_2_tosa_BI(test_data: Tuple[torch.Tensor, torch.Tensor]): +def test_sub_tensor_tosa_BI_2(test_data: Tuple[torch.Tensor, torch.Tensor]): """Test Two-Operand Subtraction (TOSA BI)""" pipeline = TosaPipelineBI[input_t2]( Sub2(), - test_data, + test_data(), aten_op, exir_op, ) @@ -107,65 +106,13 @@ def test_sub_2_tosa_BI(test_data: Tuple[torch.Tensor, torch.Tensor]): pipeline.run() -@common.parametrize("test_data", sub_test_data) -def test_sub_u55_BI(test_data): - """Test Subtraction on Ethos-U55""" - pipeline = EthosU55PipelineBI[input_t1]( - Sub(), - test_data, - aten_op, - exir_op, - run_on_fvp=False, - ) - pipeline.run() - - -@common.parametrize("test_data", sub2_test_data) -def test_sub_2_u55_BI(test_data: Tuple[torch.Tensor, torch.Tensor]): - """Test Two-Operand Subtraction on Ethos-U55""" - pipeline = 
EthosU55PipelineBI[input_t2]( - Sub2(), - test_data, - aten_op, - exir_op, - run_on_fvp=False, - ) - pipeline.run() - - -@common.parametrize("test_data", sub_test_data) -def test_sub_u85_BI(test_data): - """Test Subtraction on Ethos-U85 (Quantized Mode)""" - pipeline = EthosU85PipelineBI[input_t1]( - Sub(), - test_data, - aten_op, - exir_op, - run_on_fvp=False, - ) - pipeline.run() - - -@common.parametrize("test_data", sub2_test_data) -def test_sub_2_u85_BI(test_data: Tuple[torch.Tensor, torch.Tensor]): - """Test Two-Operand Subtraction on Ethos-U85""" - pipeline = EthosU85PipelineBI[input_t2]( - Sub2(), - test_data, - aten_op, - exir_op, - run_on_fvp=False, - ) - pipeline.run() - - @common.parametrize("test_data", sub_test_data, fvp_sub_xfails) -@common.SkipIfNoCorstone300 -def test_sub_u55_BI_on_fvp(test_data): +@common.XfailIfNoCorstone300 +def test_sub_tensor_u55_BI(test_data): """Test Subtraction on Ethos-U55 (FVP Mode)""" pipeline = EthosU55PipelineBI[input_t1]( Sub(), - test_data, + test_data(), aten_op, exir_op, run_on_fvp=True, @@ -175,12 +122,12 @@ def test_sub_u55_BI_on_fvp(test_data): @common.parametrize("test_data", sub2_test_data, fvp_sub2_xfails) -@common.SkipIfNoCorstone300 -def test_sub_2_u55_BI_on_fvp(test_data: Tuple[torch.Tensor, torch.Tensor]): +@common.XfailIfNoCorstone300 +def test_sub_tensor_u55_BI_2(test_data: Tuple[torch.Tensor, torch.Tensor]): """Test Two-Operand Subtraction on Ethos-U55 (FVP Mode)""" pipeline = EthosU55PipelineBI[input_t2]( Sub2(), - test_data, + test_data(), aten_op, exir_op, run_on_fvp=True, @@ -190,12 +137,12 @@ def test_sub_2_u55_BI_on_fvp(test_data: Tuple[torch.Tensor, torch.Tensor]): @common.parametrize("test_data", sub_test_data, fvp_sub_xfails) -@common.SkipIfNoCorstone320 -def test_sub_u85_BI_on_fvp(test_data): +@common.XfailIfNoCorstone320 +def test_sub_tensor_u85_BI_2(test_data): """Test Subtraction on Ethos-U85 (FVP Mode)""" pipeline = EthosU85PipelineBI[input_t1]( Sub(), - test_data, + test_data(), aten_op, exir_op, run_on_fvp=True, @@ -205,12 +152,12 @@ def test_sub_u85_BI_on_fvp(test_data): @common.parametrize("test_data", sub2_test_data, fvp_sub2_xfails) -@common.SkipIfNoCorstone320 -def test_sub_2_u85_BI_on_fvp(test_data: Tuple[torch.Tensor, torch.Tensor]): +@common.XfailIfNoCorstone320 +def test_sub_tensor_u85_BI(test_data: Tuple[torch.Tensor, torch.Tensor]): """Test Two-Operand Subtraction on Ethos-U85 (FVP Mode)""" pipeline = EthosU85PipelineBI[input_t2]( Sub2(), - test_data, + test_data(), aten_op, exir_op, run_on_fvp=True, diff --git a/backends/arm/test/ops/test_sum.py b/backends/arm/test/ops/test_sum.py index bc0c50b8ee0..8837f1b292d 100644 --- a/backends/arm/test/ops/test_sum.py +++ b/backends/arm/test/ops/test_sum.py @@ -4,155 +4,104 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest - from typing import Tuple import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - -exampledata_t = Tuple[torch.Tensor, int | list[int], bool] -"""(data, dim(s), keepdim)""" - - -class TestSum(unittest.TestCase): - """Tests sum which sums all elements along some specified dimensions. - keepdim specifies whether the dimension that is summed should - be squeezed or not. 
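A short illustration of the keepdim distinction referred to above (plain PyTorch):

    import torch

    x = torch.rand(10, 10)
    assert x.sum(dim=1, keepdim=False).shape == (10,)   # summed dim squeezed away
    assert x.sum(dim=1, keepdim=True).shape == (10, 1)  # summed dim kept as size 1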
- """ - - class Sum(torch.nn.Module): - test_parameters: list[Tuple[exampledata_t]] = [ - ((torch.rand(10), 0, True),), - ((torch.rand(10, 10), 1, False),), - ((torch.rand(10, 10, 10), [-3, 1], True),), - ((torch.rand(1, 1, 5, 8), 1, False),), - ((torch.rand(1, 2, 3, 4), 3, True),), - ((torch.rand(1, 2, 8, 8), [2, 3, 0], True),), - ] - - test_parameters_u55: list[Tuple[exampledata_t]] = [ - ((torch.rand(10), 0, True),), - ((torch.rand(10, 10), 1, False),), - ((torch.rand(1, 2, 3, 4), 3, True),), - ((torch.rand(10, 10, 10), [-3, 1], True),), - ((torch.rand(1, 1, 5, 8), 1, False),), - ((torch.rand(1, 2, 8, 8), [2, 3, 0], True),), - ] - - def forward(self, x: torch.Tensor, dim: int, keepdim: bool): - return x.sum(dim=dim, keepdim=keepdim) - - def _test_sum_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: tuple[exampledata_t] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.sum.dim_IntList": 1}) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_sum_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: tuple[exampledata_t] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.sum.dim_IntList": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_sum_ethosu_BI_pipeline( - self, - module: torch.nn.Module, - test_data: tuple[exampledata_t], - compile_spec: CompileSpec, - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.sum.dim_IntList": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) - - @parameterized.expand(Sum.test_parameters) - def test_sum_tosa_MI(self, test_data: tuple[exampledata_t]): - self._test_sum_tosa_MI_pipeline(self.Sum(), test_data) - - @parameterized.expand(Sum.test_parameters) - def test_sum_tosa_BI(self, test_data: tuple[exampledata_t]): - self._test_sum_tosa_BI_pipeline(self.Sum(), test_data) - - @parameterized.expand(Sum.test_parameters_u55) - def test_sum_u55_BI(self, test_data: tuple[exampledata_t]): - self._test_sum_ethosu_BI_pipeline( - self.Sum(), - test_data, - common.get_u55_compile_spec(), - ) - - @parameterized.expand(Sum.test_parameters) - def test_sum_u85_BI(self, test_data: tuple[exampledata_t]): - self._test_sum_ethosu_BI_pipeline( - self.Sum(), - test_data, - common.get_u85_compile_spec(), - ) - - reject_inputs = [ - ((torch.rand((65537, 1, 1)), 0, False),), - ((torch.rand((800, 90, 1)), 2, False),), - ((torch.rand((3, 2, 800, 90)), 1, False),), - ] - - @parameterized.expand(reject_inputs) - def test_reject_sum_u55_BI(self, example_inputs): - ( - ArmTester( - TestSum.Sum(), - example_inputs=example_inputs, - compile_spec=common.get_u55_compile_spec(), - ) - .quantize() - .export() 
- .check_count({"torch.ops.aten.sum.dim_IntList": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 0}) - .check(["executorch_exir_dialects_edge__ops_aten_sum_dim_IntList"]) - ) +from executorch.backends.arm.test import common + +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.sum.dim_IntList" +input_t1 = Tuple[torch.Tensor] # Input x + + +"""Tests sum which sums all elements along some specified dimensions. +keepdim specifies whether the dimension that is summed should +be squeezed or not. +""" + + +class Sum(torch.nn.Module): + test_parameters = { + "1d_dim_0_keep": lambda: (torch.rand(10), 0, True), + "2d_dim_1_no_keep": lambda: (torch.rand(10, 10), 1, False), + "3d_dims_keep": lambda: (torch.rand(10, 10, 10), [-3, 1], True), + "4d_dims_no_keep": lambda: (torch.rand(1, 1, 5, 8), 1, False), + "4d_dim_3_keep": lambda: (torch.rand(1, 2, 3, 4), 3, True), + "4d_dims_keep": lambda: (torch.rand(1, 2, 8, 8), [2, 3, 0], True), + } + + def forward(self, x: torch.Tensor, dim: int, keepdim: bool): + return x.sum(dim=dim, keepdim=keepdim) + + +@common.parametrize("test_data", Sum.test_parameters) +def test_sum_dim_intlist_tosa_MI(test_data: input_t1): + pipeline = TosaPipelineMI[input_t1]( + Sum(), + test_data(), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", Sum.test_parameters) +def test_sum_dim_intlist_tosa_BI(test_data: input_t1): + pipeline = TosaPipelineBI[input_t1]( + Sum(), + test_data(), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", Sum.test_parameters) +@common.XfailIfNoCorstone300 +def test_view_u55_BI_1_0(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + Sum(), + test_data(), + aten_op, + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Sum.test_parameters) +@common.XfailIfNoCorstone320 +def test_view_u85_BI_1_0(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + Sum(), + test_data(), + aten_op, + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() + + +reject_inputs = { + "reject_large_0_dim": lambda: (torch.rand((65537, 1, 1)), 0, False), + "reject_large_2_dim": lambda: (torch.rand((800, 90, 1)), 2, False), + "reject_large_1_dim": lambda: (torch.rand((3, 2, 800, 90)), 1, False), +} + + +@common.parametrize("test_data", reject_inputs) +def test_view_u55_BI_failure_set(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + Sum(), + test_data(), + aten_op, + exir_ops=[], + ) + pipeline.pop_stage("check_count.exir") + pipeline.run() diff --git a/backends/arm/test/ops/test_tanh.py b/backends/arm/test/ops/test_tanh.py index 8d13620dc4a..73d51cb8c3e 100644 --- a/backends/arm/test/ops/test_tanh.py +++ b/backends/arm/test/ops/test_tanh.py @@ -1,142 +1,85 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2024 Arm Limited and/or its affiliates. -# All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
-import unittest - from typing import Tuple -import pytest - import torch -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) +aten_op = "torch.ops.aten.tanh.default" +input_t1 = Tuple[torch.Tensor] # Input x -test_data_suite = [ +test_data_suite = { # (test_name, test_data) - ("zeros", torch.zeros(10, 10, 10, 10)), - ("ones", torch.ones(10, 10, 10)), - ("rand", torch.rand(10, 10) - 0.5), - ("randn_pos", torch.randn(10) + 10), - ("randn_neg", torch.randn(10) - 10), - ("ramp", torch.arange(-16, 16, 0.2)), -] - - -class TestTanh(unittest.TestCase): - class Tanh(torch.nn.Module): - def __init__(self): - super().__init__() - self.tanh = torch.nn.Tanh() - - def forward(self, x): - return self.tanh(x) - - def _test_tanh_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check(["torch.ops.aten.tanh.default"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_tanh_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - if conftest.is_option_enabled("tosa_ref_model"): - tester.run_method_and_compare_outputs(inputs=test_data) - - def _test_tanh_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check(["torch.ops.aten.tanh.default"]) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_tanh_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - if conftest.is_option_enabled("tosa_ref_model"): - tester.run_method_and_compare_outputs(inputs=test_data) - - def _test_tanh_tosa_ethos_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: Tuple[torch.tensor], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.tanh.default": 1}) - .check(["torch.ops.quantized_decomposed"]) - .to_edge() - .partition() - .check_not(["executorch_exir_dialects_edge__ops_aten_tanh_default"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - def _test_tanh_tosa_u55_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - self._test_tanh_tosa_ethos_BI_pipeline( - common.get_u55_compile_spec(), module, test_data - ) - - def _test_tanh_tosa_u85_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] - ): - self._test_tanh_tosa_ethos_BI_pipeline( - common.get_u85_compile_spec(), module, test_data - ) - - @parameterized.expand(test_data_suite) - @pytest.mark.tosa_ref_model - def test_tanh_tosa_MI( - self, - test_name: str, - test_data: torch.Tensor, - ): - self._test_tanh_tosa_MI_pipeline(self.Tanh(), (test_data,)) - - 
@parameterized.expand(test_data_suite) - @pytest.mark.tosa_ref_model - def test_tanh_tosa_BI(self, test_name: str, test_data: torch.Tensor): - self._test_tanh_tosa_BI_pipeline(self.Tanh(), (test_data,)) - - @parameterized.expand(test_data_suite) - def test_tanh_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): - self._test_tanh_tosa_u55_BI_pipeline(self.Tanh(), (test_data,)) - - @parameterized.expand(test_data_suite) - def test_tanh_tosa_u85_BI(self, test_name: str, test_data: torch.Tensor): - self._test_tanh_tosa_u85_BI_pipeline(self.Tanh(), (test_data,)) + "zeros": lambda: torch.zeros(10, 10, 10, 10), + "ones": lambda: torch.ones(10, 10, 10), + "rand": lambda: torch.rand(10, 10) - 0.5, + "randn_pos": lambda: torch.randn(10) + 10, + "randn_neg": lambda: torch.randn(10) - 10, + "ramp": lambda: torch.arange(-16, 16, 0.2), +} + + +class Tanh(torch.nn.Module): + def __init__(self): + super().__init__() + self.tanh = torch.nn.Tanh() + + def forward(self, x): + return self.tanh(x) + + +@common.parametrize("test_data", test_data_suite) +def test_tanh_tosa_MI(test_data: Tuple): + pipeline = TosaPipelineMI[input_t1]( + Tanh(), + (test_data(),), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_tanh_tosa_BI(test_data: Tuple): + pipeline = TosaPipelineBI[input_t1]( + Tanh(), + (test_data(),), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_tanh_u55_BI(test_data: Tuple): + pipeline = EthosU55PipelineBI[input_t1]( + Tanh(), + (test_data(),), + aten_op, + exir_ops=[], + run_on_fvp=False, + ) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_tanh_u85_BI(test_data: Tuple): + pipeline = EthosU85PipelineBI[input_t1]( + Tanh(), + (test_data(),), + aten_op, + exir_ops=[], + run_on_fvp=False, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_to_copy.py b/backends/arm/test/ops/test_to_copy.py index db3e93fbdc9..9d873f30ce9 100644 --- a/backends/arm/test/ops/test_to_copy.py +++ b/backends/arm/test/ops/test_to_copy.py @@ -1,5 +1,4 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -8,14 +7,14 @@ # Tests the _to_copy op which is interpreted as a cast for our purposes. # -import unittest +from typing import Tuple import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineMI -from parameterized import parameterized +input_t1 = Tuple[torch.Tensor] # Input x class Cast(torch.nn.Module): @@ -27,41 +26,41 @@ def forward(self, x: torch.Tensor): return x.to(dtype=self.target_dtype) -class TestToCopy(unittest.TestCase): - """ - Tests the _to_copy operation. +""" +Tests the _to_copy operation. - Only test unquantized graphs as explicit casting of dtypes messes with the - quantization. +Only test unquantized graphs as explicit casting of dtypes messes with the +quantization. - Note: This is also covered by test_scalars.py. - """ +Note: This is also covered by test_scalars.py. 
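To make the cast concrete: at the PyTorch level the operation under test is a plain dtype conversion, which the file's own comment notes is interpreted as a cast and recorded as _to_copy on export. A minimal sketch (plain PyTorch; dtypes mirror the test data below):

    import torch

    x = torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8)
    y = x.to(dtype=torch.float32)  # the cast performed by the Cast module
    assert y.dtype == torch.float32 and y.shape == x.shape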
+""" + +_TO_COPY_TEST_DATA = { + "rand_fp16": lambda: (torch.rand((1, 2, 3, 4), dtype=torch.float16), torch.float32), + "rand_fp32": lambda: (torch.rand((1, 2, 3, 4), dtype=torch.float32), torch.float16), + "rand_int8": lambda: ( + torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8), + torch.float32, + ), + "rand_int8_int32": lambda: ( + torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8), + torch.int32, + ), + "rand_int32": lambda: ( + torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32), + torch.int8, + ), +} - _TO_COPY_TEST_DATA = ( - (torch.rand((1, 2, 3, 4), dtype=torch.float16), torch.float32), - (torch.rand((1, 2, 3, 4), dtype=torch.float32), torch.float16), - (torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8), torch.float32), - (torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int8), torch.int32), - (torch.randint(-127, 128, (1, 2, 3, 4), dtype=torch.int32), torch.int8), - ) - def _test_to_copy_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: torch.Tensor - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - @parameterized.expand(_TO_COPY_TEST_DATA) - def test_view_tosa_MI(self, test_tensor: torch.Tensor, new_dtype): - self._test_to_copy_tosa_MI_pipeline(Cast(new_dtype), (test_tensor,)) +@common.parametrize("test_data", _TO_COPY_TEST_DATA) +def test_copy_tosa_MI(test_data: Tuple): + test_tensor, new_dtype = test_data() + + pipeline = TosaPipelineMI[input_t1]( + Cast(new_dtype), + (test_tensor,), + aten_op=[], + exir_op=[], + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_unary.py b/backends/arm/test/ops/test_unary.py index 1f91cab56c1..f8359bb4339 100644 --- a/backends/arm/test/ops/test_unary.py +++ b/backends/arm/test/ops/test_unary.py @@ -3,7 +3,6 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
- from typing import Tuple import torch @@ -46,51 +45,51 @@ def forward(self, x: torch.Tensor): test_data = { - "ceil_zeros": ( + "ceil_zeros": lambda: ( Ceil(), zeros, ), - "floor_zeros": ( + "floor_zeros": lambda: ( Floor(), zeros, ), - "ceil_ones": ( + "ceil_ones": lambda: ( Ceil(), ones, ), - "floor_ones": ( + "floor_ones": lambda: ( Floor(), ones, ), - "ceil_rand": ( + "ceil_rand": lambda: ( Ceil(), rand, ), - "floor_rand": ( + "floor_rand": lambda: ( Floor(), rand, ), - "ceil_randn_pos": ( + "ceil_randn_pos": lambda: ( Ceil(), randn_pos, ), - "floor_randn_pos": ( + "floor_randn_pos": lambda: ( Floor(), randn_pos, ), - "ceil_randn_neg": ( + "ceil_randn_neg": lambda: ( Ceil(), randn_neg, ), - "floor_randn_neg": ( + "floor_randn_neg": lambda: ( Floor(), randn_neg, ), - "ceil_ramp": ( + "ceil_ramp": lambda: ( Ceil(), ramp, ), - "floor_ramp": ( + "floor_ramp": lambda: ( Floor(), ramp, ), @@ -99,55 +98,51 @@ def forward(self, x: torch.Tensor): @common.parametrize("test_data", test_data) def test_unary_tosa_MI(test_data: input_t1): - module = test_data[0] + module, test_data = test_data() pipeline = TosaPipelineMI[input_t1]( - module, (test_data[1],), module.aten_op, module.exir_op + module, + (test_data,), + module.aten_op, + module.exir_op, ) pipeline.run() @common.parametrize("test_data", test_data) def test_unary_tosa_BI(test_data: input_t1): - module = test_data[0] + module, test_data = test_data() pipeline = TosaPipelineBI[input_t1]( - module, (test_data[1],), module.aten_op, module.exir_op + module, + (test_data,), + module.aten_op, + module.exir_op, ) pipeline.run() @common.parametrize("test_data", test_data) +@common.XfailIfNoCorstone300 def test_unary_u55_BI(test_data: input_t1): - module = test_data[0] + module, test_data = test_data() pipeline = EthosU55PipelineBI[input_t1]( - module, (test_data[1],), module.aten_op, module.exir_op, run_on_fvp=False + module, + (test_data,), + module.aten_op, + module.exir_op, + run_on_fvp=True, ) pipeline.run() @common.parametrize("test_data", test_data) +@common.XfailIfNoCorstone320 def test_unary_u85_BI(test_data: input_t1): - module = test_data[0] - pipeline = EthosU85PipelineBI[input_t1]( - module, (test_data[1],), module.aten_op, module.exir_op, run_on_fvp=False - ) - pipeline.run() - - -@common.parametrize("test_data", test_data) -@common.SkipIfNoCorstone300 -def test_unary_u55_BI_on_fvp(test_data: input_t1): - module = test_data[0] - pipeline = EthosU55PipelineBI[input_t1]( - module, (test_data[1],), module.aten_op, module.exir_op, run_on_fvp=True - ) - pipeline.run() - - -@common.parametrize("test_data", test_data) -@common.SkipIfNoCorstone320 -def test_unary_u85_BI_on_fvp(test_data: input_t1): - module = test_data[0] + module, test_data = test_data() pipeline = EthosU85PipelineBI[input_t1]( - module, (test_data[1],), module.aten_op, module.exir_op, run_on_fvp=True + module, + (test_data,), + module.aten_op, + module.exir_op, + run_on_fvp=True, ) pipeline.run() diff --git a/backends/arm/test/ops/test_unsqueeze.py b/backends/arm/test/ops/test_unsqueeze.py index 68f4fe46123..4ad238a099a 100644 --- a/backends/arm/test/ops/test_unsqueeze.py +++ b/backends/arm/test/ops/test_unsqueeze.py @@ -1,5 +1,4 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
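For reference on the op under test: unsqueeze inserts a new size-1 dimension, and for a rank-n input every dim in [-n-1, n] is valid, which is exactly the range the MI test below iterates over. A minimal sketch (plain PyTorch):

    import torch

    x = torch.randn(5, 4)
    assert x.unsqueeze(0).shape == (1, 5, 4)   # new leading dim
    assert x.unsqueeze(-1).shape == (5, 4, 1)  # new trailing dim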
@@ -8,105 +7,76 @@ # Tests the unsqueeze op which copies the data of the input tensor (possibly with new data format) # -import unittest from typing import Sequence, Tuple import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester - -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - - -class TestSimpleUnsqueeze(unittest.TestCase): - class Unsqueeze(torch.nn.Module): - shapes: list[int | Sequence[int]] = [5, (5, 5), (5, 4), (5, 4, 3)] - test_parameters: list[tuple[torch.Tensor]] = [(torch.randn(n),) for n in shapes] - - def forward(self, x: torch.Tensor, dim): - return x.unsqueeze(dim) - - def _test_unsqueeze_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor, int] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.unsqueeze.default": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) - def _test_unsqueeze_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor, int] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.unsqueeze.default": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) +aten_op = "torch.ops.aten.unsqueeze.default" +input_t1 = Tuple[torch.Tensor, torch.scalar_tensor] # Input x, Input y - def _test_unsqueeze_ethosu_BI_pipeline( - self, - compile_spec: CompileSpec, - module: torch.nn.Module, - test_data: Tuple[torch.Tensor, int], - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.unsqueeze.default": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - @parameterized.expand(Unsqueeze.test_parameters) - def test_unsqueeze_tosa_MI(self, test_tensor: torch.Tensor): - for i in range(-test_tensor.dim() - 1, test_tensor.dim() + 1): - self._test_unsqueeze_tosa_MI_pipeline(self.Unsqueeze(), (test_tensor, i)) - - @parameterized.expand(Unsqueeze.test_parameters) - def test_unsqueeze_tosa_BI(self, test_tensor: torch.Tensor): - self._test_unsqueeze_tosa_BI_pipeline(self.Unsqueeze(), (test_tensor, 0)) - - @parameterized.expand(Unsqueeze.test_parameters) - def test_unsqueeze_u55_BI(self, test_tensor: torch.Tensor): - self._test_unsqueeze_ethosu_BI_pipeline( - common.get_u55_compile_spec(), - self.Unsqueeze(), - (test_tensor, 0), - ) +class Unsqueeze(torch.nn.Module): + shapes: list[int | Sequence[int]] = [5, (5, 5), (5, 4), (5, 4, 3)] + test_parameters = {} + for n in shapes: + test_parameters[f"rand_{n}"] = (torch.randn(n),) + + def forward(self, x: torch.Tensor, dim): + return x.unsqueeze(dim) + - @parameterized.expand(Unsqueeze.test_parameters) - def test_unsqueeze_u85_BI(self, test_tensor: torch.Tensor): - self._test_unsqueeze_ethosu_BI_pipeline( - common.get_u85_compile_spec(), - 
self.Unsqueeze(), - (test_tensor, 0), +@common.parametrize("test_tensor", Unsqueeze.test_parameters) +def test_unsqueeze_tosa_MI(test_tensor: torch.Tensor): + for i in range(-test_tensor[0].dim() - 1, test_tensor[0].dim() + 1): + pipeline = TosaPipelineMI[input_t1]( + Unsqueeze(), + (*test_tensor, i), + aten_op, + exir_op=[], ) + pipeline.run() + + +@common.parametrize("test_tensor", Unsqueeze.test_parameters) +def test_unsqueeze_tosa_BI(test_tensor: torch.Tensor): + pipeline = TosaPipelineBI[input_t1]( + Unsqueeze(), + (*test_tensor, 0), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_tensor", Unsqueeze.test_parameters) +@common.XfailIfNoCorstone300 +def test_unsqueeze_u55_BI(test_tensor: torch.Tensor): + pipeline = EthosU55PipelineBI[input_t1]( + Unsqueeze(), + (*test_tensor, 0), + aten_op, + exir_ops=[], + run_on_fvp=False, + ) + pipeline.run() + + +@common.parametrize("test_tensor", Unsqueeze.test_parameters) +@common.XfailIfNoCorstone320 +def test_unsqueeze_u85_BI(test_tensor: torch.Tensor): + pipeline = EthosU85PipelineBI[input_t1]( + Unsqueeze(), + (*test_tensor, 0), + aten_op, + exir_ops=[], + run_on_fvp=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_upsample_nearest2d.py b/backends/arm/test/ops/test_upsample_nearest2d.py index 8984d716a3d..7809d5fdee2 100644 --- a/backends/arm/test/ops/test_upsample_nearest2d.py +++ b/backends/arm/test/ops/test_upsample_nearest2d.py @@ -1,165 +1,163 @@ -# Copyright 2024 Arm Limited and/or its affiliates. -# All rights reserved. +# Copyright 2024-2025 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -import unittest - from typing import Optional, Tuple import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester -from parameterized import parameterized +from executorch.backends.arm.test.tester.test_pipeline import ( + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.upsample_nearest2d.vec" +input_t1 = Tuple[torch.Tensor] # Input x -test_data_suite = [ +test_data_suite = { # (test_name, test_data, size, scale_factor, compare_outputs) - ("rand_double_scale", torch.rand(2, 4, 8, 3), None, 2.0, True), - ("rand_double_scale_one_dim", torch.rand(2, 4, 8, 3), None, (1.0, 2.0), True), - ("rand_double_size", torch.rand(2, 4, 8, 3), (16, 6), None, True), - ("rand_one_double_scale", torch.rand(2, 4, 1, 1), None, 2.0, True), - ("rand_one_double_size", torch.rand(2, 4, 1, 1), (2, 2), None, True), - ("rand_one_same_scale", torch.rand(2, 4, 1, 1), None, 1.0, True), - ("rand_one_same_size", torch.rand(2, 4, 1, 1), (1, 1), None, True), + "rand_double_scale": lambda: (torch.rand(2, 4, 8, 3), None, 2.0, True), + "rand_double_scale_one_dim": lambda: ( + torch.rand(2, 4, 8, 3), + None, + (1.0, 2.0), + True, + ), + "rand_double_size": lambda: (torch.rand(2, 4, 8, 3), (16, 6), None, True), + "rand_one_double_scale": lambda: (torch.rand(2, 4, 1, 1), None, 2.0, True), + "rand_one_double_size": lambda: (torch.rand(2, 4, 1, 1), (2, 2), None, True), + "rand_one_same_scale": lambda: (torch.rand(2, 4, 1, 1), None, 1.0, True), + "rand_one_same_size": lambda: (torch.rand(2, 4, 1, 1), (1, 1), None, True), # Can't compare outputs as the rounding when selecting the nearest pixel is # different between PyTorch and TOSA. Just check the legalization went well. 
# TODO Improve the test infrastructure to support more in depth verification # of the TOSA legalization results. - ("rand_half_scale", torch.rand(2, 4, 8, 6), None, 0.5, False), - ("rand_half_size", torch.rand(2, 4, 8, 6), (4, 3), None, False), - ("rand_one_and_half_scale", torch.rand(2, 4, 8, 3), None, 1.5, False), - ("rand_one_and_half_size", torch.rand(2, 4, 8, 3), (12, 4), None, False), -] - - -class TestUpsampleNearest2d(unittest.TestCase): - class UpsamplingNearest2d(torch.nn.Module): - def __init__( - self, - size: Optional[Tuple[int]], - scale_factor: Optional[float | Tuple[float]], - ): - super().__init__() - self.upsample = torch.nn.UpsamplingNearest2d( # noqa: TOR101 - size=size, scale_factor=scale_factor - ) - - def forward(self, x): - return self.upsample(x) - - class Upsample(torch.nn.Module): - def __init__( - self, - size: Optional[Tuple[int]], - scale_factor: Optional[float | Tuple[float]], - ): - super().__init__() - self.upsample = torch.nn.Upsample( - size=size, scale_factor=scale_factor, mode="nearest" - ) - - def forward(self, x): - return self.upsample(x) - - class Interpolate(torch.nn.Module): - def __init__( - self, - size: Optional[Tuple[int]], - scale_factor: Optional[float | Tuple[float]], - ): - super().__init__() - self.upsample = lambda x: torch.nn.functional.interpolate( - x, size=size, scale_factor=scale_factor, mode="nearest" - ) - - def forward(self, x): - return self.upsample(x) - - def _test_upsample_nearest_2d_tosa_MI_pipeline( - self, - module: torch.nn.Module, - test_data: Tuple[torch.tensor], - compare_outputs: bool, - ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check(["torch.ops.aten.upsample_nearest2d.vec"]) - .check_not(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_not(["torch.ops.aten.upsample_nearest2d.vec"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) + "rand_half_scale": lambda: (torch.rand(2, 4, 8, 6), None, 0.5, False), + "rand_half_size": lambda: (torch.rand(2, 4, 8, 6), (4, 3), None, False), + "rand_one_and_half_scale": lambda: (torch.rand(2, 4, 8, 3), None, 1.5, False), + "rand_one_and_half_size": lambda: (torch.rand(2, 4, 8, 3), (12, 4), None, False), +} - if compare_outputs: - tester.run_method_and_compare_outputs(inputs=test_data) - def _test_upsample_nearest_2d_tosa_BI_pipeline( +class UpsamplingNearest2d(torch.nn.Module): + def __init__( self, - module: torch.nn.Module, - test_data: Tuple[torch.tensor], - compare_outputs: bool, + size: Optional[Tuple[int]], + scale_factor: Optional[float | Tuple[float]], ): - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check(["torch.ops.aten.upsample_nearest2d.vec"]) - .check(["torch.ops.quantized_decomposed"]) - .to_edge_transform_and_lower() - .check_not(["torch.ops.aten.upsample_nearest2d.vec"]) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() + super().__init__() + self.upsample = torch.nn.UpsamplingNearest2d( # noqa: TOR101 + size=size, scale_factor=scale_factor ) - if compare_outputs: - tester.run_method_and_compare_outputs(inputs=test_data) + def forward(self, x): + return self.upsample(x) + - @parameterized.expand(test_data_suite) - def test_upsample_nearest_2d_tosa_MI( +class Upsample(torch.nn.Module): + def __init__( self, - test_name: str, - test_data: 
torch.Tensor, size: Optional[Tuple[int]], scale_factor: Optional[float | Tuple[float]], - compare_outputs: bool, ): - self._test_upsample_nearest_2d_tosa_MI_pipeline( - self.UpsamplingNearest2d(size, scale_factor), (test_data,), compare_outputs - ) - self._test_upsample_nearest_2d_tosa_MI_pipeline( - self.Upsample(size, scale_factor), (test_data,), compare_outputs - ) - self._test_upsample_nearest_2d_tosa_MI_pipeline( - self.Interpolate(size, scale_factor), (test_data,), compare_outputs + super().__init__() + self.upsample = torch.nn.Upsample( + size=size, scale_factor=scale_factor, mode="nearest" ) - @parameterized.expand(test_data_suite) - def test_upsample_nearest_2d_tosa_BI( + def forward(self, x): + return self.upsample(x) + + +class Interpolate(torch.nn.Module): + def __init__( self, - test_name: str, - test_data: torch.Tensor, size: Optional[Tuple[int]], scale_factor: Optional[float | Tuple[float]], - compare_outputs: bool, ): - self._test_upsample_nearest_2d_tosa_BI_pipeline( - self.UpsamplingNearest2d(size, scale_factor), (test_data,), compare_outputs - ) - self._test_upsample_nearest_2d_tosa_BI_pipeline( - self.Upsample(size, scale_factor), (test_data,), compare_outputs - ) - self._test_upsample_nearest_2d_tosa_BI_pipeline( - self.Interpolate(size, scale_factor), (test_data,), compare_outputs + super().__init__() + self.upsample = lambda x: torch.nn.functional.interpolate( + x, size=size, scale_factor=scale_factor, mode="nearest" ) + + def forward(self, x): + return self.upsample(x) + + +@common.parametrize("test_data", test_data_suite) +def test_upsample_nearest2d_vec_tosa_MI(test_data: torch.Tensor): + test_data, size, scale_factor, compare_outputs = test_data() + + pipeline = TosaPipelineMI[input_t1]( + UpsamplingNearest2d(size, scale_factor), + (test_data,), + aten_op, + exir_op=[], + ) + if not compare_outputs: + pipeline.pop_stage(-1) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_upsample_nearest2d_vec_tosa_MI_nearest(test_data: torch.Tensor): + test_data, size, scale_factor, compare_outputs = test_data() + + pipeline = TosaPipelineMI[input_t1]( + Upsample(size, scale_factor), + (test_data,), + aten_op, + exir_op=[], + ) + if not compare_outputs: + pipeline.pop_stage(-1) + + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_upsample_nearest2d_vec_tosa_MI_interpolate(test_data: torch.Tensor): + test_data, size, scale_factor, compare_outputs = test_data() + + pipeline = TosaPipelineMI[input_t1]( + Interpolate(size, scale_factor), + (test_data,), + aten_op, + exir_op=[], + ) + if not compare_outputs: + pipeline.pop_stage(-1) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_upsample_nearest2d_vec_tosa_BI_interpolate(test_data: torch.Tensor): + test_data, size, scale_factor, compare_outputs = test_data() + + pipeline = TosaPipelineBI[input_t1]( + UpsamplingNearest2d(size, scale_factor), + (test_data,), + aten_op, + exir_op=[], + ) + if not compare_outputs: + pipeline.pop_stage(-1) + pipeline.run() + + +@common.parametrize("test_data", test_data_suite) +def test_upsample_nearest2d_vec_tosa_BI_nearest(test_data: torch.Tensor): + test_data, size, scale_factor, compare_outputs = test_data() + + pipeline = TosaPipelineBI[input_t1]( + Upsample(size, scale_factor), + (test_data,), + aten_op, + exir_op=[], + ) + if not compare_outputs: + pipeline.pop_stage(-1) + + pipeline.run() diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py index 
fb23f24307e..63681263fab 100644 --- a/backends/arm/test/ops/test_var.py +++ b/backends/arm/test/ops/test_var.py @@ -1,255 +1,321 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -# -# Tests the mean op which changes the size of a Tensor without changing the underlying data. -# -import unittest +from typing import Tuple import torch -from executorch.backends.arm.quantizer import ( - EthosUQuantizer, - get_symmetric_quantization_config, - TOSAQuantizer, + +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, ) -from executorch.backends.arm.test import common, conftest -from executorch.backends.arm.test.tester.arm_tester import ArmTester - -from executorch.backends.arm.tosa_specification import TosaSpecification -from executorch.backends.xnnpack.test.tester.tester import Quantize -from executorch.exir.backend.backend_details import CompileSpec - -from parameterized import parameterized - - -class TestVar(unittest.TestCase): - - class Var(torch.nn.Module): - test_parameters = [ - (torch.randn(1, 50, 10, 20), True, 0), - (torch.rand(1, 50, 10), False, 0), - (torch.randn(1, 30, 15, 20), True, 1), - (torch.rand(1, 50, 10, 20), False, 0.5), - ] - - def __init__(self, keepdim: bool = True, correction: int = 0): - super().__init__() - self.keepdim = keepdim - self.correction = correction - - def forward( - self, - x: torch.Tensor, - ): - return x.var(keepdim=self.keepdim, correction=self.correction) - - class VarDim(torch.nn.Module): - test_parameters = [ - (torch.randn(1, 50, 10, 20), 1, True, False), - (torch.rand(1, 50, 10), -2, False, False), - (torch.randn(1, 30, 15, 20), -3, True, True), - (torch.rand(1, 50, 10, 20), -1, False, True), - ] - - test_parameters_u55 = [ - (torch.randn(1, 50, 10, 20), 1, True, False), - (torch.randn(1, 30, 15, 20), -3, True, True), - ] - - test_parameters_u55_xfails = [ - (torch.rand(1, 50, 10), -2, True, False), - (torch.rand(1, 50, 10, 20), -1, True, True), - ] - - def __init__(self, dim: int = -1, keepdim: bool = True, unbiased: bool = False): - super().__init__() - self.dim = dim - self.keepdim = keepdim - self.unbiased = unbiased - - def forward( - self, - x: torch.Tensor, - ): - return x.var(dim=self.dim, keepdim=self.keepdim, unbiased=self.unbiased) - - class VarCorrection(torch.nn.Module): - test_parameters = [ - (torch.randn(1, 50, 10, 20), (-1, -2), True, 0), - (torch.rand(1, 50, 10), (-2), True, 0), - (torch.randn(1, 30, 15, 20), (-1, -2, -3), True, 1), - (torch.rand(1, 50, 10, 20), (-1, -2), True, 0.5), - ] - - def __init__( - self, dim: int = -1, keepdim: bool = True, correction: bool = False - ): - super().__init__() - self.dim = dim - self.keepdim = keepdim - self.correction = correction - - def forward( - self, - x: torch.Tensor, - ): - return x.var(dim=self.dim, keepdim=self.keepdim, correction=self.correction) - - def _test_var_tosa_MI_pipeline( + +input_t1 = Tuple[torch.Tensor] # Input x + + +class Var(torch.nn.Module): + test_parameters = { + "var_4d_keep_dim_0_correction": lambda: (torch.randn(1, 50, 10, 20), True, 0), + "var_3d_no_keep_dim_0_correction": lambda: (torch.rand(1, 50, 10), False, 0), + "var_4d_keep_dim_1_correction": lambda: (torch.randn(1, 30, 15, 20), True, 1), + "var_4d_no_keep_dim_0_5_correction": lambda: ( + torch.rand(1, 
50, 10, 20), + False, + 0.5, + ), + } + + def __init__(self, keepdim: bool = True, correction: int = 0): + super().__init__() + self.keepdim = keepdim + self.correction = correction + + def forward( self, - module: torch.nn.Module, - test_data: torch.Tensor, - target_str: str = None, + x: torch.Tensor, ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_var_tosa_BI_pipeline( + return x.var(keepdim=self.keepdim, correction=self.correction) + + +class VarDim(torch.nn.Module): + test_parameters = { + "var_4d_dim_1_keep_dim_unbiased": lambda: ( + torch.randn(1, 50, 10, 20), + 1, + True, + False, + ), + "var_3d_dim_neg_2_no_keep_dim_unbiased": lambda: ( + torch.rand(1, 50, 10), + -2, + False, + False, + ), + "var_3d_dim_neg_3_keep_dim_biased": lambda: ( + torch.randn(1, 30, 15, 20), + -3, + True, + True, + ), + "var_3d_dim_neg_1_no_keep_dim_biased": lambda: ( + torch.rand(1, 50, 10, 20), + -1, + False, + True, + ), + } + + test_parameters_u55 = { + "var_4d_dim_1_keep_dim_unbiased": lambda: ( + torch.randn(1, 50, 10, 20), + 1, + True, + False, + ), + "var_4d_dim_neg_3_keep_dim_biased": lambda: ( + torch.randn(1, 30, 15, 20), + -3, + True, + True, + ), + } + + test_parameters_u55_xfails = { + "var_3d_dim_neg_2_keep_dim_unbiased": lambda: ( + torch.rand(1, 50, 10), + -2, + True, + False, + ), + "var_3d_dim_neg_1_keep_dim_biased": lambda: ( + torch.rand(1, 50, 10, 20), + -1, + True, + True, + ), + } + + def __init__(self, dim: int = -1, keepdim: bool = True, unbiased: bool = False): + super().__init__() + self.dim = dim + self.keepdim = keepdim + self.unbiased = unbiased + + def forward( self, - module: torch.nn.Module, - test_data: torch.Tensor, - target_str: str = None, + x: torch.Tensor, ): - tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI") - compile_spec = common.get_tosa_compile_spec(tosa_spec) - quantizer = TOSAQuantizer(tosa_spec).set_io(get_symmetric_quantization_config()) - ( - ArmTester(module, example_inputs=test_data, compile_spec=compile_spec) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_var_ethosu_BI_pipeline( + return x.var(dim=self.dim, keepdim=self.keepdim, unbiased=self.unbiased) + + +class VarCorrection(torch.nn.Module): + test_parameters = { + "var_4d_dims_keep_dim_0_correction": lambda: ( + torch.randn(1, 50, 10, 20), + (-1, -2), + True, + 0, + ), + "var_3d_dims_keep_dim_0_correction": lambda: ( + torch.rand(1, 50, 10), + (-2), + True, + 0, + ), + "var_4d_dims_keep_dim_1_correction": lambda: ( + torch.randn(1, 30, 15, 20), + (-1, -2, -3), + True, + 1, + ), + "var_4d_dims_keep_dim_0_5_correction": lambda: ( + torch.rand(1, 50, 10, 20), + (-1, -2), + True, + 0.5, + ), + } + + def __init__(self, dim: int = -1, keepdim: bool = True, correction: bool = False): + super().__init__() + self.dim = dim + self.keepdim = keepdim + self.correction = correction + + def forward( self, - module: torch.nn.Module, - compile_spec: CompileSpec, - test_data: torch.Tensor, - target_str: str = None, - ): - quantizer = EthosUQuantizer(compile_spec).set_io( - 
get_symmetric_quantization_config() - ) - tester = ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize(Quantize(quantizer, get_symmetric_quantization_config())) - .export() - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - ) - if conftest.is_option_enabled("corstone_fvp"): - tester.run_method_and_compare_outputs(inputs=test_data, qtol=1) - - @parameterized.expand(Var.test_parameters) - def test_var_tosa_MI(self, test_tensor: torch.Tensor, keepdim, correction): - self._test_var_tosa_MI_pipeline(self.Var(keepdim, correction), (test_tensor,)) - - @parameterized.expand(Var.test_parameters) - def test_var_tosa_BI(self, test_tensor: torch.Tensor, keepdim, correction): - self._test_var_tosa_BI_pipeline(self.Var(keepdim, correction), (test_tensor,)) - - @parameterized.expand(Var.test_parameters) - def test_var_u55_BI(self, test_tensor: torch.Tensor, keepdim, correction): - self._test_var_ethosu_BI_pipeline( - self.Var(keepdim, correction), - common.get_u55_compile_spec(), - (test_tensor,), - ) - - @parameterized.expand(Var.test_parameters) - def test_var_u85_BI(self, test_tensor: torch.Tensor, keepdim, correction): - self._test_var_ethosu_BI_pipeline( - self.Var(keepdim, correction), - common.get_u85_compile_spec(), - (test_tensor,), - ) - - @parameterized.expand(VarDim.test_parameters) - def test_var_dim_tosa_MI(self, test_tensor: torch.Tensor, dim, keepdim, unbiased): - self._test_var_tosa_MI_pipeline( - self.VarDim(dim, keepdim, unbiased), (test_tensor,) - ) - - @parameterized.expand(VarDim.test_parameters) - def test_var_dim_tosa_BI(self, test_tensor: torch.Tensor, dim, keepdim, unbiased): - self._test_var_tosa_BI_pipeline( - self.VarDim(dim, keepdim, unbiased), (test_tensor,) - ) - - @parameterized.expand(VarDim.test_parameters_u55) - def test_var_dim_u55_BI(self, test_tensor: torch.Tensor, dim, keepdim, unbiased): - self._test_var_ethosu_BI_pipeline( - self.VarDim(dim, keepdim, unbiased), - common.get_u55_compile_spec(), - (test_tensor,), - ) - - @parameterized.expand(VarDim.test_parameters) - def test_var_dim_u85_BI(self, test_tensor: torch.Tensor, dim, keepdim, unbiased): - self._test_var_ethosu_BI_pipeline( - self.VarDim(dim, keepdim, unbiased), - common.get_u85_compile_spec(), - (test_tensor,), - ) - - @parameterized.expand(VarCorrection.test_parameters) - def test_var_correction_tosa_MI( - self, test_tensor: torch.Tensor, dim, keepdim, correction - ): - self._test_var_tosa_MI_pipeline( - self.VarCorrection(dim, keepdim, correction), (test_tensor,) - ) - - @parameterized.expand(VarCorrection.test_parameters) - def test_var_correction_tosa_BI( - self, test_tensor: torch.Tensor, dim, keepdim, correction - ): - self._test_var_tosa_BI_pipeline( - self.VarCorrection(dim, keepdim, correction), (test_tensor,) - ) - - @parameterized.expand(VarCorrection.test_parameters) - def test_var_correction_u55_BI( - self, test_tensor: torch.Tensor, dim, keepdim, correction - ): - self._test_var_ethosu_BI_pipeline( - self.VarCorrection(dim, keepdim, correction), - common.get_u55_compile_spec(), - (test_tensor,), - ) - - @parameterized.expand(VarCorrection.test_parameters) - def test_var_correction_u85_BI( - self, test_tensor: torch.Tensor, dim, keepdim, correction + x: torch.Tensor, ): - self._test_var_ethosu_BI_pipeline( - self.VarCorrection(dim, keepdim, correction), - common.get_u85_compile_spec(), - (test_tensor,), - ) + return x.var(dim=self.dim, keepdim=self.keepdim, 
correction=self.correction) + + +@common.parametrize("test_data", Var.test_parameters) +def test_var_dim_tosa_MI_no_dim(test_data: Tuple): + test_data, keepdim, correction = test_data() + pipeline = TosaPipelineMI[input_t1]( + Var(keepdim, correction), + (test_data,), + aten_op=[], + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", Var.test_parameters) +def test_var_dim_tosa_BI_no_dim(test_data: Tuple): + test_data, keepdim, correction = test_data() + pipeline = TosaPipelineBI[input_t1]( + Var(keepdim, correction), + (test_data,), + aten_op=[], + exir_op=[], + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Var.test_parameters) +@common.XfailIfNoCorstone300 +def test_var_dim_u55_BI_no_dim(test_data: Tuple): + test_data, keepdim, correction = test_data() + pipeline = EthosU55PipelineBI[input_t1]( + Var(keepdim, correction), + (test_data,), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", Var.test_parameters) +@common.XfailIfNoCorstone320 +def test_var_dim_u85_BI_no_dim(test_data: Tuple): + test_data, keepdim, correction = test_data() + pipeline = EthosU85PipelineBI[input_t1]( + Var(keepdim, correction), + (test_data,), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", VarDim.test_parameters) +def test_var_dim_tosa_MI(test_data: Tuple): + test_data, dim, keepdim, unbiased = test_data() + pipeline = TosaPipelineMI[input_t1]( + VarDim(dim, keepdim, unbiased), + (test_data,), + aten_op=[], + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", VarDim.test_parameters) +def test_var_dim_tosa_BI(test_data: Tuple): + + test_data, dim, keepdim, unbiased = test_data() + pipeline = TosaPipelineBI[input_t1]( + VarDim(dim, keepdim, unbiased), + (test_data,), + aten_op=[], + exir_op=[], + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", VarDim.test_parameters_u55) +@common.XfailIfNoCorstone300 +def test_var_dim_u55_BI(test_data: Tuple): + test_data, dim, keepdim, unbiased = test_data() + pipeline = EthosU55PipelineBI[input_t1]( + VarDim(dim, keepdim, unbiased), + (test_data,), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", VarDim.test_parameters) +@common.XfailIfNoCorstone320 +def test_var_dim_u85_BI(test_data: Tuple): + test_data, dim, keepdim, unbiased = test_data() + pipeline = EthosU85PipelineBI[input_t1]( + VarDim(dim, keepdim, unbiased), + (test_data,), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", VarCorrection.test_parameters) +def test_var_dim_tosa_MI_correction(test_data: Tuple): + test_data, dim, keepdim, correction = test_data() + pipeline = TosaPipelineMI[input_t1]( + VarCorrection(dim, keepdim, correction), + (test_data,), + aten_op=[], + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", VarCorrection.test_parameters) +def test_var_dim_tosa_BI_correction(test_data: Tuple): + test_data, dim, keepdim, correction = test_data() + pipeline = TosaPipelineBI[input_t1]( + VarCorrection(dim, keepdim, correction), + (test_data,), + aten_op=[], + exir_op=[], + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", VarCorrection.test_parameters) 
+@common.XfailIfNoCorstone300 +def test_var_dim_u55_BI_correction(test_data: Tuple): + test_data, dim, keepdim, correction = test_data() + pipeline = EthosU55PipelineBI[input_t1]( + VarCorrection(dim, keepdim, correction), + (test_data,), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() + + +@common.parametrize("test_data", VarCorrection.test_parameters) +@common.XfailIfNoCorstone320 +def test_var_dim_u85_BI_correction(test_data: Tuple): + test_data, dim, keepdim, correction = test_data() + pipeline = EthosU85PipelineBI[input_t1]( + VarCorrection(dim, keepdim, correction), + (test_data,), + aten_ops=[], + exir_ops=[], + run_on_fvp=True, + symmetric_io_quantization=True, + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_view.py b/backends/arm/test/ops/test_view.py index f90ae402067..a899be6750d 100644 --- a/backends/arm/test/ops/test_view.py +++ b/backends/arm/test/ops/test_view.py @@ -1,5 +1,4 @@ # Copyright 2024-2025 Arm Limited and/or its affiliates. -# All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -8,131 +7,90 @@ # Tests the view op which changes the size of a Tensor without changing the underlying data. # -import unittest from typing import Tuple import torch from executorch.backends.arm.test import common -from executorch.backends.arm.test.tester.arm_tester import ArmTester - -from executorch.exir.backend.compile_spec_schema import CompileSpec -from parameterized import parameterized - - -class TestView(unittest.TestCase): - """Tests the view operation.""" - - class View(torch.nn.Module): - - needs_transpose_tests = [ - (torch.rand(100), (1, -1, 5, 2)), - (torch.rand(10, 2, 1, 5), (1, -1, 5, 2)), - (torch.rand(1, 2, 1, 9), (3, 1, 3, 2)), - (torch.rand(2, 1, 1, 9), (3, 2, 3, 1)), - (torch.rand(2, 50, 2, 1), (1, 200)), - (torch.rand(2, 5, 2, 3), (1, 15, 4)), - ] - - no_transpose_tests = [ - (torch.rand(2, 1, 1, 9), (3, 1, 3, 2)), - (torch.rand(5, 10, 1, 1), (25, 2, 1, 1)), - (torch.rand(10, 2), (1, 1, 5, 4)), - (torch.rand(10, 10), (5, 1, 5, 4)), - (torch.rand(1, 1, 1, 10), (1, 1, 10, 1)), - (torch.rand(1, 1, 5, 10), (1, 1, 50, 1)), - (torch.rand(5, 10, 1, 1), (1, 25, 2)), - (torch.rand(2, 50, 1, 1), (1, 100)), - (torch.rand(2, 3, 2, 3), (2, 3, 3, 2)), - ] - - def forward(self, x: torch.Tensor, new_shape): - return x.view(new_shape) - - def _test_view_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: torch.Tensor - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+MI"), - ) - .export() - .check_count({"torch.ops.aten.view.default": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data) - ) - - def _test_view_tosa_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - ( - ArmTester( - module, - example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"), - ) - .quantize() - .export() - .check_count({"torch.ops.aten.view.default": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .run_method_and_compare_outputs(inputs=test_data, qtol=1) - ) - - def _test_view_ethos_BI_pipeline( - self, - compile_spec: list[CompileSpec], - module: torch.nn.Module, - test_data: Tuple[torch.Tensor], - ): - ( - 
ArmTester( - module, - example_inputs=test_data, - compile_spec=compile_spec, - ) - .quantize() - .export() - .check_count({"torch.ops.aten.view.default": 1}) - .to_edge() - .partition() - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - ) - - def _test_view_u55_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - self._test_view_ethos_BI_pipeline( - common.get_u55_compile_spec(), module, test_data - ) - - def _test_view_u85_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.Tensor] - ): - self._test_view_ethos_BI_pipeline( - common.get_u85_compile_spec(), module, test_data - ) - - @parameterized.expand(View.needs_transpose_tests + View.no_transpose_tests) - def test_view_tosa_MI(self, test_tensor: torch.Tensor, new_shape): - self._test_view_tosa_MI_pipeline(self.View(), (test_tensor, new_shape)) - - @parameterized.expand(View.needs_transpose_tests + View.no_transpose_tests) - def test_view_tosa_BI(self, test_tensor: torch.Tensor, new_shape): - self._test_view_tosa_BI_pipeline(self.View(), (test_tensor, new_shape)) - - @parameterized.expand(View.needs_transpose_tests + View.no_transpose_tests) - def test_view_u55_BI(self, test_tensor: torch.Tensor, new_shape): - self._test_view_u55_BI_pipeline(self.View(), (test_tensor, new_shape)) - - @parameterized.expand(View.needs_transpose_tests + View.no_transpose_tests) - def test_view_u85_BI(self, test_tensor: torch.Tensor, new_shape): - self._test_view_u85_BI_pipeline(self.View(), (test_tensor, new_shape)) +from executorch.backends.arm.test.tester.test_pipeline import ( + EthosU55PipelineBI, + EthosU85PipelineBI, + TosaPipelineBI, + TosaPipelineMI, +) + +aten_op = "torch.ops.aten.view.default" + +input_t1 = Tuple[torch.Tensor, torch.Tensor] # Input x, Input y + + +class View(torch.nn.Module): + + needs_transpose_tests = { + "rand_1d_neg": lambda: (torch.rand(100), (1, -1, 5, 2)), + "rand_4d_neg": lambda: (torch.rand(10, 2, 1, 5), (1, -1, 5, 2)), + "rand_4d_4d_small": lambda: (torch.rand(1, 2, 1, 9), (3, 1, 3, 2)), + "rand_4d_4d": lambda: (torch.rand(2, 1, 1, 9), (3, 2, 3, 1)), + "rand_4d_2d": lambda: (torch.rand(2, 50, 2, 1), (1, 200)), + "rand_4d_3d": lambda: (torch.rand(2, 5, 2, 3), (1, 15, 4)), + "rand_4d_1": lambda: (torch.rand(2, 1, 1, 9), (3, 1, 3, 2)), + "rand_4d_2": lambda: (torch.rand(5, 10, 1, 1), (25, 2, 1, 1)), + "rand_4d_2_4": lambda: (torch.rand(10, 2), (1, 1, 5, 4)), + "rand_4d_2_4_big": lambda: (torch.rand(10, 10), (5, 1, 5, 4)), + "rand_4d_4_4": lambda: (torch.rand(1, 1, 1, 10), (1, 1, 10, 1)), + "rand_4d_4_4_big": lambda: (torch.rand(1, 1, 5, 10), (1, 1, 50, 1)), + "rand_4d_4_3": lambda: (torch.rand(5, 10, 1, 1), (1, 25, 2)), + "rand_4d_4_2": lambda: (torch.rand(2, 50, 1, 1), (1, 100)), + "rand_4d_2_4_same": lambda: (torch.rand(2, 3, 2, 3), (2, 3, 3, 2)), + } + + def forward(self, x: torch.Tensor, new_shape): + return x.view(new_shape) + + +@common.parametrize("test_data", View.needs_transpose_tests) +def test_view_tosa_MI(test_data: Tuple): + test_tensor, new_shape = test_data() + pipeline = TosaPipelineMI[input_t1]( + View(), + (test_tensor, new_shape), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", View.needs_transpose_tests) +def test_view_tosa_BI(test_data: Tuple): + test_tensor, new_shape = test_data() + pipeline = TosaPipelineBI[input_t1]( + View(), + (test_tensor, new_shape), + aten_op, + exir_op=[], + ) + pipeline.run() + + +@common.parametrize("test_data", View.needs_transpose_tests) +def 
test_view_u55_BI(test_data: Tuple): + test_tensor, new_shape = test_data() + pipeline = EthosU55PipelineBI[input_t1]( + View(), + (test_tensor, new_shape), + aten_op, + exir_ops=[], + ) + pipeline.run() + + +@common.parametrize("test_data", View.needs_transpose_tests) +def test_view_u85_BI(test_data: Tuple): + test_tensor, new_shape = test_data() + pipeline = EthosU85PipelineBI[input_t1]( + View(), + (test_tensor, new_shape), + aten_op, + exir_ops=[], + ) + pipeline.run() diff --git a/backends/arm/test/ops/test_where.py b/backends/arm/test/ops/test_where.py index 91d616232fa..7bfd27ac0a8 100644 --- a/backends/arm/test/ops/test_where.py +++ b/backends/arm/test/ops/test_where.py @@ -5,15 +5,13 @@ from typing import List, Tuple -import pytest - import torch from executorch.backends.arm.quantizer import ( EthosUQuantizer, get_symmetric_quantization_config, - TOSAQuantizer, ) + from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.test_pipeline import ( EthosU85PipelineBI, @@ -124,18 +122,18 @@ def scalar_condition(input: torch.Tensor): ) test_modules_common = { - "two_dim_tensor_cond": two_dim_tensor_cond, - "three_dim_tensor_cond": three_dim_tensor_cond, - "float32_tensor_cond": float32_tensor_cond, - "two_dim_scalar_cond": two_dim_scalar_cond, - "three_dim_scalar_cond": three_dim_scalar_cond, - "float32_scalar_cond": float32_scalar_cond, + "two_dim_tensor_cond": lambda: two_dim_tensor_cond, + "three_dim_tensor_cond": lambda: three_dim_tensor_cond, + "float32_tensor_cond": lambda: float32_tensor_cond, + "two_dim_scalar_cond": lambda: two_dim_scalar_cond, + "three_dim_scalar_cond": lambda: three_dim_scalar_cond, + "float32_scalar_cond": lambda: float32_scalar_cond, } test_modules_MI = { **test_modules_common, - "float32_tensor_cond_tuple_dtype": float32_tensor_cond_tuple_dtype, - "float32_tensor_cond_tuple_dtype_bool": float32_tensor_cond_tuple_dtype_bool, + "float32_tensor_cond_tuple_dtype": lambda: float32_tensor_cond_tuple_dtype, + "float32_tensor_cond_tuple_dtype_bool": lambda: float32_tensor_cond_tuple_dtype_bool, } test_modules_BI = { @@ -146,93 +144,51 @@ def scalar_condition(input: torch.Tensor): @common.parametrize("test_module", test_modules_MI) -def test_where_tosa_MI(test_module): +def test_where_self_tosa_MI(test_module): pipeline = TosaPipelineMI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, ) pipeline.run() @common.parametrize("test_module", test_modules_BI) -def test_where_tosa_BI(test_module): - compile_spec = common.get_tosa_compile_spec("TOSA-0.80+BI") - quantizer = TOSAQuantizer(compile_spec).set_io(get_symmetric_quantization_config()) +def test_where_self_tosa_BI(test_module): pipeline = TosaPipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op - ) - pipeline.change_args( - "quantize", Quantize(quantizer, get_symmetric_quantization_config()) + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, + symmetric_io_quantization=True, ) pipeline.run() @common.parametrize("test_module", test_modules_BI) -def test_where_u55_BI(test_module): - compile_spec = common.get_u55_compile_spec() - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() - ) - +@common.XfailIfNoCorstone300 +def test_where_self_u55_BI_not_delegated(test_module): # There will be one full_like op which will be delegated. 
num_delegates = 1 num_exir = 0 - pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", - { - exir_op: 1, - "executorch_exir_dialects_edge__ops_aten_full_default": num_exir, - }, - num_delegates, - ) - - pipeline.change_args( - "quantize", Quantize(quantizer, get_symmetric_quantization_config()) - ) - pipeline.run() - - -@common.parametrize("test_module", test_modules_BI) -def test_where_u85_BI(test_module): - compile_spec = common.get_u85_compile_spec() - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() - ) - pipeline = EthosU85PipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=False - ) - pipeline.change_args( - "quantize", Quantize(quantizer, get_symmetric_quantization_config()) - ) - pipeline.run() - - -@common.parametrize("test_module", test_modules_BI) -@pytest.mark.skip(reason="The same as test_where_u55_BI") -@common.XfailIfNoCorstone300 -def test_where_u55_BI_on_fvp(test_module): compile_spec = common.get_u55_compile_spec() quantizer = EthosUQuantizer(compile_spec).set_io( get_symmetric_quantization_config() ) - # There will be one full_like op which will be delegated. - num_delegates = 1 - num_exir = 0 - pipeline = OpNotSupportedPipeline[input_t]( - test_module, - test_module.get_inputs(), - "TOSA-0.80+BI+u55", + test_module(), + test_module().get_inputs(), { exir_op: 1, "executorch_exir_dialects_edge__ops_aten_full_default": num_exir, }, num_delegates, + quantize=True, + u55_subset=True, ) - pipeline.change_args( "quantize", Quantize(quantizer, get_symmetric_quantization_config()) ) @@ -241,15 +197,14 @@ def test_where_u55_BI_on_fvp(test_module): @common.parametrize("test_module", test_modules_BI) @common.XfailIfNoCorstone320 -def test_where_u85_BI_on_fvp(test_module): - compile_spec = common.get_u85_compile_spec() - quantizer = EthosUQuantizer(compile_spec).set_io( - get_symmetric_quantization_config() - ) +def test_where_self_u85_BI(test_module): + pipeline = EthosU85PipelineBI[input_t]( - test_module, test_module.get_inputs(), aten_op, exir_op, run_on_fvp=True - ) - pipeline.change_args( - "quantize", Quantize(quantizer, get_symmetric_quantization_config()) + test_module(), + test_module().get_inputs(), + aten_op, + exir_op, + run_on_fvp=True, + symmetric_io_quantization=True, ) pipeline.run() diff --git a/backends/arm/test/tester/test_pipeline.py b/backends/arm/test/tester/test_pipeline.py index 38d82b739e1..c4c90064bce 100644 --- a/backends/arm/test/tester/test_pipeline.py +++ b/backends/arm/test/tester/test_pipeline.py @@ -13,8 +13,9 @@ get_symmetric_quantization_config, TOSAQuantizer, ) -from executorch.backends.arm.test import common +from executorch.backends.arm.test import common, conftest from executorch.backends.arm.test.tester.arm_tester import ArmTester, RunPasses +from executorch.backends.arm.tosa_specification import TosaSpecification from executorch.backends.xnnpack.test.tester.tester import Quantize from executorch.exir.backend.compile_spec_schema import CompileSpec @@ -281,8 +282,14 @@ def __init__( rtol: float = 1e-03, qtol: int = 0, ): + tosa_profiles = { + "0.80": TosaSpecification.create_from_string("TOSA-0.80+BI"), + "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT"), + } + tosa_version = conftest.get_option("tosa_version") + compile_spec = common.get_tosa_compile_spec( - tosa_version, custom_path=custom_path + tosa_profiles[tosa_version], custom_path=custom_path ) quant_stage = ( Quantize( @@ -371,8 +378,14 @@ def 
__init__( rtol: float = 1e-03, qtol: int = 0, ): + tosa_profiles = { + "0.80": TosaSpecification.create_from_string("TOSA-0.80+MI"), + "1.0": TosaSpecification.create_from_string("TOSA-1.0+FP"), + } + tosa_version = conftest.get_option("tosa_version") + compile_spec = common.get_tosa_compile_spec( - tosa_version, custom_path=custom_path + tosa_profiles[tosa_version], custom_path=custom_path ) super().__init__( module, @@ -663,7 +676,6 @@ class TransformAnnotationPassPipeline(BasePipelineMaker, Generic[T]): Attributes: module: The module which the pipeline is applied to. test_data: Data used for testing the module. - tosa_version: The TOSA-version which to test for. custom_path : Path to dump intermediate artifacts such as tosa and pte to. @@ -673,11 +685,16 @@ def __init__( self, module: torch.nn.Module, test_data: T, - tosa_version: str, custom_path: str = None, ): + tosa_profiles = { + "0.80": TosaSpecification.create_from_string("TOSA-0.80+BI"), + "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT"), + } + tosa_version = conftest.get_option("tosa_version") + compile_spec = common.get_tosa_compile_spec( - tosa_version, custom_path=custom_path + tosa_profiles[tosa_version], custom_path=custom_path ) super().__init__( module, @@ -723,11 +740,21 @@ def __init__( self, module: torch.nn.Module, test_data: T, - tosa_version: str, non_delegated_ops: Dict[str, int], n_expected_delegates: int = 0, custom_path: str = None, + quantize: Optional[bool] = False, + u55_subset: Optional[bool] = False, ): + tosa_profiles = { + "0.80": "TOSA-0.80+" + ("BI" if quantize else "MI"), + "1.0": "TOSA-1.0+" + ("INT" if quantize else "FP"), + } + tosa_version = tosa_profiles[conftest.get_option("tosa_version")] + + if u55_subset and quantize: + tosa_version = f"{tosa_version}+u55" + compile_spec = common.get_tosa_compile_spec( tosa_version, custom_path=custom_path ) @@ -739,7 +766,7 @@ def __init__( [], ) - if "BI" in tosa_version: + if "INT" in tosa_version or "BI" in tosa_version: self.add_stage(self.tester.quantize, pos=0) self.change_args("check_not.exir", []) From 6346bfd3b6b7875e082a3886d0f301a694e3b2b1 Mon Sep 17 00:00:00 2001 From: jathu Date: Thu, 8 May 2025 10:14:34 -0700 Subject: [PATCH 009/178] Automatically announce declared options (#10766) ### Summary Instead of manually printing all the options in `tools/cmake/Utils.cmake`, let's just "automatically" print all the configured options. ### Test plan ``` $ ./scripts/build_apple_frameworks.sh --Debug -- --- Configurated Options --- -- EXECUTORCH_ENABLE_LOGGING : ON -- --------------------------- ``` ``` $ ./scripts/build_apple_frameworks.sh --Release -- --- Configurated Options --- -- EXECUTORCH_ENABLE_LOGGING : OFF -- --------------------------- ``` cc @larryliu0820 --- CMakeLists.txt | 3 ++ tools/cmake/Utils.cmake | 4 --- tools/cmake/common/preset.cmake | 50 +++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45993721a66..2d3f8e5f907 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,9 @@ project(executorch) include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake) include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake) +# Print all the configs that were called with announce_configured_options. 
+print_configured_options() + # MARK: - End EXECUTORCH_H12025_BUILD_MIGRATION ---------------------------------------------------- include(tools/cmake/Utils.cmake) diff --git a/tools/cmake/Utils.cmake b/tools/cmake/Utils.cmake index 0a09f51fd28..3155c3fc16e 100644 --- a/tools/cmake/Utils.cmake +++ b/tools/cmake/Utils.cmake @@ -30,10 +30,6 @@ function(executorch_print_configuration_summary) message(STATUS " BUCK2 : ${BUCK2}") message(STATUS " PYTHON_EXECUTABLE : ${PYTHON_EXECUTABLE}") message(STATUS " FLATC_EXECUTABLE : ${FLATC_EXECUTABLE}") - message( - STATUS - " EXECUTORCH_ENABLE_LOGGING : ${EXECUTORCH_ENABLE_LOGGING}" - ) message(STATUS " EXECUTORCH_ENABLE_PROGRAM_VERIFICATION : " "${EXECUTORCH_ENABLE_PROGRAM_VERIFICATION}" ) diff --git a/tools/cmake/common/preset.cmake b/tools/cmake/common/preset.cmake index 0fde24bae6a..0affdf04bdd 100644 --- a/tools/cmake/common/preset.cmake +++ b/tools/cmake/common/preset.cmake @@ -4,6 +4,54 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +# Announce the name and value of a cmake variable in the summary of the build. +function(announce_configured_options NAME) + get_property(_options GLOBAL PROPERTY _announce_configured_options) + if(NOT _options) + set_property(GLOBAL PROPERTY _announce_configured_options) + get_property(_options GLOBAL PROPERTY _announce_configured_options) + endif() + + set(option_exists FALSE) + foreach(_option IN LISTS _options) + if(_option STREQUAL "${NAME}") + set(option_exists TRUE) + break() + endif() + endforeach() + + if(NOT option_exists) + set(_options ${_options} "${NAME}") + set_property(GLOBAL PROPERTY _announce_configured_options "${_options}") + endif() +endfunction() + +# Print the configured options. +function(print_configured_options) + get_property(_options GLOBAL PROPERTY _announce_configured_options) + + set(_longest_name_length 0) + foreach(_option IN LISTS _options) + string(LENGTH "${_option}" length) + if(length GREATER _longest_name_length) + set(_longest_name_length ${length}) + endif() + endforeach() + + message(STATUS "--- Configurated Options ---\n") + foreach(_option IN LISTS _options) + string(LENGTH "${_option}" _option_length) + math(EXPR num_spaces "${_longest_name_length} - ${_option_length}") + set(padding "") + while(num_spaces GREATER 0) + set(padding "${padding} ") + math(EXPR num_spaces "${num_spaces} - 1") + endwhile() + message(STATUS "${_option}${padding} : ${${_option}}") + endforeach() + message(STATUS "---------------------------") +endfunction() + # Enforce option names to always start with EXECUTORCH. 
function(enforce_executorch_option_name NAME) if(NOT "${NAME}" MATCHES "^EXECUTORCH_") @@ -26,4 +74,6 @@ macro(define_overridable_option NAME DESCRIPTION VALUE_TYPE DEFAULT_VALUE) else() set(${NAME} ${DEFAULT_VALUE} CACHE ${VALUE_TYPE} ${DESCRIPTION}) endif() + + announce_configured_options(${NAME}) endmacro() From c35281394de8f757f20fa2fc30b25d099c7e2c35 Mon Sep 17 00:00:00 2001 From: Sebastian Larsson <38941629+Sebastian-Larsson@users.noreply.github.com> Date: Thu, 8 May 2025 19:54:44 +0200 Subject: [PATCH 010/178] Arm backend: Remove redundant validation check for op_where (#10773) Signed-off-by: Sebastian Larsson --- backends/arm/operators/op_where.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/backends/arm/operators/op_where.py b/backends/arm/operators/op_where.py index d34f4134def..67392fefcd8 100644 --- a/backends/arm/operators/op_where.py +++ b/backends/arm/operators/op_where.py @@ -69,8 +69,6 @@ def define_node( ) -> None: import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore - validate_num_inputs(self.target, inputs, 3) - bi_supported_dtypes = [ ts.DType.INT8, ts.DType.INT16, @@ -99,8 +97,6 @@ def define_node( ) -> None: import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore - validate_num_inputs(self.target, inputs, 3) - mi_supported_dtypes = [ ts.DType.FP16, ts.DType.FP32, @@ -163,8 +159,6 @@ def define_node( ) -> None: import serializer.tosa_serializer as ts - validate_num_inputs(self.target, inputs, 3) - bi_supported_dtypes = [ ts.DType.INT8, ts.DType.INT16, @@ -193,8 +187,6 @@ def define_node( ) -> None: import serializer.tosa_serializer as ts - validate_num_inputs(self.target, inputs, 3) - mi_supported_dtypes = [ ts.DType.FP16, ts.DType.FP32, From d24eda4aa0bb3704aa0f86a708c1a0fc8166a240 Mon Sep 17 00:00:00 2001 From: Sebastian Larsson <38941629+Sebastian-Larsson@users.noreply.github.com> Date: Thu, 8 May 2025 19:57:14 +0200 Subject: [PATCH 011/178] Arm backend: Replace asserts with exceptions in permutation code (#10774) Refactor assertion statements to raise ValueErrors for better error handling in permutation matrix and vector transformations. Ensure that conditions are checked and appropriate exceptions are raised to enhance code robustness and readability. Signed-off-by: Sebastian Larsson --- backends/arm/operators/op_permute.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/backends/arm/operators/op_permute.py b/backends/arm/operators/op_permute.py index b78ee94b774..2b345cb5118 100644 --- a/backends/arm/operators/op_permute.py +++ b/backends/arm/operators/op_permute.py @@ -46,24 +46,26 @@ def permutation_matrix_to_vector(permutation_matrix: torch.Tensor) -> list[int]: (1,0,2) """ N = len(permutation_matrix) - assert N == len( - permutation_matrix[0] - ), f"A permutation matrix must be square, got shape {permutation_matrix.shape}" + if N != len(permutation_matrix[0]): + raise ValueError( + f"A permutation matrix must be square, got shape {permutation_matrix.shape}" + ) p = [0] * N for row_index, row in enumerate(permutation_matrix): saw_one = False for col_index, value in enumerate(row): if value == 1: - assert ( - not saw_one - ), f"A permutation matrix can only have one 1 per row, got row {row}." + if saw_one: + raise ValueError( + f"A permutation matrix can only have one 1 per row, got {row=}" + ) p[row_index] = col_index saw_one = True - else: - assert ( - value == 0 - ), f"A permutation matrix only contains 1's and 0's, got value {value}." 
+ elif value != 0: + raise ValueError( + f"A permutation matrix only contains 1's and 0's, got {value=}" + ) return p From a37b369858ed89d12a593e318a5c9849f2c9613b Mon Sep 17 00:00:00 2001 From: trivedivivek <5340687+trivedivivek@users.noreply.github.com> Date: Thu, 8 May 2025 13:35:50 -0500 Subject: [PATCH 012/178] Minor vector sizing change. (#10753) Summary: Minor change to reserve size for VkWriteDescriptorSet and VkDescriptorSetLayoutBinding vectors. Differential Revision: D74335276 --- backends/vulkan/runtime/vk_api/Descriptor.cpp | 5 ++--- backends/vulkan/runtime/vk_api/Shader.cpp | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backends/vulkan/runtime/vk_api/Descriptor.cpp b/backends/vulkan/runtime/vk_api/Descriptor.cpp index b42ade6ea02..938666802ef 100644 --- a/backends/vulkan/runtime/vk_api/Descriptor.cpp +++ b/backends/vulkan/runtime/vk_api/Descriptor.cpp @@ -154,6 +154,7 @@ DescriptorSet& DescriptorSet::bind( VkDescriptorSet DescriptorSet::get_bind_handle() const { std::vector write_descriptor_sets; + write_descriptor_sets.reserve(bindings_.size()); for (const ResourceBinding& binding : bindings_) { VkWriteDescriptorSet write{ @@ -185,9 +186,7 @@ VkDescriptorSet DescriptorSet::get_bind_handle() const { 0u, nullptr); - VkDescriptorSet ret = handle_; - - return ret; + return handle_; } void DescriptorSet::add_binding(const ResourceBinding& binding) { diff --git a/backends/vulkan/runtime/vk_api/Shader.cpp b/backends/vulkan/runtime/vk_api/Shader.cpp index e560f37868e..458b1f83956 100644 --- a/backends/vulkan/runtime/vk_api/Shader.cpp +++ b/backends/vulkan/runtime/vk_api/Shader.cpp @@ -59,10 +59,11 @@ ShaderLayout::ShaderLayout( const ShaderLayout::Signature& signature) : device_(device), handle_{VK_NULL_HANDLE} { std::vector bindings; + bindings.reserve(signature.size()); uint32_t binding_num = 0u; for (const VkDescriptorType type : signature) { - bindings.push_back({ + bindings.emplace_back(VkDescriptorSetLayoutBinding{ binding_num++, // binding type, // descriptorType 1u, // descriptorCount From 380c4f1778a70e0239067a926269dfa23473ae25 Mon Sep 17 00:00:00 2001 From: jathu Date: Thu, 8 May 2025 13:32:49 -0700 Subject: [PATCH 013/178] Allow options to be set by presets (#10767) ### Summary In this diff we create a helper that will allow presets to set options. Again this is mostly a helper to check if the option has been defined already, then no-oping. To test it, I also create the first preset `macos-arm64`. I will test it in upcoming diffs. 
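
For context, the interaction between a preset and the helper looks like this. `EXECUTORCH_BUILD_COREML` is the real option used by the new preset; the surrounding walkthrough is only a sketch of the evaluation order, assuming the preset file is included before the option is declared:

```cmake
# tools/cmake/preset/macos-arm64.cmake -- the preset pins a value first:
set_overridable_option(EXECUTORCH_BUILD_COREML ON)

# tools/cmake/preset/default.cmake later declares the option. Because the
# variable was already defined by the preset, the declared default (OFF)
# does not apply and the cached value stays ON:
define_overridable_option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" BOOL OFF)
```

An explicit `-DEXECUTORCH_BUILD_COREML=OFF` on the command line still wins over the preset, since `set_overridable_option` no-ops whenever the variable is already defined.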
### Test plan pytest for now, manual test in future diffs cc @larryliu0820 --- CMakeLists.txt | 2 - tools/cmake/Utils.cmake | 4 -- tools/cmake/common/preset.cmake | 14 ++++++ tools/cmake/common/preset_test.py | 67 ++++++++++++++++++++++++++++ tools/cmake/preset/default.cmake | 1 + tools/cmake/preset/macos-arm64.cmake | 7 +++ 6 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 tools/cmake/preset/macos-arm64.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d3f8e5f907..03e36186c94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,8 +178,6 @@ option(EXECUTORCH_BUILD_ARM_BAREMETAL "Build the Arm Baremetal flow for Cortex-M and Ethos-U" OFF ) -option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF) - option(EXECUTORCH_BUILD_KERNELS_CUSTOM "Build the custom kernels" OFF) option(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT "Build the custom ops lib for AOT" diff --git a/tools/cmake/Utils.cmake b/tools/cmake/Utils.cmake index 3155c3fc16e..edbd682c7e3 100644 --- a/tools/cmake/Utils.cmake +++ b/tools/cmake/Utils.cmake @@ -45,10 +45,6 @@ function(executorch_print_configuration_summary) message(STATUS " EXECUTORCH_BUILD_CADENCE : " "${EXECUTORCH_BUILD_CADENCE}" ) - message( - STATUS - " EXECUTORCH_BUILD_COREML : ${EXECUTORCH_BUILD_COREML}" - ) message( STATUS " EXECUTORCH_BUILD_CPUINFO : ${EXECUTORCH_BUILD_CPUINFO}" diff --git a/tools/cmake/common/preset.cmake b/tools/cmake/common/preset.cmake index 0affdf04bdd..8f886abab36 100644 --- a/tools/cmake/common/preset.cmake +++ b/tools/cmake/common/preset.cmake @@ -26,6 +26,7 @@ function(announce_configured_options NAME) endif() endfunction() + # Print the configured options. function(print_configured_options) get_property(_options GLOBAL PROPERTY _announce_configured_options) @@ -52,6 +53,7 @@ function(print_configured_options) message(STATUS "---------------------------") endfunction() + # Enforce option names to always start with EXECUTORCH. function(enforce_executorch_option_name NAME) if(NOT "${NAME}" MATCHES "^EXECUTORCH_") @@ -59,6 +61,7 @@ function(enforce_executorch_option_name NAME) endif() endfunction() + # Define an overridable option. # 1) If the option is already defined in the process, then store that in cache # 2) If the option is NOT set, then store the default value in cache @@ -77,3 +80,14 @@ macro(define_overridable_option NAME DESCRIPTION VALUE_TYPE DEFAULT_VALUE) announce_configured_options(${NAME}) endmacro() + + +# Set an overridable option. +macro(set_overridable_option NAME VALUE) + # If the user has explitily set the option, do not override it. + if(DEFINED ${NAME}) + return() + endif() + + set(${NAME} ${VALUE} CACHE STRING "") +endmacro() diff --git a/tools/cmake/common/preset_test.py b/tools/cmake/common/preset_test.py index eb564eadace..1748062f166 100644 --- a/tools/cmake/common/preset_test.py +++ b/tools/cmake/common/preset_test.py @@ -223,3 +223,70 @@ def test_define_overridable_option_cli_override_with_set_override(self): self.run_cmake(cmake_args=["-DEXECUTORCH_TEST_MESSAGE='cli value'"]) # If an option is set through cmake, it should NOT be overridable from the CLI. 
self.assert_cmake_cache("EXECUTORCH_TEST_MESSAGE", "set value", "STRING") + + def test_set_overridable_option_before(self): + _cmake_lists_txt = """ + cmake_minimum_required(VERSION 3.24) + project(test_preset) + include(${PROJECT_SOURCE_DIR}/preset.cmake) + set_overridable_option(EXECUTORCH_TEST_MESSAGE "from set_overridable_option") + add_subdirectory(build) + """ + _build_cmake_lists_txt = """ + define_overridable_option(EXECUTORCH_TEST_MESSAGE "test message" STRING "move fast") + """ + self.create_workspace( + { + "CMakeLists.txt": _cmake_lists_txt, + "build": { + "CMakeLists.txt": _build_cmake_lists_txt, + }, + } + ) + self.run_cmake() + self.assert_cmake_cache( + "EXECUTORCH_TEST_MESSAGE", "from set_overridable_option", "STRING" + ) + + def test_set_overridable_option_after(self): + _cmake_lists_txt = """ + cmake_minimum_required(VERSION 3.24) + project(test_preset) + include(${PROJECT_SOURCE_DIR}/preset.cmake) + add_subdirectory(build) + set_overridable_option(EXECUTORCH_TEST_MESSAGE "from set_overridable_option") + """ + _build_cmake_lists_txt = """ + define_overridable_option(EXECUTORCH_TEST_MESSAGE "test message" STRING "move fast") + """ + self.create_workspace( + { + "CMakeLists.txt": _cmake_lists_txt, + "build": { + "CMakeLists.txt": _build_cmake_lists_txt, + }, + } + ) + self.run_cmake() + self.assert_cmake_cache("EXECUTORCH_TEST_MESSAGE", "move fast", "STRING") + + def test_set_overridable_option_with_cli_override(self): + _cmake_lists_txt = """ + cmake_minimum_required(VERSION 3.24) + project(test_preset) + include(${PROJECT_SOURCE_DIR}/preset.cmake) + add_subdirectory(build) + """ + _build_cmake_lists_txt = """ + define_overridable_option(EXECUTORCH_TEST_MESSAGE "test message" STRING "move fast") + """ + self.create_workspace( + { + "CMakeLists.txt": _cmake_lists_txt, + "build": { + "CMakeLists.txt": _build_cmake_lists_txt, + }, + } + ) + self.run_cmake(cmake_args=["-DEXECUTORCH_TEST_MESSAGE='from the cli'"]) + self.assert_cmake_cache("EXECUTORCH_TEST_MESSAGE", "from the cli", "STRING") diff --git a/tools/cmake/preset/default.cmake b/tools/cmake/preset/default.cmake index eafa8a7a937..5fbb47b1396 100644 --- a/tools/cmake/preset/default.cmake +++ b/tools/cmake/preset/default.cmake @@ -15,3 +15,4 @@ endif() # MARK: - Definitions define_overridable_option(EXECUTORCH_ENABLE_LOGGING "Build with ET_LOG_ENABLED" BOOL ${_is_build_type_debug}) +define_overridable_option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" BOOL OFF) diff --git a/tools/cmake/preset/macos-arm64.cmake b/tools/cmake/preset/macos-arm64.cmake new file mode 100644 index 00000000000..84e60c50b92 --- /dev/null +++ b/tools/cmake/preset/macos-arm64.cmake @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set_overridable_option(EXECUTORCH_BUILD_COREML ON) From d25ce549e5eb5ce606b995d9d839fabfb0e1e990 Mon Sep 17 00:00:00 2001 From: tmsl Date: Thu, 8 May 2025 13:42:35 -0700 Subject: [PATCH 014/178] Convert the unit test from java to kotlin (#10702) ### Summary This change converts the unit test from java to kotlin. 
### Test plan ./gradlew :executorch_android:testDebugUnitTest --------- Co-authored-by: Haiting Pu --- .../android/executorch_android/build.gradle | 5 + .../org/pytorch/executorch/EValueTest.java | 230 ------------- .../java/org/pytorch/executorch/EValueTest.kt | 224 +++++++++++++ .../org/pytorch/executorch/TensorTest.java | 305 ------------------ .../java/org/pytorch/executorch/TensorTest.kt | 296 +++++++++++++++++ extension/android/gradle/libs.versions.toml | 5 + 6 files changed, 530 insertions(+), 535 deletions(-) delete mode 100644 extension/android/executorch_android/src/test/java/org/pytorch/executorch/EValueTest.java create mode 100644 extension/android/executorch_android/src/test/java/org/pytorch/executorch/EValueTest.kt delete mode 100644 extension/android/executorch_android/src/test/java/org/pytorch/executorch/TensorTest.java create mode 100644 extension/android/executorch_android/src/test/java/org/pytorch/executorch/TensorTest.kt diff --git a/extension/android/executorch_android/build.gradle b/extension/android/executorch_android/build.gradle index 15088f4097f..fac08588740 100644 --- a/extension/android/executorch_android/build.gradle +++ b/extension/android/executorch_android/build.gradle @@ -9,6 +9,7 @@ plugins { id "com.android.library" version "8.9.0" id "com.vanniktech.maven.publish" version "0.31.0" + alias(libs.plugins.jetbrains.kotlin.android) } android { @@ -34,6 +35,9 @@ android { resources.srcDirs += [ 'src/androidTest/resources' ] } } + kotlinOptions { + jvmTarget = "1.8" + } } task copyTestRes(type: Exec) { @@ -43,6 +47,7 @@ task copyTestRes(type: Exec) { dependencies { implementation 'com.facebook.fbjni:fbjni:0.5.1' implementation 'com.facebook.soloader:nativeloader:0.10.5' + implementation libs.core.ktx testImplementation 'junit:junit:4.12' androidTestImplementation 'androidx.test.ext:junit:1.1.5' androidTestImplementation 'androidx.test:rules:1.2.0' diff --git a/extension/android/executorch_android/src/test/java/org/pytorch/executorch/EValueTest.java b/extension/android/executorch_android/src/test/java/org/pytorch/executorch/EValueTest.java deleted file mode 100644 index cbeb3a7b634..00000000000 --- a/extension/android/executorch_android/src/test/java/org/pytorch/executorch/EValueTest.java +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -package org.pytorch.executorch; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.util.Arrays; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Unit tests for {@link EValue}. 
*/ -@RunWith(JUnit4.class) -public class EValueTest { - @Test - public void testNone() { - EValue evalue = EValue.optionalNone(); - assertTrue(evalue.isNone()); - } - - @Test - public void testTensorValue() { - long[] data = {1, 2, 3}; - long[] shape = {1, 3}; - EValue evalue = EValue.from(Tensor.fromBlob(data, shape)); - assertTrue(evalue.isTensor()); - assertTrue(Arrays.equals(evalue.toTensor().shape, shape)); - assertTrue(Arrays.equals(evalue.toTensor().getDataAsLongArray(), data)); - } - - @Test - public void testBoolValue() { - EValue evalue = EValue.from(true); - assertTrue(evalue.isBool()); - assertTrue(evalue.toBool()); - } - - @Test - public void testIntValue() { - EValue evalue = EValue.from(1); - assertTrue(evalue.isInt()); - assertEquals(evalue.toInt(), 1); - } - - @Test - public void testDoubleValue() { - EValue evalue = EValue.from(0.1d); - assertTrue(evalue.isDouble()); - assertEquals(evalue.toDouble(), 0.1d, 0.0001d); - } - - @Test - public void testStringValue() { - EValue evalue = EValue.from("a"); - assertTrue(evalue.isString()); - assertEquals(evalue.toStr(), "a"); - } - - @Test - public void testAllIllegalCast() { - EValue evalue = EValue.optionalNone(); - assertTrue(evalue.isNone()); - - // try Tensor - assertFalse(evalue.isTensor()); - try { - evalue.toTensor(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - } - - // try bool - assertFalse(evalue.isBool()); - try { - evalue.toBool(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - } - - // try int - assertFalse(evalue.isInt()); - try { - evalue.toInt(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - } - - // try double - assertFalse(evalue.isDouble()); - try { - evalue.toDouble(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - } - - // try string - assertFalse(evalue.isString()); - try { - evalue.toStr(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - } - } - - @Test - public void testNoneSerde() { - EValue evalue = EValue.optionalNone(); - byte[] bytes = evalue.toByteArray(); - - EValue deser = EValue.fromByteArray(bytes); - assertEquals(deser.isNone(), true); - } - - @Test - public void testBoolSerde() { - EValue evalue = EValue.from(true); - byte[] bytes = evalue.toByteArray(); - assertEquals(1, bytes[1]); - - EValue deser = EValue.fromByteArray(bytes); - assertEquals(deser.isBool(), true); - assertEquals(deser.toBool(), true); - } - - @Test - public void testBoolSerde2() { - EValue evalue = EValue.from(false); - byte[] bytes = evalue.toByteArray(); - assertEquals(0, bytes[1]); - - EValue deser = EValue.fromByteArray(bytes); - assertEquals(deser.isBool(), true); - assertEquals(deser.toBool(), false); - } - - @Test - public void testIntSerde() { - EValue evalue = EValue.from(1); - byte[] bytes = evalue.toByteArray(); - assertEquals(0, bytes[1]); - assertEquals(0, bytes[2]); - assertEquals(0, bytes[3]); - assertEquals(0, bytes[4]); - assertEquals(0, bytes[5]); - assertEquals(0, bytes[6]); - assertEquals(0, bytes[7]); - assertEquals(1, bytes[8]); - - EValue deser = EValue.fromByteArray(bytes); - assertEquals(deser.isInt(), true); - assertEquals(deser.toInt(), 1); - } - - @Test - public void testLargeIntSerde() { - EValue evalue = EValue.from(256000); - byte[] bytes = evalue.toByteArray(); - - EValue deser = EValue.fromByteArray(bytes); - assertEquals(deser.isInt(), true); - assertEquals(deser.toInt(), 256000); - } - - @Test - public void 
testDoubleSerde() { - EValue evalue = EValue.from(1.345e-2d); - byte[] bytes = evalue.toByteArray(); - - EValue deser = EValue.fromByteArray(bytes); - assertEquals(deser.isDouble(), true); - assertEquals(1.345e-2d, deser.toDouble(), 1e-6); - } - - @Test - public void testLongTensorSerde() { - long data[] = {1, 2, 3, 4}; - long shape[] = {2, 2}; - Tensor tensor = Tensor.fromBlob(data, shape); - - EValue evalue = EValue.from(tensor); - byte[] bytes = evalue.toByteArray(); - - EValue deser = EValue.fromByteArray(bytes); - assertEquals(deser.isTensor(), true); - Tensor deserTensor = deser.toTensor(); - long[] deserShape = deserTensor.shape(); - long[] deserData = deserTensor.getDataAsLongArray(); - - for (int i = 0; i < data.length; i++) { - assertEquals(data[i], deserData[i]); - } - - for (int i = 0; i < shape.length; i++) { - assertEquals(shape[i], deserShape[i]); - } - } - - @Test - public void testFloatTensorSerde() { - float data[] = {Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE}; - long shape[] = {2, 2}; - Tensor tensor = Tensor.fromBlob(data, shape); - - EValue evalue = EValue.from(tensor); - byte[] bytes = evalue.toByteArray(); - - EValue deser = EValue.fromByteArray(bytes); - assertEquals(deser.isTensor(), true); - Tensor deserTensor = deser.toTensor(); - long[] deserShape = deserTensor.shape(); - float[] deserData = deserTensor.getDataAsFloatArray(); - - for (int i = 0; i < data.length; i++) { - assertEquals(data[i], deserData[i], 1e-5); - } - - for (int i = 0; i < shape.length; i++) { - assertEquals(shape[i], deserShape[i]); - } - } -} diff --git a/extension/android/executorch_android/src/test/java/org/pytorch/executorch/EValueTest.kt b/extension/android/executorch_android/src/test/java/org/pytorch/executorch/EValueTest.kt new file mode 100644 index 00000000000..0e56480d621 --- /dev/null +++ b/extension/android/executorch_android/src/test/java/org/pytorch/executorch/EValueTest.kt @@ -0,0 +1,224 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +package org.pytorch.executorch + +import org.junit.Assert +import org.junit.Test +import org.junit.runner.RunWith +import org.junit.runners.JUnit4 + +/** Unit tests for [EValue]. 
*/ +@RunWith(JUnit4::class) +class EValueTest { + @Test + fun testNone() { + val evalue = EValue.optionalNone() + Assert.assertTrue(evalue.isNone) + } + + @Test + fun testTensorValue() { + val data = longArrayOf(1, 2, 3) + val shape = longArrayOf(1, 3) + val evalue = EValue.from(Tensor.fromBlob(data, shape)) + Assert.assertTrue(evalue.isTensor) + Assert.assertTrue(evalue.toTensor().shape.contentEquals(shape)) + Assert.assertTrue(evalue.toTensor().dataAsLongArray.contentEquals(data)) + } + + @Test + fun testBoolValue() { + val evalue = EValue.from(true) + Assert.assertTrue(evalue.isBool) + Assert.assertTrue(evalue.toBool()) + } + + @Test + fun testIntValue() { + val evalue = EValue.from(1) + Assert.assertTrue(evalue.isInt) + Assert.assertEquals(evalue.toInt(), 1) + } + + @Test + fun testDoubleValue() { + val evalue = EValue.from(0.1) + Assert.assertTrue(evalue.isDouble) + Assert.assertEquals(evalue.toDouble(), 0.1, 0.0001) + } + + @Test + fun testStringValue() { + val evalue = EValue.from("a") + Assert.assertTrue(evalue.isString) + Assert.assertEquals(evalue.toStr(), "a") + } + + @Test + fun testAllIllegalCast() { + val evalue = EValue.optionalNone() + Assert.assertTrue(evalue.isNone) + + // try Tensor + Assert.assertFalse(evalue.isTensor) + try { + evalue.toTensor() + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + } + + // try bool + Assert.assertFalse(evalue.isBool) + try { + evalue.toBool() + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + } + + // try int + Assert.assertFalse(evalue.isInt) + try { + evalue.toInt() + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + } + + // try double + Assert.assertFalse(evalue.isDouble) + try { + evalue.toDouble() + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + } + + // try string + Assert.assertFalse(evalue.isString) + try { + evalue.toStr() + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + } + } + + @Test + fun testNoneSerde() { + val evalue = EValue.optionalNone() + val bytes = evalue.toByteArray() + + val deser = EValue.fromByteArray(bytes) + Assert.assertEquals(deser.isNone, true) + } + + @Test + fun testBoolSerde() { + val evalue = EValue.from(true) + val bytes = evalue.toByteArray() + Assert.assertEquals(1, bytes[1].toLong()) + + val deser = EValue.fromByteArray(bytes) + Assert.assertEquals(deser.isBool, true) + Assert.assertEquals(deser.toBool(), true) + } + + @Test + fun testBoolSerde2() { + val evalue = EValue.from(false) + val bytes = evalue.toByteArray() + Assert.assertEquals(0, bytes[1].toLong()) + + val deser = EValue.fromByteArray(bytes) + Assert.assertEquals(deser.isBool, true) + Assert.assertEquals(deser.toBool(), false) + } + + @Test + fun testIntSerde() { + val evalue = EValue.from(1) + val bytes = evalue.toByteArray() + Assert.assertEquals(0, bytes[1].toLong()) + Assert.assertEquals(0, bytes[2].toLong()) + Assert.assertEquals(0, bytes[3].toLong()) + Assert.assertEquals(0, bytes[4].toLong()) + Assert.assertEquals(0, bytes[5].toLong()) + Assert.assertEquals(0, bytes[6].toLong()) + Assert.assertEquals(0, bytes[7].toLong()) + Assert.assertEquals(1, bytes[8].toLong()) + + val deser = EValue.fromByteArray(bytes) + Assert.assertEquals(deser.isInt, true) + Assert.assertEquals(deser.toInt(), 1) + } + + @Test + fun testLargeIntSerde() { + val evalue = EValue.from(256000) + val bytes = evalue.toByteArray() + + val deser = 
EValue.fromByteArray(bytes) + Assert.assertEquals(deser.isInt, true) + Assert.assertEquals(deser.toInt(), 256000) + } + + @Test + fun testDoubleSerde() { + val evalue = EValue.from(1.345e-2) + val bytes = evalue.toByteArray() + + val deser = EValue.fromByteArray(bytes) + Assert.assertEquals(deser.isDouble, true) + Assert.assertEquals(1.345e-2, deser.toDouble(), 1e-6) + } + + @Test + fun testLongTensorSerde() { + val data = longArrayOf(1, 2, 3, 4) + val shape = longArrayOf(2, 2) + val tensor = Tensor.fromBlob(data, shape) + + val evalue = EValue.from(tensor) + val bytes = evalue.toByteArray() + + val deser = EValue.fromByteArray(bytes) + Assert.assertEquals(deser.isTensor, true) + val deserTensor = deser.toTensor() + val deserShape = deserTensor.shape() + val deserData = deserTensor.dataAsLongArray + + for (i in data.indices) { + Assert.assertEquals(data[i], deserData[i]) + } + + for (i in shape.indices) { + Assert.assertEquals(shape[i], deserShape[i]) + } + } + + @Test + fun testFloatTensorSerde() { + val data = floatArrayOf(Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE) + val shape = longArrayOf(2, 2) + val tensor = Tensor.fromBlob(data, shape) + + val evalue = EValue.from(tensor) + val bytes = evalue.toByteArray() + + val deser = EValue.fromByteArray(bytes) + Assert.assertEquals(deser.isTensor, true) + val deserTensor = deser.toTensor() + val deserShape = deserTensor.shape() + val deserData = deserTensor.dataAsFloatArray + + for (i in data.indices) { + Assert.assertEquals(data[i].toDouble(), deserData[i].toDouble(), 1e-5) + } + + for (i in shape.indices) { + Assert.assertEquals(shape[i], deserShape[i]) + } + } +} diff --git a/extension/android/executorch_android/src/test/java/org/pytorch/executorch/TensorTest.java b/extension/android/executorch_android/src/test/java/org/pytorch/executorch/TensorTest.java deleted file mode 100644 index 9811a1d0ff6..00000000000 --- a/extension/android/executorch_android/src/test/java/org/pytorch/executorch/TensorTest.java +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -package org.pytorch.executorch; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - -import java.nio.ByteBuffer; -import java.nio.DoubleBuffer; -import java.nio.FloatBuffer; -import java.nio.IntBuffer; -import java.nio.LongBuffer; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Unit tests for {@link Tensor}. 
*/ -@RunWith(JUnit4.class) -public class TensorTest { - - @Test - public void testFloatTensor() { - float data[] = {Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE}; - long shape[] = {2, 2}; - Tensor tensor = Tensor.fromBlob(data, shape); - assertEquals(tensor.dtype(), DType.FLOAT); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsFloatArray()[0], 1e-5); - assertEquals(data[1], tensor.getDataAsFloatArray()[1], 1e-5); - assertEquals(data[2], tensor.getDataAsFloatArray()[2], 1e-5); - assertEquals(data[3], tensor.getDataAsFloatArray()[3], 1e-5); - - FloatBuffer floatBuffer = Tensor.allocateFloatBuffer(4); - floatBuffer.put(data); - tensor = Tensor.fromBlob(floatBuffer, shape); - assertEquals(tensor.dtype(), DType.FLOAT); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsFloatArray()[0], 1e-5); - assertEquals(data[1], tensor.getDataAsFloatArray()[1], 1e-5); - assertEquals(data[2], tensor.getDataAsFloatArray()[2], 1e-5); - assertEquals(data[3], tensor.getDataAsFloatArray()[3], 1e-5); - } - - @Test - public void testIntTensor() { - int data[] = {Integer.MIN_VALUE, 0, 1, Integer.MAX_VALUE}; - long shape[] = {1, 4, 1}; - Tensor tensor = Tensor.fromBlob(data, shape); - assertEquals(tensor.dtype(), DType.INT32); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(shape[2], tensor.shape()[2]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsIntArray()[0]); - assertEquals(data[1], tensor.getDataAsIntArray()[1]); - assertEquals(data[2], tensor.getDataAsIntArray()[2]); - assertEquals(data[3], tensor.getDataAsIntArray()[3]); - - IntBuffer intBuffer = Tensor.allocateIntBuffer(4); - intBuffer.put(data); - tensor = Tensor.fromBlob(intBuffer, shape); - assertEquals(tensor.dtype(), DType.INT32); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(shape[2], tensor.shape()[2]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsIntArray()[0]); - assertEquals(data[1], tensor.getDataAsIntArray()[1]); - assertEquals(data[2], tensor.getDataAsIntArray()[2]); - assertEquals(data[3], tensor.getDataAsIntArray()[3]); - } - - @Test - public void testDoubleTensor() { - double data[] = {Double.MIN_VALUE, 0.0d, 0.1d, Double.MAX_VALUE}; - long shape[] = {1, 4}; - Tensor tensor = Tensor.fromBlob(data, shape); - assertEquals(tensor.dtype(), DType.DOUBLE); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsDoubleArray()[0], 1e-5); - assertEquals(data[1], tensor.getDataAsDoubleArray()[1], 1e-5); - assertEquals(data[2], tensor.getDataAsDoubleArray()[2], 1e-5); - assertEquals(data[3], tensor.getDataAsDoubleArray()[3], 1e-5); - - DoubleBuffer doubleBuffer = Tensor.allocateDoubleBuffer(4); - doubleBuffer.put(data); - tensor = Tensor.fromBlob(doubleBuffer, shape); - assertEquals(tensor.dtype(), DType.DOUBLE); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsDoubleArray()[0], 1e-5); - assertEquals(data[1], tensor.getDataAsDoubleArray()[1], 1e-5); - assertEquals(data[2], tensor.getDataAsDoubleArray()[2], 1e-5); - assertEquals(data[3], 
tensor.getDataAsDoubleArray()[3], 1e-5); - } - - @Test - public void testLongTensor() { - long data[] = {Long.MIN_VALUE, 0L, 1L, Long.MAX_VALUE}; - long shape[] = {4, 1}; - Tensor tensor = Tensor.fromBlob(data, shape); - assertEquals(tensor.dtype(), DType.INT64); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsLongArray()[0]); - assertEquals(data[1], tensor.getDataAsLongArray()[1]); - assertEquals(data[2], tensor.getDataAsLongArray()[2]); - assertEquals(data[3], tensor.getDataAsLongArray()[3]); - - LongBuffer longBuffer = Tensor.allocateLongBuffer(4); - longBuffer.put(data); - tensor = Tensor.fromBlob(longBuffer, shape); - assertEquals(tensor.dtype(), DType.INT64); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsLongArray()[0]); - assertEquals(data[1], tensor.getDataAsLongArray()[1]); - assertEquals(data[2], tensor.getDataAsLongArray()[2]); - assertEquals(data[3], tensor.getDataAsLongArray()[3]); - } - - @Test - public void testSignedByteTensor() { - byte data[] = {Byte.MIN_VALUE, (byte) 0, (byte) 1, Byte.MAX_VALUE}; - long shape[] = {1, 1, 4}; - Tensor tensor = Tensor.fromBlob(data, shape); - assertEquals(tensor.dtype(), DType.INT8); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(shape[2], tensor.shape()[2]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsByteArray()[0]); - assertEquals(data[1], tensor.getDataAsByteArray()[1]); - assertEquals(data[2], tensor.getDataAsByteArray()[2]); - assertEquals(data[3], tensor.getDataAsByteArray()[3]); - - ByteBuffer byteBuffer = Tensor.allocateByteBuffer(4); - byteBuffer.put(data); - tensor = Tensor.fromBlob(byteBuffer, shape); - assertEquals(tensor.dtype(), DType.INT8); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(shape[2], tensor.shape()[2]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsByteArray()[0]); - assertEquals(data[1], tensor.getDataAsByteArray()[1]); - assertEquals(data[2], tensor.getDataAsByteArray()[2]); - assertEquals(data[3], tensor.getDataAsByteArray()[3]); - } - - @Test - public void testUnsignedByteTensor() { - byte data[] = {(byte) 0, (byte) 1, (byte) 2, (byte) 255}; - long shape[] = {4, 1, 1}; - Tensor tensor = Tensor.fromBlobUnsigned(data, shape); - assertEquals(tensor.dtype(), DType.UINT8); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(shape[2], tensor.shape()[2]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsUnsignedByteArray()[0]); - assertEquals(data[1], tensor.getDataAsUnsignedByteArray()[1]); - assertEquals(data[2], tensor.getDataAsUnsignedByteArray()[2]); - assertEquals(data[3], tensor.getDataAsUnsignedByteArray()[3]); - - ByteBuffer byteBuffer = Tensor.allocateByteBuffer(4); - byteBuffer.put(data); - tensor = Tensor.fromBlobUnsigned(byteBuffer, shape); - assertEquals(tensor.dtype(), DType.UINT8); - assertEquals(shape[0], tensor.shape()[0]); - assertEquals(shape[1], tensor.shape()[1]); - assertEquals(shape[2], tensor.shape()[2]); - assertEquals(4, tensor.numel()); - assertEquals(data[0], tensor.getDataAsUnsignedByteArray()[0]); - assertEquals(data[1], tensor.getDataAsUnsignedByteArray()[1]); - assertEquals(data[2], 
tensor.getDataAsUnsignedByteArray()[2]); - assertEquals(data[3], tensor.getDataAsUnsignedByteArray()[3]); - } - - @Test - public void testIllegalDataTypeException() { - float data[] = {Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE}; - long shape[] = {2, 2}; - Tensor tensor = Tensor.fromBlob(data, shape); - assertEquals(tensor.dtype(), DType.FLOAT); - - try { - tensor.getDataAsByteArray(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - // expected - } - try { - tensor.getDataAsUnsignedByteArray(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - // expected - } - try { - tensor.getDataAsIntArray(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - // expected - } - try { - tensor.getDataAsDoubleArray(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - // expected - } - try { - tensor.getDataAsLongArray(); - fail("Should have thrown an exception"); - } catch (IllegalStateException e) { - // expected - } - } - - @Test - public void testIllegalArguments() { - float data[] = {Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE}; - long shapeWithNegativeValues[] = {-1, 2}; - long mismatchShape[] = {1, 2}; - - try { - Tensor tensor = Tensor.fromBlob((float[]) null, mismatchShape); - fail("Should have thrown an exception"); - } catch (IllegalArgumentException e) { - // expected - } - try { - Tensor tensor = Tensor.fromBlob(data, null); - fail("Should have thrown an exception"); - } catch (IllegalArgumentException e) { - // expected - } - try { - Tensor tensor = Tensor.fromBlob(data, shapeWithNegativeValues); - fail("Should have thrown an exception"); - } catch (IllegalArgumentException e) { - // expected - } - try { - Tensor tensor = Tensor.fromBlob(data, mismatchShape); - fail("Should have thrown an exception"); - } catch (IllegalArgumentException e) { - // expected - } - } - - @Test - public void testLongTensorSerde() { - long data[] = {1, 2, 3, 4}; - long shape[] = {2, 2}; - Tensor tensor = Tensor.fromBlob(data, shape); - byte[] bytes = tensor.toByteArray(); - - Tensor deser = Tensor.fromByteArray(bytes); - long[] deserShape = deser.shape(); - long[] deserData = deser.getDataAsLongArray(); - - for (int i = 0; i < data.length; i++) { - assertEquals(data[i], deserData[i]); - } - - for (int i = 0; i < shape.length; i++) { - assertEquals(shape[i], deserShape[i]); - } - } - - @Test - public void testFloatTensorSerde() { - float data[] = {Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE}; - long shape[] = {2, 2}; - Tensor tensor = Tensor.fromBlob(data, shape); - byte[] bytes = tensor.toByteArray(); - - Tensor deser = Tensor.fromByteArray(bytes); - long[] deserShape = deser.shape(); - float[] deserData = deser.getDataAsFloatArray(); - - for (int i = 0; i < data.length; i++) { - assertEquals(data[i], deserData[i], 1e-5); - } - - for (int i = 0; i < shape.length; i++) { - assertEquals(shape[i], deserShape[i]); - } - } -} diff --git a/extension/android/executorch_android/src/test/java/org/pytorch/executorch/TensorTest.kt b/extension/android/executorch_android/src/test/java/org/pytorch/executorch/TensorTest.kt new file mode 100644 index 00000000000..4b206c8efbd --- /dev/null +++ b/extension/android/executorch_android/src/test/java/org/pytorch/executorch/TensorTest.kt @@ -0,0 +1,296 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +package org.pytorch.executorch + +import org.junit.Assert +import org.junit.Test +import org.junit.runner.RunWith +import org.junit.runners.JUnit4 + +/** Unit tests for [Tensor]. */ +@RunWith(JUnit4::class) +class TensorTest { + @Test + fun testFloatTensor() { + val data = floatArrayOf(Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE) + val shape = longArrayOf(2, 2) + var tensor = Tensor.fromBlob(data, shape) + Assert.assertEquals(tensor.dtype(), DType.FLOAT) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0].toDouble(), tensor.dataAsFloatArray[0].toDouble(), 1e-5) + Assert.assertEquals(data[1].toDouble(), tensor.dataAsFloatArray[1].toDouble(), 1e-5) + Assert.assertEquals(data[2].toDouble(), tensor.dataAsFloatArray[2].toDouble(), 1e-5) + Assert.assertEquals(data[3].toDouble(), tensor.dataAsFloatArray[3].toDouble(), 1e-5) + + val floatBuffer = Tensor.allocateFloatBuffer(4) + floatBuffer.put(data) + tensor = Tensor.fromBlob(floatBuffer, shape) + Assert.assertEquals(tensor.dtype(), DType.FLOAT) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0].toDouble(), tensor.dataAsFloatArray[0].toDouble(), 1e-5) + Assert.assertEquals(data[1].toDouble(), tensor.dataAsFloatArray[1].toDouble(), 1e-5) + Assert.assertEquals(data[2].toDouble(), tensor.dataAsFloatArray[2].toDouble(), 1e-5) + Assert.assertEquals(data[3].toDouble(), tensor.dataAsFloatArray[3].toDouble(), 1e-5) + } + + @Test + fun testIntTensor() { + val data = intArrayOf(Int.MIN_VALUE, 0, 1, Int.MAX_VALUE) + val shape = longArrayOf(1, 4, 1) + var tensor = Tensor.fromBlob(data, shape) + Assert.assertEquals(tensor.dtype(), DType.INT32) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(shape[2], tensor.shape()[2]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0].toLong(), tensor.dataAsIntArray[0].toLong()) + Assert.assertEquals(data[1].toLong(), tensor.dataAsIntArray[1].toLong()) + Assert.assertEquals(data[2].toLong(), tensor.dataAsIntArray[2].toLong()) + Assert.assertEquals(data[3].toLong(), tensor.dataAsIntArray[3].toLong()) + + val intBuffer = Tensor.allocateIntBuffer(4) + intBuffer.put(data) + tensor = Tensor.fromBlob(intBuffer, shape) + Assert.assertEquals(tensor.dtype(), DType.INT32) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(shape[2], tensor.shape()[2]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0].toLong(), tensor.dataAsIntArray[0].toLong()) + Assert.assertEquals(data[1].toLong(), tensor.dataAsIntArray[1].toLong()) + Assert.assertEquals(data[2].toLong(), tensor.dataAsIntArray[2].toLong()) + Assert.assertEquals(data[3].toLong(), tensor.dataAsIntArray[3].toLong()) + } + + @Test + fun testDoubleTensor() { + val data = doubleArrayOf(Double.MIN_VALUE, 0.0, 0.1, Double.MAX_VALUE) + val shape = longArrayOf(1, 4) + var tensor = Tensor.fromBlob(data, shape) + Assert.assertEquals(tensor.dtype(), DType.DOUBLE) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(4, tensor.numel()) + 
Assert.assertEquals(data[0], tensor.dataAsDoubleArray[0], 1e-5) + Assert.assertEquals(data[1], tensor.dataAsDoubleArray[1], 1e-5) + Assert.assertEquals(data[2], tensor.dataAsDoubleArray[2], 1e-5) + Assert.assertEquals(data[3], tensor.dataAsDoubleArray[3], 1e-5) + + val doubleBuffer = Tensor.allocateDoubleBuffer(4) + doubleBuffer.put(data) + tensor = Tensor.fromBlob(doubleBuffer, shape) + Assert.assertEquals(tensor.dtype(), DType.DOUBLE) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0], tensor.dataAsDoubleArray[0], 1e-5) + Assert.assertEquals(data[1], tensor.dataAsDoubleArray[1], 1e-5) + Assert.assertEquals(data[2], tensor.dataAsDoubleArray[2], 1e-5) + Assert.assertEquals(data[3], tensor.dataAsDoubleArray[3], 1e-5) + } + + @Test + fun testLongTensor() { + val data = longArrayOf(Long.MIN_VALUE, 0L, 1L, Long.MAX_VALUE) + val shape = longArrayOf(4, 1) + var tensor = Tensor.fromBlob(data, shape) + Assert.assertEquals(tensor.dtype(), DType.INT64) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0], tensor.dataAsLongArray[0]) + Assert.assertEquals(data[1], tensor.dataAsLongArray[1]) + Assert.assertEquals(data[2], tensor.dataAsLongArray[2]) + Assert.assertEquals(data[3], tensor.dataAsLongArray[3]) + + val longBuffer = Tensor.allocateLongBuffer(4) + longBuffer.put(data) + tensor = Tensor.fromBlob(longBuffer, shape) + Assert.assertEquals(tensor.dtype(), DType.INT64) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0], tensor.dataAsLongArray[0]) + Assert.assertEquals(data[1], tensor.dataAsLongArray[1]) + Assert.assertEquals(data[2], tensor.dataAsLongArray[2]) + Assert.assertEquals(data[3], tensor.dataAsLongArray[3]) + } + + @Test + fun testSignedByteTensor() { + val data = byteArrayOf(Byte.MIN_VALUE, 0.toByte(), 1.toByte(), Byte.MAX_VALUE) + val shape = longArrayOf(1, 1, 4) + var tensor = Tensor.fromBlob(data, shape) + Assert.assertEquals(tensor.dtype(), DType.INT8) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(shape[2], tensor.shape()[2]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0].toLong(), tensor.dataAsByteArray[0].toLong()) + Assert.assertEquals(data[1].toLong(), tensor.dataAsByteArray[1].toLong()) + Assert.assertEquals(data[2].toLong(), tensor.dataAsByteArray[2].toLong()) + Assert.assertEquals(data[3].toLong(), tensor.dataAsByteArray[3].toLong()) + + val byteBuffer = Tensor.allocateByteBuffer(4) + byteBuffer.put(data) + tensor = Tensor.fromBlob(byteBuffer, shape) + Assert.assertEquals(tensor.dtype(), DType.INT8) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(shape[2], tensor.shape()[2]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0].toLong(), tensor.dataAsByteArray[0].toLong()) + Assert.assertEquals(data[1].toLong(), tensor.dataAsByteArray[1].toLong()) + Assert.assertEquals(data[2].toLong(), tensor.dataAsByteArray[2].toLong()) + Assert.assertEquals(data[3].toLong(), tensor.dataAsByteArray[3].toLong()) + } + + @Test + fun testUnsignedByteTensor() { + val data = byteArrayOf(0.toByte(), 1.toByte(), 2.toByte(), 255.toByte()) + val shape = 
longArrayOf(4, 1, 1) + var tensor = Tensor.fromBlobUnsigned(data, shape) + Assert.assertEquals(tensor.dtype(), DType.UINT8) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(shape[2], tensor.shape()[2]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0].toLong(), tensor.dataAsUnsignedByteArray[0].toLong()) + Assert.assertEquals(data[1].toLong(), tensor.dataAsUnsignedByteArray[1].toLong()) + Assert.assertEquals(data[2].toLong(), tensor.dataAsUnsignedByteArray[2].toLong()) + Assert.assertEquals(data[3].toLong(), tensor.dataAsUnsignedByteArray[3].toLong()) + + val byteBuffer = Tensor.allocateByteBuffer(4) + byteBuffer.put(data) + tensor = Tensor.fromBlobUnsigned(byteBuffer, shape) + Assert.assertEquals(tensor.dtype(), DType.UINT8) + Assert.assertEquals(shape[0], tensor.shape()[0]) + Assert.assertEquals(shape[1], tensor.shape()[1]) + Assert.assertEquals(shape[2], tensor.shape()[2]) + Assert.assertEquals(4, tensor.numel()) + Assert.assertEquals(data[0].toLong(), tensor.dataAsUnsignedByteArray[0].toLong()) + Assert.assertEquals(data[1].toLong(), tensor.dataAsUnsignedByteArray[1].toLong()) + Assert.assertEquals(data[2].toLong(), tensor.dataAsUnsignedByteArray[2].toLong()) + Assert.assertEquals(data[3].toLong(), tensor.dataAsUnsignedByteArray[3].toLong()) + } + + @Test + fun testIllegalDataTypeException() { + val data = floatArrayOf(Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE) + val shape = longArrayOf(2, 2) + val tensor = Tensor.fromBlob(data, shape) + Assert.assertEquals(tensor.dtype(), DType.FLOAT) + + try { + tensor.dataAsByteArray + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + // expected + } + try { + tensor.dataAsUnsignedByteArray + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + // expected + } + try { + tensor.dataAsIntArray + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + // expected + } + try { + tensor.dataAsDoubleArray + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + // expected + } + try { + tensor.dataAsLongArray + Assert.fail("Should have thrown an exception") + } catch (e: IllegalStateException) { + // expected + } + } + + @Test + fun testIllegalArguments() { + val data = floatArrayOf(Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE) + val shapeWithNegativeValues = longArrayOf(-1, 2) + val mismatchShape = longArrayOf(1, 2) + + try { + val tensor = Tensor.fromBlob(null as FloatArray?, mismatchShape) + Assert.fail("Should have thrown an exception") + } catch (e: IllegalArgumentException) { + // expected + } + try { + val tensor = Tensor.fromBlob(data, null) + Assert.fail("Should have thrown an exception") + } catch (e: IllegalArgumentException) { + // expected + } + try { + val tensor = Tensor.fromBlob(data, shapeWithNegativeValues) + Assert.fail("Should have thrown an exception") + } catch (e: IllegalArgumentException) { + // expected + } + try { + val tensor = Tensor.fromBlob(data, mismatchShape) + Assert.fail("Should have thrown an exception") + } catch (e: IllegalArgumentException) { + // expected + } + } + + @Test + fun testLongTensorSerde() { + val data = longArrayOf(1, 2, 3, 4) + val shape = longArrayOf(2, 2) + val tensor = Tensor.fromBlob(data, shape) + val bytes = tensor.toByteArray() + + val deser = Tensor.fromByteArray(bytes) + val deserShape = deser.shape() + val deserData = deser.dataAsLongArray + + for (i in data.indices) 
{ + Assert.assertEquals(data[i], deserData[i]) + } + + for (i in shape.indices) { + Assert.assertEquals(shape[i], deserShape[i]) + } + } + + @Test + fun testFloatTensorSerde() { + val data = floatArrayOf(Float.MIN_VALUE, 0f, 0.1f, Float.MAX_VALUE) + val shape = longArrayOf(2, 2) + val tensor = Tensor.fromBlob(data, shape) + val bytes = tensor.toByteArray() + + val deser = Tensor.fromByteArray(bytes) + val deserShape = deser.shape() + val deserData = deser.dataAsFloatArray + + for (i in data.indices) { + Assert.assertEquals(data[i].toDouble(), deserData[i].toDouble(), 1e-5) + } + + for (i in shape.indices) { + Assert.assertEquals(shape[i], deserShape[i]) + } + } +} diff --git a/extension/android/gradle/libs.versions.toml b/extension/android/gradle/libs.versions.toml index 561988cb1f6..fcd6a356536 100644 --- a/extension/android/gradle/libs.versions.toml +++ b/extension/android/gradle/libs.versions.toml @@ -5,8 +5,13 @@ commons-math3 = "3.6.1" guava = "32.1.3-jre" junit = "4.13.2" +core-ktx = "1.13.1" +kotlin = "1.9.23" [libraries] commons-math3 = { module = "org.apache.commons:commons-math3", version.ref = "commons-math3" } guava = { module = "com.google.guava:guava", version.ref = "guava" } junit = { module = "junit:junit", version.ref = "junit" } +core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "core-ktx" } +[plugins] +jetbrains-kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" } From ac26555aa86cceaddfbf16a17fb4548c5dbc3b40 Mon Sep 17 00:00:00 2001 From: jathu Date: Thu, 8 May 2025 14:41:50 -0700 Subject: [PATCH 015/178] Create a macos-arm64 preset (#10768) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary * Create the base for a macos-arm64 preset — bigger migration in future diffs * Create an Apple CI job to test builds ### Test plan CI + ``` $ cmake --preset macos-arm64 -- Loading build preset: /Users/jathu/executorch/tools/cmake/preset/macos-arm64.cmake -- --- Configurated Options --- -- EXECUTORCH_BUILD_PRESET_FILE : /Users/jathu/executorch/tools/cmake/preset/macos-arm64.cmake -- EXECUTORCH_ENABLE_LOGGING : ON -- EXECUTORCH_BUILD_COREML : ON -- --------------------------- $ cmake --build cmake-out --parallel ``` cc @larryliu0820 --- .github/workflows/build-presets.yml | 17 +++++++++++++++ CMakeLists.txt | 2 ++ CMakePresets.json | 33 +++++++++++++++++++++++++++++ tools/cmake/Utils.cmake | 5 +++++ tools/cmake/common/preset.cmake | 11 ++++++++++ 5 files changed, 68 insertions(+) create mode 100644 CMakePresets.json diff --git a/.github/workflows/build-presets.yml b/.github/workflows/build-presets.yml index 39bc9dc6480..7f3c958ae55 100644 --- a/.github/workflows/build-presets.yml +++ b/.github/workflows/build-presets.yml @@ -11,3 +11,20 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} cancel-in-progress: true + +jobs: + apple: + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + strategy: + matrix: + preset: [macos-arm64] + with: + job-name: build + runner: macos-latest-xlarge + python-version: 3.12 + submodules: recursive + script: | + set -eux + ${CONDA_RUN} ./install_requirements.sh > /dev/null + ${CONDA_RUN} cmake --preset ${{ matrix.preset }} + ${CONDA_RUN} cmake --build cmake-out --parallel diff --git a/CMakeLists.txt b/CMakeLists.txt index 03e36186c94..76c75270d5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,6 +48,8 
@@ project(executorch)
 # MARK: - Start EXECUTORCH_H12025_BUILD_MIGRATION --------------------------------------------------
 
 include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
+
+load_build_preset()
 include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
 
 # Print all the configs that were called with announce_configured_options.
diff --git a/CMakePresets.json b/CMakePresets.json
new file mode 100644
index 00000000000..5006ba9ec05
--- /dev/null
+++ b/CMakePresets.json
@@ -0,0 +1,33 @@
+{
+  "version": 10,
+  "cmakeMinimumRequired": {
+    "major": 3,
+    "minor": 31,
+    "patch": 0
+  },
+  "$comment": "On-device AI across mobile, embedded and edge for PyTorch.",
+  "configurePresets": [
+    {
+      "name": "common",
+      "hidden": true,
+      "binaryDir": "${sourceDir}/cmake-out",
+      "generator": "Unix Makefiles"
+    },
+    {
+      "name": "macos-arm64",
+      "inherits": ["common"],
+      "generator": "Xcode",
+      "cacheVariables": {
+        "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake",
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/macos-arm64.cmake",
+        "PLATFORM": "MAC_ARM64",
+        "DEPLOYMENT_TARGET": "10.15"
+      },
+      "condition": {
+        "lhs": "${hostSystemName}",
+        "type": "equals",
+        "rhs": "Darwin"
+      }
+    }
+  ]
+}
diff --git a/tools/cmake/Utils.cmake b/tools/cmake/Utils.cmake
index edbd682c7e3..dda83f1794e 100644
--- a/tools/cmake/Utils.cmake
+++ b/tools/cmake/Utils.cmake
@@ -313,6 +313,11 @@ function(resolve_python_executable)
         python
         PARENT_SCOPE
     )
+  elseif(DEFINED ENV{VIRTUAL_ENV})
+    set(PYTHON_EXECUTABLE
+        $ENV{VIRTUAL_ENV}/bin/python3
+        PARENT_SCOPE
+    )
   else()
     set(PYTHON_EXECUTABLE
         python3
diff --git a/tools/cmake/common/preset.cmake b/tools/cmake/common/preset.cmake
index 8f886abab36..e9933c8f05e 100644
--- a/tools/cmake/common/preset.cmake
+++ b/tools/cmake/common/preset.cmake
@@ -91,3 +91,14 @@ macro(set_overridable_option NAME VALUE)
 
   set(${NAME} ${VALUE} CACHE STRING "")
 endmacro()
+
+# Determine the build preset and load it.
+macro(load_build_preset)
+  if(DEFINED EXECUTORCH_BUILD_PRESET_FILE)
+    announce_configured_options(EXECUTORCH_BUILD_PRESET_FILE)
+    message(STATUS "Loading build preset: ${EXECUTORCH_BUILD_PRESET_FILE}")
+    include(${EXECUTORCH_BUILD_PRESET_FILE})
+  endif()
+  # For now, just continue if the preset file is not set. In the future, we will
+  # try to determine a preset file.
+endmacro() From 5ad676d14b54c40ce40946588e8036d4125b0a2a Mon Sep 17 00:00:00 2001 From: mcremon-meta <134334895+mcremon-meta@users.noreply.github.com> Date: Thu, 8 May 2025 15:04:17 -0700 Subject: [PATCH 016/178] Extract trace from prepare_and_convert and remove export_program Differential Revision: D73440517 Pull Request resolved: https://github.com/pytorch/executorch/pull/10493 --- backends/cadence/aot/compiler.py | 97 +++++++++++-------- backends/cadence/aot/export_example.py | 6 +- .../aot/tests/test_remove_ops_passes.py | 68 +++++++------ 3 files changed, 97 insertions(+), 74 deletions(-) diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py index 3d43ca2956e..594c4189b3a 100644 --- a/backends/cadence/aot/compiler.py +++ b/backends/cadence/aot/compiler.py @@ -39,7 +39,6 @@ from torch._inductor.decomposition import remove_decompositions from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e -from torch.export import export from torch.export.exported_program import ExportedProgram from .passes import get_cadence_passes @@ -55,27 +54,24 @@ # however useful for unit tests to separate the converted model from the fused # model, to be able to get reference numerics. # If this does not apply, please use quantize_and_fuse_pt2 instead. -def prepare_and_convert_pt2( +def trace( model: torch.nn.Module, inputs: tuple[object, ...], - quantizer: CadenceQuantizer, - calibration_data: Optional[list[tuple[object, ...]]] = None, dump_graphs: bool = False, -) -> torch.fx.GraphModule: +) -> ExportedProgram: """ - Prepare and convert a model using the given quantizer. - The quantizer must be supplied and be the same as the one used to - fuse the model later, if applicable. If you do not expect that behavior, - please use quantize_and_fuse_pt2 instead, which will instantiate a - default quantizer for you if needed. - If calibration data is provided, it will be used to calibrate the model. If - not, the inputs will be used for calibration instead, which is useful for - unit tests but should not be used for end-to-end use cases. - Returns a GraphModule with the converted model. + Trace the model with export_for_training and return an ExportedProgram. 
""" + # Make the model inference mode by calling model.eval() + model.eval() + + # Prevent mkldnn decompositions + torch._C._set_mkldnn_enabled(False) + # Get default decompositions decomp_table = torch.export.default_decompositions() + # Select ops to keep ops_to_keep = [ torch.ops.aten.conv1d.default, @@ -85,19 +81,46 @@ def prepare_and_convert_pt2( torch.ops.aten.matmul.default, torch.ops.aten.rms_norm.default, ] + # Remove decompositions for the ops we want to keep # pyre-fixme[6]: For 1st argument expected `Dict[typing.Callable[..., typing.Any remove_decompositions(decomp_table, ops_to_keep) + # Export with dynamo - model_gm = ( - torch.export.export_for_training(model, inputs, strict=True) - .run_decompositions(decomp_table) - .module() - ) + program = torch.export.export_for_training( + model, inputs, strict=True + ).run_decompositions(decomp_table) if dump_graphs: logging.info("Graph before quantization:") - logging.info(model_gm.graph.print_tabular()) + logging.info(program.module().graph.print_tabular()) + + return program + + +def prepare_and_convert_pt2( + program: ExportedProgram, + inputs: tuple[object, ...], + quantizer: CadenceQuantizer, + calibration_data: Optional[list[tuple[object, ...]]] = None, + dump_graphs: bool = False, +) -> torch.fx.GraphModule: + """ + Prepare and convert a model using the given quantizer. + The quantizer must be supplied and be the same as the one used to + fuse the model later, if applicable. If you do not expect that behavior, + please use quantize_and_fuse_pt2 instead, which will instantiate a + default quantizer for you if needed. + If calibration data is provided, it will be used to calibrate the model. If + not, the inputs will be used for calibration instead, which is useful for + unit tests but should not be used for end-to-end use cases. + Returns a GraphModule with the converted model. + """ + + # Get the graph module from the ExportedProgram + model_gm = program.module() + + assert isinstance(model_gm, torch.fx.GraphModule) # Prepare prepared_model = prepare_pt2e(model_gm, quantizer) @@ -121,10 +144,10 @@ def prepare_and_convert_pt2( # Note: this is not meant as a primary API since it can create inconsistencies -# if the quantizer here is different from the quantizer used to convert. It is -# however useful for unit tests to separate the converted model from the fused -# model, to be able to get reference numerics. -# If this does not apply, please use quantize_and_fuse_pt2 instead. +# if the quantizer here is different from the quantizer used to prepare/convert. +# It is however useful for unit tests to separate the converted model from the +# fused model, to be able to get reference numerics. +# If this does not apply, please use quantize_pt2 instead. 
def fuse_pt2( converted_graph_module: torch.fx.GraphModule, quantizer: CadenceQuantizer, @@ -167,9 +190,15 @@ def quantize_pt2( if not quantizer: quantizer = CadenceDefaultQuantizer() + program = trace(model, inputs, dump_graphs=dump_graphs) + + if dump_graphs: + logging.info("Graph after trace:") + logging.info(program.graph.print_tabular()) + # Get converted graph module converted_gm = prepare_and_convert_pt2( - model, inputs, quantizer, calibration_data, dump_graphs=dump_graphs + program, inputs, quantizer, calibration_data, dump_graphs=dump_graphs ) # Get fused model @@ -184,22 +213,6 @@ def quantize_pt2( return program -# Export the model and lower it to an ExportedProgram (in aten IR) -def export_program( - model: torch.nn.Module, - inputs: tuple[object, ...], -) -> ExportedProgram: - assert isinstance(model, torch.nn.Module), "model should be an nn.Module" - - # Prevent mkldnn decompositions - torch._C._set_mkldnn_enabled(False) - - # Export the model and return it. - expo_program = export(model, inputs, strict=True) - - return expo_program - - def _lower_ep_to_edge( expo_program: ExportedProgram, dump_graphs: bool = False, @@ -248,7 +261,7 @@ def export_to_edge( assert isinstance(model, torch.nn.Module), "model should be an nn.Module" # Export the model into an ExportedProgram. - expo_program = export_program(model, inputs) + expo_program = trace(model, inputs) # Lower the model to edge IR. edge_prog_manager = _lower_ep_to_edge(expo_program, dump_graphs, constant_methods) diff --git a/backends/cadence/aot/export_example.py b/backends/cadence/aot/export_example.py index d2148870e53..6eaead7105e 100644 --- a/backends/cadence/aot/export_example.py +++ b/backends/cadence/aot/export_example.py @@ -18,6 +18,7 @@ export_to_executorch_gen_etrecord, fuse_pt2, prepare_and_convert_pt2, + trace, ) from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer @@ -48,8 +49,11 @@ def export_model( # Instantiate the quantizer quantizer = CadenceDefaultQuantizer() + # Trace the model + ep = trace(model, example_inputs) + # Convert the model - converted_model = prepare_and_convert_pt2(model, example_inputs, quantizer) + converted_model = prepare_and_convert_pt2(ep, example_inputs, quantizer) # Get reference outputs from converted model ref_outputs = converted_model(*example_inputs) diff --git a/backends/cadence/aot/tests/test_remove_ops_passes.py b/backends/cadence/aot/tests/test_remove_ops_passes.py index 8caba7799b5..74c39ae3ee3 100644 --- a/backends/cadence/aot/tests/test_remove_ops_passes.py +++ b/backends/cadence/aot/tests/test_remove_ops_passes.py @@ -16,10 +16,10 @@ import torch.nn.functional as F from executorch.backends.cadence.aot import compiler from executorch.backends.cadence.aot.compiler import export_to_edge +from executorch.backends.cadence.aot.fuse_ops import FuseQuantDequantToRequantizePass from executorch.backends.cadence.aot.graph_builder import GraphBuilder from executorch.backends.cadence.aot.pass_utils import count_node, op_counts_match -from executorch.backends.cadence.aot.quantizer.quantizer import CadenceDefaultQuantizer from executorch.backends.cadence.aot.remove_ops import ( RemoveAliasCopyOpPass, RemoveBranchedQuantDequant, @@ -42,9 +42,6 @@ from parameterized.parameterized import parameterized from pyre_extensions import none_throws -from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e - -from torch.export import export_for_training from torch.fx.passes.infra.pass_base import PassResult @@ -459,44 +456,53 @@ def 
forward(self, x, y): ) def test_remove_nop_quant_dequant(self): - class M(torch.nn.Module): - def __init__(self): - super(M, self).__init__() - self.linear = torch.nn.Linear(6, 12, bias=False) + builder = GraphBuilder() + x = builder.placeholder("x", torch.randn(8, 8)) + q0 = builder.call_operator( + op=exir_ops.edge.cadence.quantize_per_tensor.default, + args=(x, 0.01662161760032177, -4, -128, 127, torch.int8), + ) + dq0 = builder.call_operator( + op=exir_ops.edge.cadence.dequantize_per_tensor.default, + args=(q0, 0.01662161760032177, -4, -128, 127, torch.int8), + ) + q1 = builder.call_operator( + op=exir_ops.edge.cadence.quantize_per_tensor.default, + args=(x, 0.012577153742313385, -9, -128, 127, torch.int8), + ) + builder.output([dq0, q1]) + graph_module = builder.get_graph_module() - def forward(self, x): - x = self.linear(x) - return x + # Expect the dq op to be removed by the pass + self.assertEqual( + count_node( + graph_module, exir_ops.edge.cadence.dequantize_per_tensor.default + ), + 1, + ) - inp = torch.randn(2, 8, 1, 6) + # Expect 1 quantize op left since it has no matching dequant + self.assertEqual( + count_node(graph_module, exir_ops.edge.cadence.quantize_per_tensor.default), + 2, + ) - # Run the standard quant/convert steps, but without fusing - # this leaves two redundant quant/dequant pairs to test with - quantizer = CadenceDefaultQuantizer() - model_exp = export_for_training(M(), (inp,), strict=True).module() - prepared_model = prepare_pt2e(model_exp, quantizer) - prepared_model(inp) - converted_model = convert_pt2e(prepared_model) + p = FuseQuantDequantToRequantizePass() - graph_module = ( - compiler.export_to_cadence( - converted_model, - (inp,), - ) - .exported_program() - .graph_module - ) + graph_after_passes = cast(PassResult, p(graph_module)).graph_module - # Expect all quantize ops to be removed by the pass + # Expect the dq op to be removed by the pass self.assertEqual( - count_node(graph_module, exir_ops.edge.cadence.quantize_per_tensor.default), + count_node( + graph_after_passes, exir_ops.edge.cadence.dequantize_per_tensor.default + ), 0, ) - # Expect 1 dequantize op for the weights + # Expect 1 quantize op left since it has no matching dequant self.assertEqual( count_node( - graph_module, exir_ops.edge.cadence.dequantize_per_tensor.default + graph_after_passes, exir_ops.edge.cadence.quantize_per_tensor.default ), 1, ) From 277c39d869e18c70eb85d33025fa554d368caa8a Mon Sep 17 00:00:00 2001 From: Thomas Jannaud Date: Thu, 8 May 2025 16:15:03 -0700 Subject: [PATCH 017/178] Make constant_folding's _DEFAULT_SKIP_TARGETS public Differential Revision: D74349918 Pull Request resolved: https://github.com/pytorch/executorch/pull/10760 --- exir/passes/constant_prop_pass.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/exir/passes/constant_prop_pass.py b/exir/passes/constant_prop_pass.py index 0049e597f8d..6921bd632f4 100644 --- a/exir/passes/constant_prop_pass.py +++ b/exir/passes/constant_prop_pass.py @@ -29,7 +29,8 @@ # Avoid propagating constants for `exir.ops.edge.aten.full.default`. # Propagating aten.full can significantly increase compiled model size. 
-_DEFAULT_SKIP_TARGETS = {exir_ops.edge.aten.full.default}
+_DEFAULT_SKIP_TARGETS_NO_QUANT = {exir_ops.edge.aten.full.default}
+_DEFAULT_SKIP_TARGETS = set(_DEFAULT_SKIP_TARGETS_NO_QUANT)

 # Do not const prop quantization primitives
 _QUANT_PRIMITIVES_EDGE = [aten_to_edge(op) for op in _QUANT_PRIMITIVES]
@@ -48,6 +49,10 @@
 )


+def get_default_skip_targets_no_quant() -> set[EdgeOpOverload]:
+ return _DEFAULT_SKIP_TARGETS_NO_QUANT
+
+
 def is_const(
 arg,
 exported_program: ExportedProgram,

From b1b46ee4c5615f5f2008cc05ee1e32367b3e43bd Mon Sep 17 00:00:00 2001
From: Thomas Jannaud
Date: Thu, 8 May 2025 17:04:14 -0700
Subject: [PATCH 018/178] : constant fold None

Differential Revision: D74350331

Pull Request resolved: https://github.com/pytorch/executorch/pull/10762
---
 exir/passes/constant_prop_pass.py | 2 ++
 exir/tests/test_passes.py | 31 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/exir/passes/constant_prop_pass.py b/exir/passes/constant_prop_pass.py
index 6921bd632f4..a103568b9a9 100644
--- a/exir/passes/constant_prop_pass.py
+++ b/exir/passes/constant_prop_pass.py
@@ -66,6 +66,8 @@ def is_const(
 )
 elif isinstance(arg, _PRIMITIVE_TYPES):
 return True
+ elif arg is None:
+ return True
 elif not isinstance(arg, torch.fx.Node):
 return False
 elif arg in const_node_to_tensor:
diff --git a/exir/tests/test_passes.py b/exir/tests/test_passes.py
index d3c2d0a0936..6618c729987 100644
--- a/exir/tests/test_passes.py
+++ b/exir/tests/test_passes.py
@@ -1823,3 +1823,34 @@ def _do_checks(
 self.assertTrue(
 torch.allclose(output_no_dim_order[0], output_no_dim_order_revert[0])
 )
+
+ def test_constant_prop_pass_none(self) -> None:
+ """
+ This checks that None arguments are treated as constants in constant_prop_pass.
+ """
+
+ class M(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+ self.cst = torch.ones(3, 3, 3, dtype=torch.int8)
+ self.w = torch.ones(3, 3, 3, dtype=torch.int8)
+
+ def forward(self, x):
+ # Note: using e.g aten.linear would not work as None is not in the graph
+ a = torch.ops.aten.convolution.default(
+ self.cst, self.w, None, [1], [0], [1], False, [0], 1
+ )
+ return a + x
+
+ mod = M()
+ x = torch.randn([3, 3, 3])
+ mod(x)
+ edge = to_edge(
+ export(mod, (x,), strict=True),
+ compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
+ )
+ # 2 constants: self.w and self.cst
+ self.assertEqual(2, len(edge.exported_program().constants))
+ pass_result = constant_prop_pass(edge.exported_program())
+ # 1 constant: a (= self.w @ self.cst)
+ self.assertEqual(1, len(pass_result.constants))

From 6e3cb79b0120a5d44aef2379b59d35842634a3bc Mon Sep 17 00:00:00 2001
From: wl1026sun
Date: Thu, 8 May 2025 23:23:06 -0700
Subject: [PATCH 019/178] Make the TIE quantized conv operator fall back to
 the HiFi quantized conv op instead of the CPU op for shapes not supported by
 the TIE kernel.
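A sketch of the intended call pattern (illustrative only: the wrapper and the
tie_shape_supported() check below are hypothetical, and only the HiFi
declarations added to operators.h in this patch are real):

    // Inside a TIE conv wrapper: for shapes the TIE kernel cannot handle,
    // dispatch to the HiFi kernel instead of the portable CPU op.
    if (!tie_shape_supported(input, weight, stride, padding)) { // hypothetical
      ::impl::HiFi::native::quantized_conv_per_tensor_out(
          ctx, input, weight, bias, stride, padding, dilation, groups,
          in_zero_point, weight_zero_point, bias_scale, output_scale,
          output_zero_point, out_multiplier, out_shift, channel_last, out);
      return;
    }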
Differential Revision: D74337713

Pull Request resolved: https://github.com/pytorch/executorch/pull/10770
---
 backends/cadence/hifi/operators/operators.h | 39 +++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/backends/cadence/hifi/operators/operators.h b/backends/cadence/hifi/operators/operators.h
index e9cfca6fb70..105510e3421 100644
--- a/backends/cadence/hifi/operators/operators.h
+++ b/backends/cadence/hifi/operators/operators.h
@@ -12,6 +12,7 @@
 _(uint8_t, Byte) \
 _(int8_t, Char)

+using ::executorch::aten::IntArrayRef;
 using ::executorch::aten::optional;
 using ::executorch::aten::ScalarType;
 using ::executorch::aten::Tensor;
@@ -67,6 +68,44 @@ void quantized_linear_per_tensor_out(
 __ET_UNUSED const optional& offset,
 Tensor& out);

+void quantized_conv_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ const Tensor& weight_zero_point,
+ const Tensor& bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED const Tensor& out_multiplier,
+ __ET_UNUSED const Tensor& out_shift,
+ bool channel_last,
+ Tensor& out);
+
+void quantized_conv_per_tensor_out(
+ __ET_UNUSED KernelRuntimeContext& ctx,
+ const Tensor& input,
+ const Tensor& weight,
+ const Tensor& bias,
+ IntArrayRef stride,
+ IntArrayRef padding,
+ IntArrayRef dilation,
+ int64_t groups,
+ int64_t in_zero_point,
+ int64_t weight_zero_point,
+ double bias_scale,
+ double output_scale,
+ int64_t output_zero_point,
+ __ET_UNUSED int64_t out_multiplier,
+ __ET_UNUSED int64_t out_shift,
+ bool channel_last,
+ Tensor& out);
+
 } // namespace native
 } // namespace HiFi
 } // namespace impl

From 01a5d818ccee61e734c7c1e42bbc8abba24a8395 Mon Sep 17 00:00:00 2001
From: Digant Desai
Date: Fri, 9 May 2025 01:34:47 -0500
Subject: [PATCH 020/178] Arm Backend: Use tosa_ref_model only if it is
 available

Differential Revision: D74420616

Pull Request resolved: https://github.com/pytorch/executorch/pull/10778
---
 backends/arm/test/conftest.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/backends/arm/test/conftest.py b/backends/arm/test/conftest.py
index 2d247f7bd42..936a4f64a10 100644
--- a/backends/arm/test/conftest.py
+++ b/backends/arm/test/conftest.py
@@ -44,10 +44,20 @@ def pytest_configure(config):
 if getattr(config.option, "fast_fvp", False):
 pytest._test_options["fast_fvp"] = config.option.fast_fvp # type: ignore[attr-defined]

+ pytest._test_options["tosa_version"] = "0.80" # type: ignore[attr-defined]
 if config.option.arm_run_tosa_version:
 pytest._test_options["tosa_version"] = config.option.arm_run_tosa_version
-
 pytest._test_options["tosa_ref_model"] = True # type: ignore[attr-defined]
+ # Not all deployments of ET have the TOSA reference model available.
+ # Make sure we don't try to use it if it's not available.
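 # The guarded import below degrades gracefully: if neither package can be
 # imported, pytest._test_options["tosa_ref_model"] is forced to False, so
 # reference-model runs are disabled rather than erroring at import time.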
+ try:
+ if pytest._test_options["tosa_version"] == "0.80":
+ import tosa_tools.v0_80.tosa_reference_model as tosa_reference_model
+ else:
+ import tosa_tools.tosa_ref_model as tosa_reference_model
+ except ImportError:
+ pytest._test_options["tosa_ref_model"] = False # type: ignore[attr-defined]
+ tosa_reference_model = None # noqa

 logging.basicConfig(level=logging.INFO, stream=sys.stdout)

From 7e1f3e3680f7e33dd9d9a949e3940ee7875745d4 Mon Sep 17 00:00:00 2001
From: lucylq
Date: Fri, 9 May 2025 00:10:57 -0700
Subject: [PATCH 021/178] Use std::aligned_alloc in file_data_loader

Differential Revision: D74041198

Pull Request resolved: https://github.com/pytorch/executorch/pull/10660
---
 .github/workflows/pull.yml | 4 +-
 extension/data_loader/file_data_loader.cpp | 69 +++-----------
 .../test/backend_integration_test.cpp | 4 +-
 runtime/platform/compiler.h | 95 +++++++++++++++++++
 4 files changed, 110 insertions(+), 62 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 795272688bd..2dc1fcde36e 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -434,9 +434,7 @@ jobs:
 output=$(ls -la cmake-out/test/size_test)
 arr=($output)
 size=${arr[4]}
- # threshold=48120 on devserver with gcc11.4
- # todo(lfq): update once binary size is below 50kb.
- threshold="47552"
+ threshold="47560"
 if [[ "$size" -le "$threshold" ]]; then
 echo "Success $size <= $threshold"
 else
diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp
index 503539774a5..e9922eb8323 100644
--- a/extension/data_loader/file_data_loader.cpp
+++ b/extension/data_loader/file_data_loader.cpp
@@ -49,20 +49,6 @@ namespace {
 static bool is_power_of_2(size_t value) {
 return value > 0 && (value & ~(value - 1)) == value;
 }
-
-/**
- * Returns the next alignment for a given pointer.
- */
-static uint8_t* align_pointer(void* ptr, size_t alignment) {
- intptr_t addr = reinterpret_cast(ptr);
- if ((addr & (alignment - 1)) == 0) {
- // Already aligned.
- return reinterpret_cast(ptr);
- }
- // Bump forward.
- addr = (addr | (alignment - 1)) + 1;
- return reinterpret_cast(addr);
-}
 } // namespace

 FileDataLoader::~FileDataLoader() {
@@ -129,13 +115,13 @@ namespace {
 /**
 * FreeableBuffer::FreeFn-compatible callback.
 *
- * `context` is actually a ptrdiff_t value (not a pointer) that contains the
- * offset in bytes between `data` and the actual pointer to free.
+ * `context` is the original buffer pointer. It is allocated with
+ * ET_ALIGNED_ALLOC, and must be freed with ET_ALIGNED_FREE.
+ *
+ * `data` and `size` are unused.
 */
 void FreeSegment(void* context, void* data, ET_UNUSED size_t size) {
- ptrdiff_t offset = reinterpret_cast(context);
- ET_DCHECK_MSG(offset >= 0, "Unexpected offset %ld", (long int)offset);
- std::free(static_cast(data) - offset);
+ ET_ALIGNED_FREE(context);
 }
 } // namespace

@@ -163,57 +149,26 @@ Result FileDataLoader::load(
 }

 // Allocate memory for the FreeableBuffer.
- size_t alloc_size = size;
- if (alignment_ > alignof(std::max_align_t)) {
- // malloc() will align to smaller values, but we must manually align to
- // larger values.
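 // (Context for the removal here: malloc() only guarantees
 //  alignof(std::max_align_t), so the old path over-allocated
 //  `size + alignment` bytes and bumped the pointer up, which in turn forced
 //  FreeSegment() to carry the offset back to the original allocation.)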
- alloc_size += alignment_; - } - void* buffer = std::malloc(alloc_size); - if (buffer == nullptr) { + void* aligned_buffer = ET_ALIGNED_ALLOC(alignment_, size); + if (aligned_buffer == nullptr) { ET_LOG( Error, - "Reading from %s at offset %zu: malloc(%zd) failed", + "Reading from %s at offset %zu: ET_ALIGNED_ALLOC(%zd, %zd) failed", file_name_, offset, + alignment_, size); return Error::MemoryAllocationFailed; } - // Align. - void* aligned_buffer = align_pointer(buffer, alignment_); - - // Assert that the alignment didn't overflow the buffer. - ET_DCHECK_MSG( - reinterpret_cast(aligned_buffer) + size <= - reinterpret_cast(buffer) + alloc_size, - "aligned_buffer %p + size %zu > buffer %p + alloc_size %zu", - aligned_buffer, - size, - buffer, - alloc_size); - auto err = load_into(offset, size, segment_info, aligned_buffer); if (err != Error::Ok) { - // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`. - std::free(buffer); + ET_ALIGNED_FREE(aligned_buffer); return err; } - // We can't naively free this pointer, since it may not be what malloc() gave - // us. Pass the offset to the real buffer as context. This is the number of - // bytes that need to be subtracted from the FreeableBuffer::data() pointer to - // find the actual pointer to free. - return FreeableBuffer( - aligned_buffer, - size, - FreeSegment, - /*free_fn_context=*/ - reinterpret_cast( - // Using signed types here because it will produce a signed ptrdiff_t - // value, though for us it will always be non-negative. - reinterpret_cast(aligned_buffer) - - reinterpret_cast(buffer))); + // Pass the aligned_buffer pointer as context to FreeSegment. + return FreeableBuffer(aligned_buffer, size, FreeSegment, aligned_buffer); } Result FileDataLoader::size() const { diff --git a/runtime/executor/test/backend_integration_test.cpp b/runtime/executor/test/backend_integration_test.cpp index ea9467907c7..e2e61f171eb 100644 --- a/runtime/executor/test/backend_integration_test.cpp +++ b/runtime/executor/test/backend_integration_test.cpp @@ -656,8 +656,8 @@ class DelegateDataAlignmentTest : public ::testing::TestWithParam { // The delegate data inline alignment used by the -da1024 file. return 1024; } else { - // A small alignment that's compatible with any realistic alignment. - return 4; + // Minimum alignment expected by program.cpp. + return alignof(std::max_align_t); } } diff --git a/runtime/platform/compiler.h b/runtime/platform/compiler.h index 7467d5c1e04..da7e0988a62 100644 --- a/runtime/platform/compiler.h +++ b/runtime/platform/compiler.h @@ -171,6 +171,101 @@ using ssize_t = ptrdiff_t; #endif +/** + * Platform-specific aligned memory allocation and deallocation. + * + * Usage: + * void* ptr = ET_ALIGNED_ALLOC(alignment, size); + * // use ptr... + * ET_ALIGNED_FREE(ptr); + * + * Note: alignment must be a power of 2 and size must be an integral multiple of + * alignment. + */ +#if defined(_MSC_VER) +#include +#define ET_ALIGNED_ALLOC(alignment, size) \ + _aligned_malloc(((size + alignment - 1) & ~(alignment - 1)), (alignment)) +#define ET_ALIGNED_FREE(ptr) _aligned_free(ptr) +#elif defined(__APPLE__) +#include // For posix_memalign and free +inline void* et_apple_aligned_alloc(size_t alignment, size_t size) { + void* ptr = nullptr; + // The address of the allocated memory must be a multiple of sizeof(void*). 
+ if (alignment < sizeof(void*)) { + alignment = sizeof(void*); + } + if (posix_memalign( + &ptr, alignment, (size + alignment - 1) & ~(alignment - 1)) != 0) { + return nullptr; + } + return ptr; +} +#define ET_ALIGNED_ALLOC(alignment, size) \ + et_apple_aligned_alloc((alignment), (size)) +#define ET_ALIGNED_FREE(ptr) free(ptr) +#elif __has_builtin(__builtin_aligned_alloc) || defined(_ISOC11_SOURCE) +// Linux and posix systems that support aligned_alloc and are >= C++17. +#include +#define ET_ALIGNED_ALLOC(alignment, size) \ + ::aligned_alloc(alignment, (size + alignment - 1) & ~(alignment - 1)) +#define ET_ALIGNED_FREE(ptr) free(ptr) +#else +// If the platform doesn't support aligned_alloc, fallback to malloc. +#include +#include +inline void* et_aligned_malloc(size_t alignment, size_t size) { + // Place to store the offset to the original pointer. + size_t offset_size = sizeof(uint16_t); + + // Malloc extra space for offset + alignment. + size_t alloc_size = size + offset_size + alignment - 1; + void* ptr = std::malloc(alloc_size); + + if (ptr == nullptr) { + // Malloc failed. + return nullptr; + } + + uintptr_t addr = reinterpret_cast(ptr); + // Align the address past addr + offset_size bytes. + // This provides space to store the offset before the aligned pointer. + addr = addr + offset_size; + uintptr_t aligned_ptr = (addr + alignment - 1) & ~(alignment - 1); + + // Check that alignment didn't overflow the buffer. + if (reinterpret_cast(aligned_ptr) + size > + reinterpret_cast(ptr) + alloc_size) { + std::free(ptr); + return nullptr; + } + + // Store the offset to the original pointer. + // Used to free the original allocated buffer. + *(reinterpret_cast(aligned_ptr) - 1) = + (uint16_t)(reinterpret_cast(aligned_ptr) - + reinterpret_cast(ptr)); + + return reinterpret_cast(aligned_ptr); +} + +inline void et_aligned_free(void* ptr) { + if (ptr == nullptr) { + return; + } + + // Get the original pointer using the offset. + uint16_t* original_ptr = reinterpret_cast( + reinterpret_cast(ptr) - + *(reinterpret_cast(ptr) - 1)); + std::free(original_ptr); +} + +#define ET_ALIGNED_ALLOC(alignment, size) et_aligned_malloc((alignment), (size)) +#define ET_ALIGNED_FREE(ptr) et_aligned_free(ptr) + +#endif + // DEPRECATED: Use the non-underscore-prefixed versions instead. // TODO(T199005537): Remove these once all users have stopped using them. #define __ET_DEPRECATED ET_DEPRECATED From 6759d35d49d988430dbab4cb22f320a0b0f21d1a Mon Sep 17 00:00:00 2001 From: Thomas Jannaud Date: Fri, 9 May 2025 00:45:45 -0700 Subject: [PATCH 022/178] fix transpose / permutations fusion pass Differential Revision: D74447383 Pull Request resolved: https://github.com/pytorch/executorch/pull/10780 --- backends/cadence/aot/fuse_ops.py | 13 +++++- .../aot/tests/test_fusion_ops_passes.py | 44 +++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/backends/cadence/aot/fuse_ops.py b/backends/cadence/aot/fuse_ops.py index 77184c7af77..7a20a3f64b4 100644 --- a/backends/cadence/aot/fuse_ops.py +++ b/backends/cadence/aot/fuse_ops.py @@ -885,6 +885,9 @@ class FuseTransposeOrPermuteOpPairsPass(FuseOpPairsAcrossBranchesPass): """ Fuse transpose or permute op pairs to a single view op. (transpose or permutation) -> (quant or dequant) -> (transpose or permutation) + This happens when op2(op1) == identity, modulo unitary dimensions. 
+ 'unitary dimensions' example: a tensor of shape [1, 5, 30] is equivalent (in memory) to [5, 1, 30]
+ so transpose(1, 2) then transpose(0, 2) is a pseudo identity and should be fused.
 """

 # A list of ops that can be bypassed when looking for a
@@ -908,7 +911,7 @@ def can_fuse_for_chain(
 if not super().can_fuse_for_chain(producer, consumer, consumer_op_packets):
 return False

- # checking that permut2(permut1(identify)) == identity
+ # checking that permut2(permut1(identity)) == identity, modulo unitary dimensions
 input_shape = cast(torch.fx.Node, producer.args[0]).meta["val"].shape
 ident_dims = list(range(len(input_shape)))
 # this mapping helps to handle both transpose and permutations
@@ -918,7 +921,10 @@ def can_fuse_for_chain(
 }
 in_dims = f[producer.target](producer, ident_dims)
 out_dims = f[consumer.target](consumer, in_dims)
- return out_dims == ident_dims
+ # Filtering out unitary dimensions
+ non_unit_ident_dims = [dim for dim in ident_dims if input_shape[dim] != 1]
+ non_unit_out_dims = [dim for dim in out_dims if input_shape[dim] != 1]
+ return non_unit_out_dims == non_unit_ident_dims

 def get_fused_node(
 self,
@@ -926,6 +932,9 @@ def get_fused_node(
 consumer: torch.fx.Node,
 graph_module: torch.fx.GraphModule,
 ) -> torch.fx.Node:
+ # This step is important because we can fuse transpositions that are not perfectly
+ # the reverse of one another but are fused when there are unitary dimensions.
+ # The fused operation must have the same output shape as the consumer.
 output_shape = consumer.meta["val"].shape
 with graph_module.graph.inserting_after(consumer):
 view = graph_module.graph.call_function(
diff --git a/backends/cadence/aot/tests/test_fusion_ops_passes.py b/backends/cadence/aot/tests/test_fusion_ops_passes.py
index 1bb44b872d2..4e267254488 100644
--- a/backends/cadence/aot/tests/test_fusion_ops_passes.py
+++ b/backends/cadence/aot/tests/test_fusion_ops_passes.py
@@ -584,6 +584,28 @@ def _create_operator(
 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
 False,
 ),
+ # transpose -> quant -> transpose is not the reverse BUT there is a UNITARY dimension
+ # so it ends up being the same in memory => fuse
+ (
+ True,
+ [0, 1],
+ True,
+ [0, 2],
+ exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+ True,
+ [5, 40, 1],
+ ),
+ # transpose -> quant -> transpose is not the reverse, and unitary dimensions
+ # don't help => don't fuse
+ (
+ True,
+ [0, 1],
+ True,
+ [1, 3],
+ exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+ False,
+ [5, 40, 1, 4],
+ ),
 # permutation -> quant -> opposite permutation => fuse
 (
 False,
@@ -622,6 +644,28 @@ def _create_operator(
 False,
 [4, 4, 4],
 ),
+ # permutation -> quant -> a non-reverse permutation BUT there is a UNITARY dimension
+ # so it ends up being the same in memory => fuse
+ (
+ False,
+ [1, 3, 2, 0],
+ False,
+ [3, 2, 1, 0],
+ exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+ True,
+ [3, 1, 8, 10],
+ ),
+ # permutation -> quant -> a non-reverse permutation, and unitary dimensions
+ # don't help => don't fuse
+ (
+ False,
+ [1, 3, 2, 0],
+ False,
+ [3, 1, 2, 0],
+ exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+ False,
+ [3, 1, 8, 10],
+ ),
 # transpose -> quant -> transpose as a permutation => fuse
 (
 True,

From 54a14d9bbdf1c229e44db23b4eb44bf1b64513cc Mon Sep 17 00:00:00 2001
From: per held
Date: Fri, 9 May 2025 13:10:42 +0200
Subject: [PATCH 023/178] Arm backend: Suppress colors in pre-push if
 non-interactive (#10783)

Don't try to print with colors in the pre-push
script if the script is non-interactive. This
is to avoid getting broken output in the CI
which doesn't support colors.

Signed-off-by: per.held@arm.com
---
 backends/arm/scripts/pre-push | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/backends/arm/scripts/pre-push b/backends/arm/scripts/pre-push
index b755f2bcc48..804abbe26a4 100755
--- a/backends/arm/scripts/pre-push
+++ b/backends/arm/scripts/pre-push
@@ -8,11 +8,13 @@
 # non-interactive mode. "$#" gives the number of positional arguments.
 [ "$#" -eq 0 ] && is_script_interactive=1 || is_script_interactive=0

-RESET='\e[0m'
-RED='\e[31m'
-GREEN='\e[32m'
-YELLOW='\e[33m'
-BLUE='\e[34m'
+if [ $is_script_interactive -eq 1 ]; then
+ RESET='\e[0m'
+ RED='\e[31m'
+ GREEN='\e[32m'
+ YELLOW='\e[33m'
+ BLUE='\e[34m'
+fi

 INFO="${BLUE}[INFO]${RESET}"
 WARNING="${YELLOW}[WARNING]${RESET}"

From f7c906f6158d546c84495ca308806e6944cb9ea5 Mon Sep 17 00:00:00 2001
From: Digant Desai
Date: Fri, 9 May 2025 11:18:03 -0500
Subject: [PATCH 024/178] Cortex-M: Use q/dq ops in Arm Ethos Runner (#10782)

---
 CMakeLists.txt                                |   6 +
 backends/arm/scripts/build_executorch.sh      |   1 +
 backends/arm/test/test_arm_baremetal.sh       |   7 ++
 backends/cortex_m/CMakeLists.txt              |  61 ++++++++++
 .../cortex_m/ops/op_dequantize_per_tensor.cpp |  93 +++++++++++----
 .../cortex_m/ops/op_quantize_per_tensor.cpp   | 111 +++++++++++++++---
 examples/arm/aot_arm_compiler.py              |  85 +++++++++++++-
 examples/arm/executor_runner/CMakeLists.txt   |  22 +++-
 examples/arm/run.sh                           |  20 +++-
 9 files changed, 360 insertions(+), 46 deletions(-)
 create mode 100644 backends/cortex_m/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 76c75270d5f..4d292c209a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -242,6 +242,8 @@
 option(EXECUTORCH_USE_DL "Use libdl library" ON)

 option(EXECUTORCH_BUILD_CADENCE "Build the Cadence DSP backend" OFF)

+option(EXECUTORCH_BUILD_CORTEX_M "Build the Cortex-M backend" OFF)
+
 #
 # pthreadpool: build pthreadpool library.
Disable on unsupported platforms # @@ -715,6 +717,10 @@ if(EXECUTORCH_BUILD_XNNPACK) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack) endif() +if(EXECUTORCH_BUILD_CORTEX_M) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cortex_m) +endif() + if(EXECUTORCH_BUILD_DEVTOOLS) if(NOT EXECUTORCH_BUILD_ARM_BAREMETAL) set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER diff --git a/backends/arm/scripts/build_executorch.sh b/backends/arm/scripts/build_executorch.sh index 87d9fd23070..573f93221d4 100755 --- a/backends/arm/scripts/build_executorch.sh +++ b/backends/arm/scripts/build_executorch.sh @@ -129,6 +129,7 @@ cmake \ -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ + -DEXECUTORCH_BUILD_CORTEX_M=ON \ -DEXECUTORCH_ENABLE_LOGGING=ON \ ${build_devtools_flags} \ ${build_with_etdump_flags} \ diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index 48cee9acd95..476d417a69a 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -154,6 +154,13 @@ test_run_ethosu_fvp() { # End to End model tests using run.sh echo "${TEST_SUITE_NAME}: Test ethos-u target Ethos-U85" examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=add examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=mul + + # Cortex-M op tests + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=qadd --bundleio + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=qops --bundleio + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u55-128 --model_name=qops --bundleio --no_delegate --portable_kernels="aten::sub.out,aten::add.out,aten::mul.out" + examples/arm/run.sh --et_build_root=arm_test/test_run --target=ethos-u85-128 --model_name=qops --bundleio + echo "${TEST_SUITE_NAME}: PASS" } diff --git a/backends/cortex_m/CMakeLists.txt b/backends/cortex_m/CMakeLists.txt new file mode 100644 index 00000000000..39638bf0ee4 --- /dev/null +++ b/backends/cortex_m/CMakeLists.txt @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Kernel library for Cortex-M operators. Please keep this file formatted by running: +# ~~~ +# cmake-format -i CMakeLists.txt +# ~~~ +cmake_minimum_required(VERSION 3.19) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) +endif() + +# Source root directory for executorch. +if(NOT EXECUTORCH_ROOT) + set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) +endif() + +include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) +include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake) + +if(NOT PYTHON_EXECUTABLE) + resolve_python_executable() +endif() + +# Cortex-M ops kernel sources +set(_cortex_m_kernels__srcs + ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_quantize_per_tensor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ops/op_dequantize_per_tensor.cpp +) + +# Generate C++ bindings to register kernels into Executorch (for runtime). 
+# Here select all ops in operators.yaml
+set(_yaml_file ${CMAKE_CURRENT_LIST_DIR}/ops/operators.yaml)
+gen_selected_ops(LIB_NAME "cortex_m_ops_lib" OPS_SCHEMA_YAML "${_yaml_file}")
+
+# Generate bindings for the kernels
+generate_bindings_for_kernels(
+ LIB_NAME "cortex_m_ops_lib" CUSTOM_OPS_YAML "${_yaml_file}"
+)
+message("Generated files ${gen_command_sources}")
+
+# Build a library for _cortex_m_kernels_srcs
+add_library(cortex_m_kernels ${_cortex_m_kernels__srcs})
+target_link_libraries(cortex_m_kernels PRIVATE executorch)
+target_compile_options(cortex_m_kernels PUBLIC ${_common_compile_options})
+
+# cortex_m_ops_lib: Register Cortex-M ops kernels into Executorch runtime
+gen_operators_lib(
+ LIB_NAME "cortex_m_ops_lib" KERNEL_LIBS cortex_m_kernels DEPS executorch
+)
+
+install(
+ TARGETS cortex_m_kernels cortex_m_ops_lib
+ DESTINATION lib
+ PUBLIC_HEADER DESTINATION include/executorch/backends/cortex_m/ops/
+)
diff --git a/backends/cortex_m/ops/op_dequantize_per_tensor.cpp b/backends/cortex_m/ops/op_dequantize_per_tensor.cpp
index 1011de73be7..6d3f3698c67 100644
--- a/backends/cortex_m/ops/op_dequantize_per_tensor.cpp
+++ b/backends/cortex_m/ops/op_dequantize_per_tensor.cpp
@@ -29,6 +29,7 @@
 */
 void check_dequantize_args(
 const Tensor& input,
+ int64_t zero_point,
 int64_t quant_min,
 int64_t quant_max,
 ScalarType dtype,
@@ -39,6 +40,18 @@
 "input.scalar_type() %" PRId8 " is not char type",
 static_cast(input.scalar_type()));

+ // Check zp range
+ ET_CHECK_MSG(
+ zero_point >= quant_min,
+ "zero_point must be %" PRId64 " >= quant_min %" PRId64,
+ zero_point,
+ quant_min);
+ ET_CHECK_MSG(
+ zero_point <= quant_max,
+ "zero_point must be %" PRId64 " <= quant_max %" PRId64,
+ zero_point,
+ quant_max);
+
 // Check output dtype is float
 ET_CHECK_MSG(
 out.scalar_type() == ScalarType::Float,
@@ -73,18 +86,10 @@
 /**
 * Scalar implementation of quantization for a single value.
 */
-template
-T dequantize_val(
- float scale,
- int32_t zero_point,
- K value,
- int64_t quant_min,
- int64_t quant_max) {
- (void)quant_min;
- (void)quant_max;
- return static_cast((static_cast(value) - zero_point) * scale);
+template
+F dequantize_val(float scale, int32_t zero_point, Q qvalue) {
+ return static_cast((static_cast(qvalue) - zero_point) * scale);
 }
-
 } // namespace

 Tensor& dequantize_per_tensor_out(
@@ -106,29 +111,71 @@
 "Failed to resize out Tensor in dequantize_per_tensor_out");

 // Validate input parameters
- check_dequantize_args(input, quant_min, quant_max, dtype, out);
+ check_dequantize_args(input, zero_point, quant_min, quant_max, dtype, out);

- // Pre-compute inverse scale for better performance
 int32_t zp = static_cast(zero_point);
- int32_t qmin = static_cast(quant_min);
- int32_t qmax = static_cast(quant_max);

 // Get pointers to input and output data
 const int8_t* input_data = input.const_data_ptr();
 float* out_data = out.mutable_data_ptr();
 const size_t numel = input.numel();
+ size_t i = 0;

 #if defined(HAS_HELIUM_SIMD)
-// Helium MVE implementation for float32 to int8 quantization
-#Error "Implement MVE version!"
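// A note on the MVE path introduced below: the gather offsets
// {0x0, 0x8, 0x4, 0xC, ...} permute the 16 loaded bytes so that the two
// widening stages (vmovlb/vmovlt: int8 -> int16 -> int32) emit values in
// plain 0-3 / 4-7 / 8-11 / 12-15 order, letting the four float stores stay
// contiguous with no extra shuffle instructions.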
-#else - // Scalar implementation for float32 to int8 quantization - for (size_t i = 0; i < numel; i++) { - out_data[i] = - dequantize_val(scale, zp, input_data[i], qmin, qmax); + // Helium MVE implementation for int8 to float quantization + static uint8x16_t voffset{ + 0x0, + 0x8, + 0x4, + 0xC, + 0x1, + 0x9, + 0x5, + 0xD, + 0x2, + 0xA, + 0x6, + 0xE, + 0x3, + 0xB, + 0x7, + 0xF}; + + int16x8_t vzp = vdupq_n_s16(static_cast(zp)); + float32x4_t vscale = vdupq_n_f32(static_cast(scale)); + + for (; i + 15 < numel; i += 16) { + int8x16_t in_084C195D2A6E3B7F = + vldrbq_gather_offset_s8(input_data, voffset); + + int16x8_t in_04152637 = vsubq_s16(vmovlbq_s8(in_084C195D2A6E3B7F), vzp); + int16x8_t in_8C9DAEBF = vsubq_s16(vmovltq_s8(in_084C195D2A6E3B7F), vzp); + + float32x4_t inf_0123 = vcvtq_f32_s32(vmovlbq_s16(in_04152637)); + float32x4_t inf_4567 = vcvtq_f32_s32(vmovltq_s16(in_04152637)); + float32x4_t inf_89AB = vcvtq_f32_s32(vmovlbq_s16(in_8C9DAEBF)); + float32x4_t inf_CDEF = vcvtq_f32_s32(vmovltq_s16(in_8C9DAEBF)); + + float32x4_t out_0123 = vmulq_f32(inf_0123, vscale); + float32x4_t out_4567 = vmulq_f32(inf_4567, vscale); + float32x4_t out_89AB = vmulq_f32(inf_89AB, vscale); + float32x4_t out_CDEF = vmulq_f32(inf_CDEF, vscale); + + vstrwq_f32(out_data + 0, out_0123); + vstrwq_f32(out_data + 4, out_4567); + vstrwq_f32(out_data + 8, out_89AB); + vstrwq_f32(out_data + 12, out_CDEF); + + input_data += 16; + out_data += 16; } -#endif +#endif // defined(HAS_HELIUM_SIMD) + for (; i < numel; i++) { + *out_data = dequantize_val(scale, zp, *input_data); + *input_data++; + *out_data++; + } return out; } diff --git a/backends/cortex_m/ops/op_quantize_per_tensor.cpp b/backends/cortex_m/ops/op_quantize_per_tensor.cpp index 25385602e58..d92d2666a8f 100644 --- a/backends/cortex_m/ops/op_quantize_per_tensor.cpp +++ b/backends/cortex_m/ops/op_quantize_per_tensor.cpp @@ -41,13 +41,13 @@ void check_quantize_args( "input.scalar_type() %" PRId8 " is not float type", static_cast(input.scalar_type())); - // Check output dtype is int8 (Char) + // Check output dtype is int8 ET_CHECK_MSG( out.scalar_type() == ScalarType::Char, "out.scalar_type() %" PRId8 " is not int8 (Char)", static_cast(out.scalar_type())); - // Check dtype is int8 (Char) + // Check dtype is int8 ET_CHECK_MSG( dtype == ScalarType::Char, "dtype %" PRId8 " is not int8 (Char)", @@ -75,18 +75,18 @@ void check_quantize_args( /** * Scalar implementation of quantization for a single value. */ -template -T quantize_val( - float inv_scale, +template +Q quantize_val( + F inv_scale, int32_t zero_point, - K value, + F value, int64_t quant_min, int64_t quant_max) { int32_t qvalue = zero_point + static_cast(std::nearbyint(inv_scale * value)); qvalue = std::max(qvalue, static_cast(quant_min)); qvalue = std::min(qvalue, static_cast(quant_max)); - return static_cast(qvalue); + return static_cast(qvalue); } } // namespace @@ -123,16 +123,97 @@ Tensor& quantize_per_tensor_out( int8_t* out_data = out.mutable_data_ptr(); const size_t numel = input.numel(); + size_t i = 0; + #if defined(HAS_HELIUM_SIMD) -// Helium MVE implementation for float32 to int8 quantization -#Error "Implement MVE version!" 
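// A note on the "magic number" rounding used by the MVE path introduced
// below: for |f| < 2^22, the sum f + 12582912.0f (= 1.5 * 2^23) lands in
// [2^23, 2^24), where a float's ULP is exactly 1, so the addition itself
// rounds f to the nearest integer with ties to even. Reinterpreting the sum
// as int32 and subtracting bit_pattern(12582912.0f) == 0x4B400000 recovers
// round(f), and folding the zero point into the subtrahend (magic_int - zp)
// yields round(f) + zp in a single vsubq_s32. Worked example: f = 2.5 gives
// sum = 12582914.0f (ties to even), and bits - 0x4B400000 = 2.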
-#else
- // Scalar implementation for float32 to int8 quantization
- for (size_t i = 0; i < numel; i++) {
- out_data[i] =
- quantize_val(inv_scale, zp, input_data[i], qmin, qmax);
+ // Helium MVE implementation for float32 to int8 quantization
+ static uint8x16_t voffset{
+ 0x0,
+ 0x8,
+ 0x4,
+ 0xC,
+ 0x1,
+ 0x9,
+ 0x5,
+ 0xD,
+ 0x2,
+ 0xA,
+ 0x6,
+ 0xE,
+ 0x3,
+ 0xB,
+ 0x7,
+ 0xF};
+
+ float32x4_t inv_scale_vec = vdupq_n_f32(inv_scale);
+
+ // Magic number for float to int conversion, round to nearest even integer
+ // int magic_round(float f): interpret_as_int32(f + magic_float) - magic_int
+ // where,
+ // magic_float = 12582912.0f = (2 ** 23 + 2 ** 22) = (1.5 * 2 ** 23)
+ // magic_int = 1262485504 = 0x4B400000 = bit_pattern_as_int32(magic_float)
+
+ float magic_float = 12582912.0f;
+ int32_t magic_int = 1262485504;
+
+ float32x4_t vmagic_float = vdupq_n_f32(magic_float);
+ int32x4_t vmagic_int_less_zp =
+ vdupq_n_s32(magic_int - static_cast(zp));
+
+ int16x8_t vqmin = vdupq_n_s16(qmin);
+ int16x8_t vqmax = vdupq_n_s16(qmax);
+
+ // TODO: Measure performance, we are spilling
+ for (; i + 15 < numel; i += 16) {
+ float32x4_t in_0123 = vldrwq_f32(input_data + 0);
+ float32x4_t in_4567 = vldrwq_f32(input_data + 4);
+ float32x4_t in_89AB = vldrwq_f32(input_data + 8);
+ float32x4_t in_CDEF = vldrwq_f32(input_data + 12);
+
+ float32x4_t outf_0123 = vfmaq_f32(vmagic_float, in_0123, inv_scale_vec);
+ float32x4_t outf_4567 = vfmaq_f32(vmagic_float, in_4567, inv_scale_vec);
+ float32x4_t outf_89AB = vfmaq_f32(vmagic_float, in_89AB, inv_scale_vec);
+ float32x4_t outf_CDEF = vfmaq_f32(vmagic_float, in_CDEF, inv_scale_vec);
+
+ int32x4_t out_0123 =
+ vsubq_s32(vreinterpretq_s32_f32(outf_0123), vmagic_int_less_zp);
+ int32x4_t out_4567 =
+ vsubq_s32(vreinterpretq_s32_f32(outf_4567), vmagic_int_less_zp);
+ int32x4_t out_89AB =
+ vsubq_s32(vreinterpretq_s32_f32(outf_89AB), vmagic_int_less_zp);
+ int32x4_t out_CDEF =
+ vsubq_s32(vreinterpretq_s32_f32(outf_CDEF), vmagic_int_less_zp);
+
+ int16x8_t out_04152637;
+ int16x8_t out_8C9DAEBF;
+ out_04152637 = vmovnbq_s32(out_04152637, out_0123);
+ out_04152637 = vmovntq_s32(out_04152637, out_4567);
+ out_8C9DAEBF = vmovnbq_s32(out_8C9DAEBF, out_89AB);
+ out_8C9DAEBF = vmovntq_s32(out_8C9DAEBF, out_CDEF);
+
+ int16x8_t out_04152637_clamped =
+ vminq_s16(vmaxq_s16(out_04152637, vqmin), vqmax);
+ int16x8_t out_8C9DAEBF_clamped =
+ vminq_s16(vmaxq_s16(out_8C9DAEBF, vqmin), vqmax);
+
+ int8x16_t out_084C195D2A6E3B7F;
+ out_084C195D2A6E3B7F =
+ vmovnbq_s16(out_084C195D2A6E3B7F, out_04152637_clamped);
+ out_084C195D2A6E3B7F =
+ vmovntq_s16(out_084C195D2A6E3B7F, out_8C9DAEBF_clamped);
+
+ vstrbq_scatter_offset_s8(out_data, voffset, out_084C195D2A6E3B7F);
+ input_data += 16;
+ out_data += 16;
+ }
+#endif // defined(HAS_HELIUM_SIMD)
+
+ for (; i < numel; i++) {
+ *out_data =
+ quantize_val(inv_scale, zp, *input_data, qmin, qmax);
+ input_data++;
+ out_data++;
 }
-#endif

 return out;
 }
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
index 3d6acf2b94a..73fa4b24d4e 100644
--- a/examples/arm/aot_arm_compiler.py
+++ b/examples/arm/aot_arm_compiler.py
@@ -40,6 +40,11 @@
 )
 from executorch.backends.arm.vgf_partitioner import VgfPartitioner
+
+# To use Cortex-M backend
+from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import (
+ ReplaceQuantNodesPass,
+)
 from executorch.devtools.backend_debug import get_delegation_info
 from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
@@ -59,6 +64,7 @@
 from ..models import MODEL_NAME_TO_MODEL
 from ..models.model_factory import EagerModelFactory

+
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
 logging.basicConfig(level=logging.WARNING, format=FORMAT)
@@ -216,6 +222,54 @@ def forward(self, x, y):
 can_delegate = True


+class QuantAddTest(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, a):
+ return a + a
+
+ example_input = (torch.rand([13, 3], dtype=torch.float32),) # a - normal values
+ can_delegate = True # when quantized
+
+
+class QuantAddTest2(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, a, b):
+ p = a + a
+ q = b + b
+ r = p + q
+ return p, q, r
+
+ example_input = (
+ torch.randn([13, 7, 3], dtype=torch.float32),
+ torch.randn([13, 7, 3], dtype=torch.float32),
+ )
+ can_delegate = True # when quantized
+
+
+class QuantOpTest(torch.nn.Module):
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, w, x, y, z):
+ o1 = w - x
+ o2 = o1 + y
+ o3 = o2 * z
+ return o1, o2, o3
+
+ example_input = (
+ torch.randn([3, 1, 2], dtype=torch.float32), # w - normal values
+ torch.randn([3, 5, 2], dtype=torch.float32), # x - normal values
+ torch.randn([3, 5, 1], dtype=torch.float32)
+ * -0.000001, # y - small -ve values, needed for calibration in tests
+ torch.randn([3, 5, 2], dtype=torch.float32) * 1000, # z - large values
+ )
+ can_delegate = True # when quantized
+
+
 class SoftmaxModule(torch.nn.Module):
 def __init__(self):
 super().__init__()
@@ -241,6 +295,9 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
 "add": AddModule,
 "add2": AddModule2,
 "add3": AddModule3,
+ "qadd": QuantAddTest,
+ "qadd2": QuantAddTest2,
+ "qops": QuantOpTest,
 "softmax": SoftmaxModule,
 "MultipleOutputsModule": MultipleOutputsModule,
 }
@@ -255,6 +312,17 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
 torch.randn(32, 5),
 torch.randn(32, 5),
 ),
+ "qadd": (torch.randn(32, 2, 1),),
+ "qadd2": (
+ torch.randn(32, 2, 1),
+ torch.randn(32, 2, 1),
+ ),
+ "qops": (
+ torch.randn(32, 2, 1),
+ torch.randn(32, 2, 1),
+ torch.randn(32, 2, 1) * -0.000001,
+ torch.randn(32, 2, 1) * 1000,
+ ),
 "softmax": (torch.randn(32, 2, 2),),
 }
@@ -656,7 +724,7 @@ def to_edge_TOSA_delegate(
 _check_ir_validity=False,
 ),
 )
+
 return model_int8, edge
@@ -681,9 +750,18 @@ def to_edge_no_delegate(exported_program, args, model: torch.nn.Module, example_
 _check_ir_validity=False,
 ),
 )
+
 return model_int8, edge


+def transform_for_cortex_m_backend(edge):
+ # Let's make sure we are using the optimized Cortex-M backend
+ # NB: If we can't find and replace the ops that are expected to be replaced,
+ # bad things will happen at runtime, like "missing operator" errors!
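 # ReplaceQuantNodesPass is expected to rewrite the generic
 # quantized_decomposed quantize/dequantize_per_tensor nodes into their
 # cortex_m counterparts, which is what routes them to the kernels
 # registered by cortex_m_ops_lib at runtime.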
+ edge = edge.transform([ReplaceQuantNodesPass()])
+ return edge
+
+
 if __name__ == "__main__": # noqa: C901
 args = get_args()
@@ -715,6 +793,9 @@
 exported_program, args, model, example_inputs
 )

+ # Transform so we can use ops from the Cortex M backend
+ edge = transform_for_cortex_m_backend(edge)
+
 dump_delegation_info(edge, args.intermediates)

 try:
@@ -759,7 +840,9 @@
 output_name = os.path.join(args.output, output_name)

 if args.bundleio:
- save_bpte_program(exec_prog, original_model, output_name)
+ # Realize the quantization impact on numerics when generating reference output
+ reference_model = original_model if not model_int8 else model_int8
+ save_bpte_program(exec_prog, reference_model, output_name)
 print(f"Bundle PTE file saved as {output_name}")
 else:
 save_pte_program(exec_prog, output_name)
diff --git a/examples/arm/executor_runner/CMakeLists.txt b/examples/arm/executor_runner/CMakeLists.txt
index 63cdcc45aad..1568bef0301 100644
--- a/examples/arm/executor_runner/CMakeLists.txt
+++ b/examples/arm/executor_runner/CMakeLists.txt
@@ -492,7 +492,6 @@
 set_property(
 PROPERTY IMPORTED_LOCATION
 "${ET_BUILD_DIR_PATH}/kernels/portable/libportable_kernels.a"
 )
-
 add_library(quantized_ops_lib STATIC IMPORTED)
 set_property(
 TARGET quantized_ops_lib
@@ -505,7 +504,18 @@
 set_property(
 PROPERTY IMPORTED_LOCATION
 "${ET_BUILD_DIR_PATH}/kernels/quantized/libquantized_kernels.a"
 )
-
+add_library(cortex_m_ops_lib STATIC IMPORTED)
+set_property(
+ TARGET cortex_m_ops_lib
+ PROPERTY IMPORTED_LOCATION
+ "${ET_BUILD_DIR_PATH}/backends/cortex_m/libcortex_m_ops_lib.a"
+)
+add_library(cortex_m_kernels STATIC IMPORTED)
+set_property(
+ TARGET cortex_m_kernels
+ PROPERTY IMPORTED_LOCATION
+ "${ET_BUILD_DIR_PATH}/backends/cortex_m/libcortex_m_kernels.a"
+)
 add_library(extension_runner_util STATIC IMPORTED)
 set_property(
 TARGET extension_runner_util
@@ -546,9 +556,11 @@
 list(APPEND arm_executor_runner_link
 executorch
 "-Wl,--whole-archive"
 executorch_delegate_ethos_u
+ cortex_m_ops_lib
 quantized_ops_lib
 portable_ops_lib
 quantized_kernels
+ cortex_m_kernels
 portable_kernels
 "-Wl,--no-whole-archive"
 -Xlinker -Map=arm_executor_runner.map
@@ -561,7 +573,7 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER)
 set_property(
 TARGET etdump
 PROPERTY IMPORTED_LOCATION
- "${ET_BUILD_DIR_PATH}/lib/libetdump.a"
+ "${ET_BUILD_DIR_PATH}/lib/libetdump.a"
 )

 if(CMAKE_BUILD_TYPE MATCHES "Debug")
@@ -574,7 +586,7 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER)
 set_property(
 TARGET ${FLATCCRT_LIB}
 PROPERTY IMPORTED_LOCATION
- "${ET_BUILD_DIR_PATH}/lib/lib${FLATCCRT_LIB}.a"
+ "${ET_BUILD_DIR_PATH}/lib/lib${FLATCCRT_LIB}.a"
 )

 list(APPEND arm_executor_runner_link
@@ -643,4 +655,4 @@ if(SEMIHOSTING)
 ${ETHOS_SDK_PATH}/core_platform/targets/${TARGET_BOARD}/retarget.c
 PROPERTIES HEADER_FILE_ONLY TRUE
 )
-endif()
\ No newline at end of file
+endif()
diff --git a/examples/arm/run.sh b/examples/arm/run.sh
index 01699087443..ed1cbc5e015 100755
--- a/examples/arm/run.sh
+++ b/examples/arm/run.sh
@@ -177,8 +177,24 @@ backends/arm/scripts/build_portable_kernels.sh --et_build_root="${et_build_root}

 if [[ -z "$model_name" ]]; then
 # the test models run, and whether to delegate
- test_model=( "softmax" "add" "add3" "mv2" )
- model_compiler_flags=( "" "--delegate" "--delegate" "--delegate --quantize" )
+ test_model=(
+ "softmax" # 0
+ "add" # 1
+ "add3" # 2
+ "qadd" # 3
+ "qadd2" # 4
+ "qops" # 5
+ "mv2" # 6
+ )
+ model_compiler_flags=(
+ "" # 0 softmax
+ "--delegate" # 1 add
+ "--delegate" # 2 add3
+ "--delegate --quantize" # 3 qadd
+ "--delegate --quantize" # 4 qadd2
+ "--delegate --quantize" # 5 qops
+ "--delegate --quantize" # 6 mv2
+ )
 else
 test_model=( "$model_name" )
 model_compiler_flags=( "$aot_arm_compiler_flag_delegate $aot_arm_compiler_flag_quantize $aot_arm_compiler_flags" )

From b98c3abc6a367ba6945020f8fb813b4a0e68eda3 Mon Sep 17 00:00:00 2001
From: Scott Wolchok
Date: Fri, 9 May 2025 10:34:43 -0700
Subject: [PATCH 025/178] Save some size in pattern/{bitwise,comparison}_op.h
 (#10489)

bloaty told me that we were paying a noticeable size cost for the ::value
members of these structs (at least after the PR in this stack that reapplies
#9841) and now we're not.

Test Plan: bash test/build_optimized_size_test.sh

```
before: adopt functionref
==========
ExecuTorch with no ops binary size, unstripped:
-rwxr-xr-x 1 swolchok staff 153928 Apr 25 11:08 cmake-out/test/size_test
ExecuTorch with portable ops binary size, unstripped:
-rwxr-xr-x 1 swolchok staff 2150960 Apr 25 11:08 cmake-out/test/size_test_all_ops
ExecuTorch with optimized ops binary size, unstripped:
-rwxr-xr-x 1 swolchok staff 5927336 Apr 25 11:08 cmake-out/test/size_test_all_optimized_ops
(.venv) swolchok@swolchok-mac ~/src/executorch> size cmake-out/test/size_test*
__TEXT __DATA __OBJC others dec hex
81920 81920 0 4295049216 4295213056 10003c000 cmake-out/test/size_test
1474560 81920 0 4295655424 4297211904 100224000 cmake-out/test/size_test_all_ops
4505600 98304 0 4296376320 4300980224 1005bc000 cmake-out/test/size_test_all_optimized_ops

after:
ExecuTorch with no ops binary size, unstripped:
-rwxr-xr-x 1 swolchok staff 153928 Apr 25 12:24 cmake-out/test/size_test
ExecuTorch with portable ops binary size, unstripped:
-rwxr-xr-x 1 swolchok staff 2150960 Apr 25 12:24 cmake-out/test/size_test_all_ops
ExecuTorch with optimized ops binary size, unstripped:
-rwxr-xr-x 1 swolchok staff 5887368 Apr 25 12:24 cmake-out/test/size_test_all_optimized_ops
(.venv) swolchok@swolchok-mac ~/src/executorch> size cmake-out/test/size_test*
__TEXT __DATA __OBJC others dec hex
81920 81920 0 4295049216 4295213056 10003c000 cmake-out/test/size_test
1474560 81920 0 4295655424 4297211904 100224000 cmake-out/test/size_test_all_ops
4489216 98304 0 4296359936 4300947456 1005b4000 cmake-out/test/size_test_all_optimized_ops
```

(yes it's neutral; improves size results for further diffs)
---
 .lintrunner.toml | 7 +++
 kernels/portable/cpu/op_bitwise_and.cpp | 6 ++-
 kernels/portable/cpu/op_bitwise_or.cpp | 6 ++-
 kernels/portable/cpu/op_bitwise_xor.cpp | 6 ++-
 kernels/portable/cpu/op_eq.cpp | 8 ++-
 kernels/portable/cpu/op_ge.cpp | 8 ++-
 kernels/portable/cpu/op_gt.cpp | 6 ++-
 kernels/portable/cpu/op_le.cpp | 8 ++-
 kernels/portable/cpu/op_lt.cpp | 6 ++-
 kernels/portable/cpu/op_ne.cpp | 8 ++-
 kernels/portable/cpu/pattern/bitwise_op.h | 15 +++---
 kernels/portable/cpu/pattern/comparison_op.h | 54 ++------------------
 12 files changed, 63 insertions(+), 75 deletions(-)

diff --git a/.lintrunner.toml b/.lintrunner.toml
index ae0d134f8c7..2835af1bf92 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -220,6 +220,13 @@
 exclude_patterns = [
 'extension/**',
 'kernels/optimized/**',
 # Justified include.
+ 'kernels/portable/cpu/op_bitwise*.cpp', + 'kernels/portable/cpu/op_eq.cpp', + 'kernels/portable/cpu/op_ge.cpp', + 'kernels/portable/cpu/op_gt.cpp', + 'kernels/portable/cpu/op_le.cpp', + 'kernels/portable/cpu/op_lt.cpp', + 'kernels/portable/cpu/op_ne.cpp', 'runtime/kernel/thread_parallel_interface.h', 'scripts/**', 'third-party/**', diff --git a/kernels/portable/cpu/op_bitwise_and.cpp b/kernels/portable/cpu/op_bitwise_and.cpp index f62d0b70dd4..609dcb1e949 100644 --- a/kernels/portable/cpu/op_bitwise_and.cpp +++ b/kernels/portable/cpu/op_bitwise_and.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,7 @@ Tensor& bitwise_and_Tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "bitwise_and.Tensor_out"; - return internal::bitwise_tensor_out(ctx, a, b, out); + return internal::bitwise_tensor_out(ctx, a, b, out); } Tensor& bitwise_and_Scalar_out( @@ -29,7 +31,7 @@ Tensor& bitwise_and_Scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "bitwise_and.Scalar_out"; - return internal::bitwise_scalar_out(ctx, a, b, out); + return internal::bitwise_scalar_out(ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/op_bitwise_or.cpp b/kernels/portable/cpu/op_bitwise_or.cpp index 8028815fbf9..42cb2a6c3ba 100644 --- a/kernels/portable/cpu/op_bitwise_or.cpp +++ b/kernels/portable/cpu/op_bitwise_or.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,7 @@ Tensor& bitwise_or_Tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "bitwise_or.Tensor_out"; - return internal::bitwise_tensor_out(ctx, a, b, out); + return internal::bitwise_tensor_out(ctx, a, b, out); } Tensor& bitwise_or_Scalar_out( @@ -29,7 +31,7 @@ Tensor& bitwise_or_Scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "bitwise_or.Scalar_out"; - return internal::bitwise_scalar_out(ctx, a, b, out); + return internal::bitwise_scalar_out(ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/op_bitwise_xor.cpp b/kernels/portable/cpu/op_bitwise_xor.cpp index 85badf95789..5fe4e1708d5 100644 --- a/kernels/portable/cpu/op_bitwise_xor.cpp +++ b/kernels/portable/cpu/op_bitwise_xor.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,7 @@ Tensor& bitwise_xor_Tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "bitwise_xor.Tensor_out"; - return internal::bitwise_tensor_out(ctx, a, b, out); + return internal::bitwise_tensor_out(ctx, a, b, out); } Tensor& bitwise_xor_Scalar_out( @@ -29,7 +31,7 @@ Tensor& bitwise_xor_Scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "bitwise_xor.Scalar_out"; - return internal::bitwise_scalar_out(ctx, a, b, out); + return internal::bitwise_scalar_out(ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/op_eq.cpp b/kernels/portable/cpu/op_eq.cpp index bddb6181ee0..9e21b82c43c 100644 --- a/kernels/portable/cpu/op_eq.cpp +++ b/kernels/portable/cpu/op_eq.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,8 @@ Tensor& eq_tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY 
facebook-hte-CArray static constexpr const char op_name[] = "eq.Tensor_out"; - return internal::comparison_tensor_out(ctx, a, b, out); + return internal::comparison_tensor_out( + ctx, a, b, out); } Tensor& eq_scalar_out( @@ -29,7 +32,8 @@ Tensor& eq_scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "eq.Scalar_out"; - return internal::comparison_scalar_out(ctx, a, b, out); + return internal::comparison_scalar_out( + ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/op_ge.cpp b/kernels/portable/cpu/op_ge.cpp index 8457f91b548..d5e7576b7ae 100644 --- a/kernels/portable/cpu/op_ge.cpp +++ b/kernels/portable/cpu/op_ge.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,8 @@ Tensor& ge_tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "ge.Tensor_out"; - return internal::comparison_tensor_out(ctx, a, b, out); + return internal::comparison_tensor_out( + ctx, a, b, out); } Tensor& ge_scalar_out( @@ -29,7 +32,8 @@ Tensor& ge_scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "ge.Scalar_out"; - return internal::comparison_scalar_out(ctx, a, b, out); + return internal::comparison_scalar_out( + ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/op_gt.cpp b/kernels/portable/cpu/op_gt.cpp index bb1f6a274cd..cd65a3b68d9 100644 --- a/kernels/portable/cpu/op_gt.cpp +++ b/kernels/portable/cpu/op_gt.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,7 @@ Tensor& gt_tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "gt.Tensor_out"; - return internal::comparison_tensor_out(ctx, a, b, out); + return internal::comparison_tensor_out(ctx, a, b, out); } Tensor& gt_scalar_out( @@ -29,7 +31,7 @@ Tensor& gt_scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "gt.Scalar_out"; - return internal::comparison_scalar_out(ctx, a, b, out); + return internal::comparison_scalar_out(ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/op_le.cpp b/kernels/portable/cpu/op_le.cpp index e893678fc5e..909de1bfad2 100644 --- a/kernels/portable/cpu/op_le.cpp +++ b/kernels/portable/cpu/op_le.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,8 @@ Tensor& le_tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "le.Tensor_out"; - return internal::comparison_tensor_out(ctx, a, b, out); + return internal::comparison_tensor_out( + ctx, a, b, out); } Tensor& le_scalar_out( @@ -29,7 +32,8 @@ Tensor& le_scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "le.Scalar_out"; - return internal::comparison_scalar_out(ctx, a, b, out); + return internal::comparison_scalar_out( + ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/op_lt.cpp b/kernels/portable/cpu/op_lt.cpp index 6f1ffb21153..5af89920536 100644 --- a/kernels/portable/cpu/op_lt.cpp +++ b/kernels/portable/cpu/op_lt.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,7 @@ Tensor& lt_tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY 
facebook-hte-CArray static constexpr const char op_name[] = "lt.Tensor_out"; - return internal::comparison_tensor_out(ctx, a, b, out); + return internal::comparison_tensor_out(ctx, a, b, out); } Tensor& lt_scalar_out( @@ -29,7 +31,7 @@ Tensor& lt_scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "lt.Scalar_out"; - return internal::comparison_scalar_out(ctx, a, b, out); + return internal::comparison_scalar_out(ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/op_ne.cpp b/kernels/portable/cpu/op_ne.cpp index 5e5a2d38a33..a4b292359df 100644 --- a/kernels/portable/cpu/op_ne.cpp +++ b/kernels/portable/cpu/op_ne.cpp @@ -8,6 +8,8 @@ #include +#include + namespace torch { namespace executor { namespace native { @@ -19,7 +21,8 @@ Tensor& ne_tensor_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "ne.Tensor_out"; - return internal::comparison_tensor_out(ctx, a, b, out); + return internal::comparison_tensor_out( + ctx, a, b, out); } Tensor& ne_scalar_out( @@ -29,7 +32,8 @@ Tensor& ne_scalar_out( Tensor& out) { // @lint-ignore CLANGTIDY facebook-hte-CArray static constexpr const char op_name[] = "ne.Scalar_out"; - return internal::comparison_scalar_out(ctx, a, b, out); + return internal::comparison_scalar_out( + ctx, a, b, out); } } // namespace native diff --git a/kernels/portable/cpu/pattern/bitwise_op.h b/kernels/portable/cpu/pattern/bitwise_op.h index 6e4c111b8f2..b8d8acf3382 100644 --- a/kernels/portable/cpu/pattern/bitwise_op.h +++ b/kernels/portable/cpu/pattern/bitwise_op.h @@ -47,11 +47,13 @@ constexpr bitwise_fn get_bitwise_fn() { template struct BitwiseFnForOp { - static constexpr auto value = get_bitwise_fn(); - static_assert(value != nullptr, "unknown op_name!"); + static constexpr auto get_value() { + return get_bitwise_fn(); + } + static_assert(get_value() != nullptr, "unknown op_name!"); }; -template +template