diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py index d8c7ae1a570..1125df6b402 100644 --- a/backends/arm/test/common.py +++ b/backends/arm/test/common.py @@ -87,7 +87,7 @@ def get_u55_compile_spec( macs: int = 128, system_config: str = "Ethos_U55_High_End_Embedded", memory_mode: str = "Shared_Sram", - extra_flags: str = "--debug-force-regor --output-format=raw", + extra_flags: str = "--debug-force-regor --output-format=raw --arena-cache-size=2097152", custom_path: Optional[str] = None, config: Optional[str] = None, tosa_debug_mode: EthosUCompileSpec.DebugMode | None = None, @@ -122,7 +122,7 @@ def get_u85_compile_spec( macs: int = 128, system_config="Ethos_U85_SYS_DRAM_Mid", memory_mode="Shared_Sram", - extra_flags="--output-format=raw", + extra_flags="--output-format=raw --arena-cache-size=2097152", custom_path: Optional[str] = None, config: Optional[str] = None, tosa_debug_mode: EthosUCompileSpec.DebugMode | None = None, diff --git a/backends/arm/test/conftest.py b/backends/arm/test/conftest.py index 0060bf0ea63..8a08c74efc4 100644 --- a/backends/arm/test/conftest.py +++ b/backends/arm/test/conftest.py @@ -25,10 +25,6 @@ def pytest_configure(config): if getattr(config.option, "llama_inputs", False) and config.option.llama_inputs: pytest._test_options["llama_inputs"] = config.option.llama_inputs # type: ignore[attr-defined] - pytest._test_options["fast_fvp"] = False # type: ignore[attr-defined] - if getattr(config.option, "fast_fvp", False): - pytest._test_options["fast_fvp"] = config.option.fast_fvp # type: ignore[attr-defined] - pytest._test_options["tosa_version"] = "1.0" # type: ignore[attr-defined] if config.option.arm_run_tosa_version: pytest._test_options["tosa_version"] = config.option.arm_run_tosa_version @@ -49,7 +45,6 @@ def try_addoption(*args, **kwargs): try_addoption("--arm_quantize_io", action="store_true", help="Deprecated.") try_addoption("--arm_run_corstoneFVP", action="store_true", help="Deprecated.") - 
try_addoption("--fast_fvp", action="store_true") try_addoption( "--llama_inputs", nargs="+", diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py index aa2f49b5e53..c9fe32bf86c 100644 --- a/backends/arm/test/ops/test_permute.py +++ b/backends/arm/test/ops/test_permute.py @@ -76,11 +76,7 @@ def test_permute_tosa_INT(test_data: torch.Tensor): pipeline.run() -@common.parametrize( - "test_data", - test_data_suite, - xfails={"rank_4_3": "MLETORCH-955 : Permutation numerical diff for u55"}, -) +@common.parametrize("test_data", test_data_suite) @common.XfailIfNoCorstone300 def test_permute_u55_INT(test_data): test_data, dims = test_data() diff --git a/backends/arm/test/runner_utils.py b/backends/arm/test/runner_utils.py index bc890d53bc4..ae1fc136ce7 100644 --- a/backends/arm/test/runner_utils.py +++ b/backends/arm/test/runner_utils.py @@ -28,7 +28,6 @@ ) from executorch.backends.arm.ethosu import EthosUCompileSpec -from executorch.backends.arm.test.conftest import is_option_enabled from executorch.backends.arm.tosa.compile_spec import TosaCompileSpec from executorch.backends.arm.tosa.specification import Tosa_1_00, TosaSpecification from executorch.backends.arm.vgf import VgfCompileSpec @@ -414,10 +413,6 @@ def run_corstone( "The argument passed to the FVP should be less than 256 characters long, otherwise it gets truncated" ) - ethos_u_extra_args = "" - if is_option_enabled("fast_fvp"): - ethos_u_extra_args = ethos_u_extra_args + "--fast" - match target_board: case "corstone-300": command_args = [ @@ -435,12 +430,12 @@ def run_corstone( "-C", "cpu0.semihosting-stack_base=0", "-C", - f"ethosu.extra_args='{ethos_u_extra_args}'", - "-C", "cpu0.semihosting-heap_limit=0", "-C", f"cpu0.semihosting-cwd={intermediate_path}", "-C", + "ethosu.extra_args='--fast'", + "-C", f"cpu0.semihosting-cmd_line='{cmd_line}'", "-a", str(elf_path), @@ -473,7 +468,7 @@ def run_corstone( "-C", f"mps4_board.subsystem.cpu0.semihosting-cwd={intermediate_path}", 
"-C", - f"mps4_board.subsystem.ethosu.extra_args='{ethos_u_extra_args}'", + "mps4_board.subsystem.ethosu.extra_args='--fast'", "-C", f"mps4_board.subsystem.cpu0.semihosting-cmd_line='{cmd_line}'", "-a", @@ -719,20 +714,25 @@ def assert_elf_path_exists(elf_path): ) -def get_elf_path(target_board): +def get_elf_path(target_board: str, use_portable_ops: bool = False): if target_board not in VALID_TARGET: raise ValueError(f"Unsupported target: {target_board}") + if use_portable_ops: + portable_ops_str = "portable-ops_" + else: + portable_ops_str = "" + if target_board in ("corstone-300", "corstone-320"): elf_path = os.path.join( "arm_test", - f"arm_semihosting_executor_runner_{target_board}", + f"arm_semihosting_executor_runner_{portable_ops_str}{target_board}", "arm_executor_runner", ) assert_elf_path_exists(elf_path) elif target_board == "vkml_emulation_layer": elf_path = os.path.join( - "arm_test/arm_executor_runner_vkml", + f"arm_test/arm_executor_runner_{portable_ops_str}vkml", "executor_runner", ) assert_elf_path_exists(elf_path) @@ -740,9 +740,9 @@ def get_elf_path(target_board): return elf_path -def arm_executor_runner_exists(target_board): +def arm_executor_runner_exists(target_board: str, use_portable_ops: bool = False): try: - get_elf_path(target_board) + get_elf_path(target_board, use_portable_ops=use_portable_ops) except: return False else: diff --git a/backends/arm/test/setup_testing.sh b/backends/arm/test/setup_testing.sh index d1e4725d93b..bb68361c238 100755 --- a/backends/arm/test/setup_testing.sh +++ b/backends/arm/test/setup_testing.sh @@ -10,6 +10,23 @@ script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")") et_root_dir=$(realpath "${script_dir}/../../..") build_executor_runner=${et_root_dir}/backends/arm/scripts/build_executor_runner.sh build_root_test_dir=${et_root_dir}/arm_test/arm_semihosting_executor_runner +extraflags="-DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=83886080" -${build_executor_runner} --pte=semihosting --target=ethos-u55-128 
--output="${build_root_test_dir}_corstone-300" -${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --output="${build_root_test_dir}_corstone-320" +# By default, tests run with an ELF built without any portable ops. +# If you pass use_portable_ops=True when creating the ArmTester(), +# the tests instead run with some portable ops compiled in; see the lists below. + +# --target, --system_config and --memory_mode should match the setup used by ArmTester; see backends/arm/test/common.py + +${build_executor_runner} --pte=semihosting --target=ethos-u55-128 --system_config=Ethos_U55_High_End_Embedded --memory_mode=Shared_Sram --output="${build_root_test_dir}_corstone-300" --extra_build_flags=${extraflags} +${build_executor_runner} --pte=semihosting --target=ethos-u85-128 --system_config=Ethos_U85_SYS_DRAM_Mid --memory_mode=Dedicated_Sram_384KB --output="${build_root_test_dir}_corstone-320" --extra_build_flags=${extraflags} + +# Lists of portable ops used by testing. They are mainly used by the flow +# test setup to make sure that models that are not fully delegated can still be tested and run OK. +# To use them, set use_portable_ops=True when creating ArmTester(). + +portable_ops_list_u55="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::squeeze_copy.dims,dim_order_ops::_clone_dim_order.out" +portable_ops_list_u85="aten::permute_copy.out,aten::convolution.out,aten::relu.out,aten::_native_batch_norm_legit_no_training.out,aten::as_strided_copy.out,aten::mean.out,aten::full_like.out,aten::bmm.out,aten::scalar_tensor.out,aten::index.Tensor_out,aten::where.self_out" + +${build_executor_runner} --pte=semihosting --target=ethos-u55-128 --system_config=Ethos_U55_High_End_Embedded --memory_mode=Shared_Sram --select_ops_list="${portable_ops_list_u55}" --output="${build_root_test_dir}_portable-ops_corstone-300" --extra_build_flags=${extraflags} +${build_executor_runner} 
--pte=semihosting --target=ethos-u85-128 --system_config=Ethos_U85_SYS_DRAM_Mid --memory_mode=Dedicated_Sram_384KB --select_ops_list="${portable_ops_list_u85}" --output="${build_root_test_dir}_portable-ops_corstone-320" --extra_build_flags=${extraflags} diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py index 604253b6c92..d5484dacbe1 100644 --- a/backends/arm/test/tester/arm_tester.py +++ b/backends/arm/test/tester/arm_tester.py @@ -250,6 +250,8 @@ def __init__( transform_passes: Optional[ Union[Sequence[PassType], Dict[str, Sequence[PassType]]] ] = None, + use_portable_ops: bool = False, + timeout: int = 600, ): """ Args: @@ -271,6 +273,8 @@ def __init__( # Initial model needs to be set as a *possible* but not yet added Stage, therefore add None entry. self.stages[StageType.INITIAL_MODEL] = None self._run_stage(InitialModel(self.original_module)) + self.use_portable_ops = use_portable_ops + self.timeout = timeout def quantize( self, @@ -348,13 +352,15 @@ def to_executorch(self, to_executorch_stage: Optional[ToExecutorch] | None = Non return super().to_executorch(to_executorch_stage) def serialize( - self, serialize_stage: Optional[Serialize] = None, timeout: int = 480 + self, + serialize_stage: Optional[Serialize] = None, ): if serialize_stage is None: serialize_stage = Serialize( compile_spec=self.compile_spec, module=self.original_module, - timeout=timeout, + use_portable_ops=self.use_portable_ops, + timeout=self.timeout, ) assert ( self.compile_spec.get_intermediate_path() is not None diff --git a/backends/arm/test/tester/serialize.py b/backends/arm/test/tester/serialize.py index f0fd246b3a6..33e57cc721d 100644 --- a/backends/arm/test/tester/serialize.py +++ b/backends/arm/test/tester/serialize.py @@ -31,12 +31,14 @@ def __init__( self, compile_spec: ArmCompileSpec, module: Optional[torch.nn.Module], + use_portable_ops: bool = False, timeout: int = 120, ): """ Args: compile_spec: CompileSpecs to be used for 
serialization. module: Original Module to be used for serialization. Optional - can be used for reference output generation. + use_portable_ops: If True, test with portable ops compiled in. The default is to test without them, to get an error if the model is not fully delegated. timeout: Timeout for fvp. Default is 120 seconds. """ super().__init__() @@ -44,6 +46,7 @@ def __init__( self.timeout = timeout self.executorch_program_manager: ExecutorchProgramManager | None self.compile_spec = compile_spec + self.use_portable_ops = use_portable_ops def run(self, artifact: ExecutorchProgramManager, inputs=None) -> None: super().run(artifact, inputs) @@ -58,7 +61,7 @@ def run_artifact(self, inputs): inputs_flattened, _ = tree_flatten(inputs) intermediate_path = self.compile_spec.get_intermediate_path() target_board = get_target_board(self.compile_spec) - elf_path = get_elf_path(target_board) + elf_path = get_elf_path(target_board, self.use_portable_ops) if not os.path.exists(elf_path): raise FileNotFoundError( diff --git a/backends/test/suite/flows/arm.py b/backends/test/suite/flows/arm.py index 85674331eda..a690e4681f8 100644 --- a/backends/test/suite/flows/arm.py +++ b/backends/test/suite/flows/arm.py @@ -20,11 +20,15 @@ def _create_arm_flow( compile_spec: ArmCompileSpec, symmetric_io_quantization: bool = False, per_channel_quantization: bool = True, + use_portable_ops: bool = True, + timeout: int = 1200, ) -> TestFlow: def _create_arm_tester(*args, **kwargs) -> ArmTester: kwargs["compile_spec"] = compile_spec - return ArmTester(*args, **kwargs) + return ArmTester( + *args, **kwargs, use_portable_ops=use_portable_ops, timeout=timeout + ) support_serialize = not isinstance(compile_spec, TosaCompileSpec) quantize = compile_spec.tosa_spec.support_integer()