
Commit c4305b2

Update on "Move shim/xplat to shim_et/xplat, try 2"
Looks like in order to get this right, we're going to have to update some internal configuration and originate this PR from the internal repo. Supersedes #8583 / D69882927.

Differential Revision: [D69955361](https://our.internmc.facebook.com/intern/diff/D69955361/)

[ghstack-poisoned]
2 parents: 6f061cc + b236303

File tree: 61 files changed, +1610 −497 lines


.ci/scripts/unittest-macos.sh

Lines changed: 3 additions & 1 deletion
@@ -38,7 +38,9 @@ ${CONDA_RUN} --no-capture-output \
   .ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}"
 
 # Install llama3_2_vision dependencies.
-PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
+PYTHON_EXECUTABLE=python \
+${CONDA_RUN} --no-capture-output \
+  ./examples/models/llama3_2_vision/install_requirements.sh
 
 if [[ "$BUILD_TOOL" == "cmake" ]]; then
   .ci/scripts/unittest-macos-cmake.sh

.github/workflows/trunk.yml

Lines changed: 32 additions & 53 deletions
@@ -374,7 +374,13 @@ jobs:
     secrets: inherit
     strategy:
       matrix:
-        hf_model_repo: [google/gemma-2-2b]
+        hf_model_id: [
+          google/gemma-2-2b,
+          Qwen/Qwen2.5-0.5B,
+          HuggingFaceTB/SmolLM2-135M,
+          meta-llama/Llama-3.2-1B,
+          allenai/OLMo-1B-hf
+        ]
       fail-fast: false
     with:
       secrets-env: EXECUTORCH_HF_TOKEN
@@ -389,66 +395,39 @@ jobs:
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-      echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-      rm -rf cmake-out
-      cmake \
-        -DCMAKE_INSTALL_PREFIX=cmake-out \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DPYTHON_EXECUTABLE=python \
-        -Bcmake-out .
-      cmake --build cmake-out -j9 --target install --config Release
-
-      echo "Build llama runner"
-      dir="examples/models/llama"
-      cmake \
-        -DCMAKE_INSTALL_PREFIX=cmake-out \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DPYTHON_EXECUTABLE=python \
-        -Bcmake-out/${dir} \
-        ${dir}
-      cmake --build cmake-out/${dir} -j9 --config Release
       echo "::endgroup::"
 
-      echo "::group::Set up HuggingFace Dependencies"
-      if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
-        echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
-        exit 1
-      fi
+      echo "::group::Set up Hugging Face"
       pip install -U "huggingface_hub[cli]"
       huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+      git clone https://github.com/huggingface/optimum-executorch
+      cd optimum-executorch
+      # There is no release yet, for CI stability, always test from the same commit on main
+      git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25
+      pip install .
       pip install accelerate sentencepiece
       pip list
       echo "::endgroup::"
 
-      echo "::group::Export to ExecuTorch"
-      TOKENIZER_FILE=tokenizer.model
-      TOKENIZER_BIN_FILE=tokenizer.bin
-      ET_MODEL_NAME=et_model
-      DOWNLOADED_TOKENIZER_FILE_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${{ matrix.hf_model_repo }}" --files "${TOKENIZER_FILE}")
-      if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" ]; then
-        echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-        python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" -o ./${TOKENIZER_BIN_FILE}
-        ls ./tokenizer.bin
-      else
-        echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-        exit 1
-      fi
-
-      python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-      cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+      echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+      # Pass matrix variable as environment variable
+      export MODEL_ID="${{ matrix.hf_model_id }}"
+      python -c "
+      import os
+      from optimum.executorch import ExecuTorchModelForCausalLM
+      from transformers import AutoTokenizer
+
+      model_id = os.getenv('MODEL_ID')
+      print(f'Loading model: {model_id}')
+      model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
+      tokenizer = AutoTokenizer.from_pretrained(model_id)
+      generated_text = model.text_generation(
+        tokenizer=tokenizer,
+        prompt='Simply put, the theory of relativity states that',
+        max_seq_len=64
+      )
+      print(generated_text)
+      "
       echo "::endgroup::"
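The reworked CI step no longer builds the llama runner with CMake; it exports and runs each matrix model through optimum-executorch directly in Python. Below is a minimal local-reproduction sketch of the same flow, assuming optimum-executorch is installed from the commit pinned above and a Hugging Face token with access to any gated checkpoints; the default model id is only illustrative.

import os

from optimum.executorch import ExecuTorchModelForCausalLM
from transformers import AutoTokenizer

# Same flow as the workflow step above: export with the XNNPACK recipe, then
# run a short generation to smoke-test the resulting ExecuTorch program.
model_id = os.getenv("MODEL_ID", "HuggingFaceTB/SmolLM2-135M")  # default is illustrative
model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
tokenizer = AutoTokenizer.from_pretrained(model_id)

generated_text = model.text_generation(
    tokenizer=tokenizer,
    prompt="Simply put, the theory of relativity states that",
    max_seq_len=64,
)
print(generated_text)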

backends/apple/coreml/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -76,6 +76,7 @@ runtime.cxx_python_extension(
     base_module = "",
     visibility = [
         "//executorch/examples/apple/coreml/...",
+        "@EXECUTORCH_CLIENTS",
     ],
     external_deps = [
         "pybind11",

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ python_library(
     deps = [
         "//executorch/backends/arm:tosa_quant_utils",
         "//executorch/backends/arm:tosa_utils",
+        "//executorch/backends/transforms:replace_scalar_with_tensor",
         "//executorch/backends/xnnpack/_passes:xnnpack_passes",
         "//executorch/exir:lib",
     ],

backends/arm/operator_support/TARGETS

Lines changed: 2 additions & 1 deletion
@@ -5,8 +5,9 @@ python_library(
     srcs = glob(["*.py"]),
     typing = True,
     deps = [
+        "//executorch/backends/arm/_passes:passes",
+        "//executorch/backends/arm:tosa_specification",
         "//executorch/backends/xnnpack/_passes:xnnpack_passes",
         "//executorch/exir:lib",
-        "//executorch/backends/arm:tosa_specification"
     ],
 )

backends/arm/test/misc/test_multiple_outputs.py

Lines changed: 4 additions & 6 deletions
@@ -76,23 +76,21 @@ def _test_ethosu_BI_pipeline(
         tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
 
     @pytest.mark.corstone_fvp
-    def test_u85_BI(self):
+    def test_u55_BI(self):
         module = self.MultipleOutputsModule()
         test_data = module.get_inputs()
         self._test_ethosu_BI_pipeline(
             module,
             test_data,
-            common.get_u85_compile_spec(),
+            common.get_u55_compile_spec(),
         )
 
     @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
-    # TODO MLETORCH-598
-    def test_u55_BI(self):
+    def test_u85_BI(self):
         module = self.MultipleOutputsModule()
         test_data = module.get_inputs()
         self._test_ethosu_BI_pipeline(
             module,
             test_data,
-            common.get_u55_compile_spec(),
+            common.get_u85_compile_spec(),
         )

backends/arm/test/ops/test_bmm.py

Lines changed: 4 additions & 3 deletions
@@ -150,9 +150,10 @@ def test_bmm_single_input_tosa_BI(self, test_data_generator: Callable[[], Tuple]
         test_data = test_data_generator()
         self._test_bmm_tosa_BI_pipeline(self.BMMSingleInput(), test_data)
 
+    # Expected to fail on FVP as TOSA.MATMUL is not supported on U55
     @parameterized.expand(BMM.test_data_generators)
     @pytest.mark.corstone_fvp
-    @unittest.expectedFailure
+    @conftest.expectedFailureOnFVP
     def test_bmm_u55_BI_xfails(self, test_data_generator: Callable[[], Tuple]):
         test_data = test_data_generator()
         self._test_bmm_ethosu_BI_pipeline(
@@ -167,10 +168,10 @@ def test_bmm_u85_BI(self, test_data_generator: Callable[[], Tuple]):
             self.BMM(), common.get_u85_compile_spec(), test_data
         )
 
-    # Expected to fail with error: Warning, unsupported fusing of TOSA Rescale previous operator is of type: Memcpy
+    # Expected to fail on FVP as TOSA.MATMUL is not supported on U55
     @parameterized.expand(BMMSingleInput.test_data_generators)
     @pytest.mark.corstone_fvp
-    @unittest.expectedFailure
+    @conftest.expectedFailureOnFVP
     def test_bmm_single_input_u55_BI_xfails(
         self, test_data_generator: Callable[[], Tuple]
     ):
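Several of these Arm tests switch from @unittest.expectedFailure to @conftest.expectedFailureOnFVP, which only treats the test as an expected failure when it actually executes on the Corstone FVP. The decorator's implementation is not shown in this commit; the sketch below is a rough, hypothetical illustration of how such a conditional marker could be written. The is_option_enabled helper here is a stand-in that reads an environment variable, not the repo's actual conftest code.

import os
import unittest


def is_option_enabled(option: str) -> bool:
    # Stand-in for the test suite's conftest helper: treat the option as
    # enabled when, e.g., CORSTONE_FVP=1 is set (assumption for illustration).
    return os.getenv(option.upper(), "0") == "1"


def expectedFailureOnFVP(test_func):
    # Mark the test as an expected failure only when FVP execution is enabled;
    # otherwise return the test unchanged so it runs (and must pass) normally.
    if is_option_enabled("corstone_fvp"):
        return unittest.expectedFailure(test_func)
    return test_func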

backends/arm/test/ops/test_layer_norm.py

Lines changed: 2 additions & 31 deletions
@@ -158,7 +158,7 @@ def test_layer_norm_tosa_BI(
             self.LayerNorm(*model_params), (test_data,)
         )
 
-    @parameterized.expand(test_data_suite[4:])
+    @parameterized.expand(test_data_suite)
     @pytest.mark.corstone_fvp
     def test_layer_norm_u55_BI(
         self,
@@ -170,36 +170,7 @@ def test_layer_norm_u55_BI(
             self.LayerNorm(*model_params), common.get_u55_compile_spec(), (test_data,)
         )
 
-    # Numerical issues on FVP likely due to mul op, MLETORCH-521
-    # Skip tests that require transposes.
-    @parameterized.expand(test_data_suite[:4])
-    @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
-    def test_layer_norm_u55_BI_xfails(
-        self,
-        test_name: str,
-        test_data: torch.Tensor,
-        model_params,
-    ):
-        self._test_layernorm_ethosu_BI_pipeline(
-            self.LayerNorm(*model_params), common.get_u55_compile_spec(), (test_data,)
-        )
-
-    # Numerical issues on FVP likely due to mul op, MLETORCH-521
-    @parameterized.expand(test_data_suite[:-2])
-    @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
-    def test_layer_norm_u85_BI_xfails(
-        self,
-        test_name: str,
-        test_data: torch.Tensor,
-        model_params,
-    ):
-        self._test_layernorm_ethosu_BI_pipeline(
-            self.LayerNorm(*model_params), common.get_u85_compile_spec(), (test_data,)
-        )
-
-    @parameterized.expand(test_data_suite[-2:])
+    @parameterized.expand(test_data_suite)
     @pytest.mark.corstone_fvp
     def test_layer_norm_u85_BI(
         self,

backends/arm/test/ops/test_logsoftmax.py

Lines changed: 18 additions & 28 deletions
@@ -11,7 +11,7 @@
 import pytest
 
 import torch
-from executorch.backends.arm.test import common
+from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
@@ -28,16 +28,17 @@
     lambda: ("randn", torch.randn(10, 10, 10, 10), 3),
     lambda: ("randn_neg_dim", torch.randn(10, 5, 8, 7), -3),
 ]
-test_data_generators_u55 = [
+
+test_data_generators_FVP = [
     # (test_name, test_data, dim)
     lambda: ("ones", torch.ones(10, 10), 1),
     lambda: ("ones_neg_dim", torch.ones(10, 3, 4), -1),
-    lambda: ("randn_neg_dim", torch.randn(10, 5, 8, 7), -3),
-    lambda: ("zeros", torch.zeros(10, 8, 5, 2), 0),
-    lambda: ("zeros_neg_dim", torch.zeros(10, 7, 8, 9), -4),
+    lambda: ("randn_neg_dim", torch.randn(1, 5, 8, 7), -3),
+    lambda: ("zeros", torch.zeros(1, 8, 5, 2), 0),
+    lambda: ("zeros_neg_dim", torch.zeros(1, 7, 8, 9), -4),
     lambda: ("rand", torch.rand(1, 2, 5, 8), 2),
-    lambda: ("rand_neg_dim", torch.rand(2, 10, 8, 10), -2),
-    lambda: ("randn", torch.randn(10, 10, 10, 10), 3),
+    lambda: ("rand_neg_dim", torch.rand(1, 10, 8, 10), -2),
+    lambda: ("randn", torch.randn(1, 10, 10, 10), 3),
 ]
 
 
@@ -99,7 +100,7 @@ def _test_logsoftmax_tosa_ethos_BI_pipeline(
         module: torch.nn.Module,
         test_data: Tuple[torch.tensor],
     ):
-        (
+        tester = (
             ArmTester(
                 module,
                 example_inputs=test_data,
@@ -114,21 +115,10 @@ def _test_logsoftmax_tosa_ethos_BI_pipeline(
             .check_not(["executorch_exir_dialects_edge__ops_aten__logsoftmax_default"])
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
+            .serialize()
         )
-
-    def _test_logsoftmax_tosa_u55_BI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
-    ):
-        self._test_logsoftmax_tosa_ethos_BI_pipeline(
-            common.get_u55_compile_spec(), module, test_data
-        )
-
-    def _test_logsoftmax_tosa_u85_BI_pipeline(
-        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
-    ):
-        self._test_logsoftmax_tosa_ethos_BI_pipeline(
-            common.get_u85_compile_spec(), module, test_data
-        )
+        if conftest.is_option_enabled("corstone_fvp"):
+            tester.run_method_and_compare_outputs(inputs=test_data, qtol=1)
 
     @parameterized.expand(test_data_generators)
     def test_logsoftmax_tosa_MI(self, test_data_generator: Callable[[], Tuple]):
@@ -141,18 +131,18 @@ def test_logsoftmax_tosa_BI(self, test_data_generator: Callable[[], Tuple]):
         test_name, test_data, dim = test_data_generator()
         self._test_logsoftmax_tosa_BI_pipeline(self.LogSoftmax(dim=dim), (test_data,))
 
-    @parameterized.expand(test_data_generators_u55)
+    @parameterized.expand(test_data_generators_FVP)
     @pytest.mark.flaky  # TODO: MLETORCH-460 - Numerically stabler (log)softmax implementation
     def test_logsoftmax_tosa_u55_BI(self, test_data_generator: Callable[[], Tuple]):
         test_name, test_data, dim = test_data_generator()
-        self._test_logsoftmax_tosa_u55_BI_pipeline(
-            self.LogSoftmax(dim=dim), (test_data,)
+        self._test_logsoftmax_tosa_ethos_BI_pipeline(
+            common.get_u55_compile_spec(), self.LogSoftmax(dim=dim), (test_data,)
         )
 
-    @parameterized.expand(test_data_generators)
+    @parameterized.expand(test_data_generators_FVP)
     @pytest.mark.flaky  # TODO: MLETORCH-460 - Numerically stabler (log)softmax implementation
     def test_logsoftmax_tosa_u85_BI(self, test_data_generator: Callable[[], Tuple]):
         test_name, test_data, dim = test_data_generator()
-        self._test_logsoftmax_tosa_u85_BI_pipeline(
-            self.LogSoftmax(dim=dim), (test_data,)
+        self._test_logsoftmax_tosa_ethos_BI_pipeline(
+            common.get_u85_compile_spec(), self.LogSoftmax(dim=dim), (test_data,)
        )
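The same refactor pattern recurs across the Arm test files in this commit: the Ethos-U pipeline is captured in a tester variable, ends with .serialize(), and the serialized program is only executed when the corstone_fvp option is enabled. The sketch below shows that shared shape in compact form; the intermediate builder stages are a simplified assumption (the real pipelines also include check_not/check_count steps), while the serialize and conditional-run calls are taken directly from the diffs.

from executorch.backends.arm.test import common, conftest
from executorch.backends.arm.test.tester.arm_tester import ArmTester


def run_ethosu_bi_pipeline(module, compile_spec, test_data):
    # Quantize, lower to the Ethos-U delegate, and serialize the program...
    tester = (
        ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
        .quantize()
        .export()
        .to_edge()
        .partition()
        .to_executorch()
        .serialize()
    )
    # ...then run on the Corstone FVP only when that option is enabled.
    if conftest.is_option_enabled("corstone_fvp"):
        tester.run_method_and_compare_outputs(inputs=test_data, qtol=1)

A test then just passes common.get_u55_compile_spec() or common.get_u85_compile_spec() together with the module and its example inputs.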

backends/arm/test/ops/test_mean_dim.py

Lines changed: 9 additions & 3 deletions
@@ -10,7 +10,7 @@
 from typing import Tuple
 
 import torch
-from executorch.backends.arm.test import common
+from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 from executorch.exir.backend.backend_details import CompileSpec
 from parameterized import parameterized
@@ -121,7 +121,7 @@ def _test_adaptive_avg_pool2d_tosa_ethosu_BI_pipeline(
         compile_spec: CompileSpec,
         test_data: Tuple[torch.tensor],
     ):
-        (
+        tester = (
             ArmTester(
                 module,
                 example_inputs=test_data,
@@ -141,7 +141,10 @@ def _test_adaptive_avg_pool2d_tosa_ethosu_BI_pipeline(
             )
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
+            .serialize()
         )
+        if conftest.is_option_enabled("corstone_fvp"):
+            tester.run_method_and_compare_outputs(inputs=test_data)
 
     def _test_meandim_tosa_MI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
@@ -188,7 +191,7 @@ def _test_meandim_tosa_ethosu_BI_pipeline(
         compile_spec: CompileSpec,
         test_data: Tuple[torch.tensor],
     ):
-        (
+        tester = (
             ArmTester(
                 module,
                 example_inputs=test_data,
@@ -207,7 +210,10 @@ def _test_meandim_tosa_ethosu_BI_pipeline(
             )
             .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
             .to_executorch()
+            .serialize()
         )
+        if conftest.is_option_enabled("corstone_fvp"):
+            tester.run_method_and_compare_outputs(inputs=test_data, qtol=1)
 
     @parameterized.expand(AdaptiveAveragePool2d.test_data_suite)
     def test_adaptive_avg_pool2d_tosa_MI(
