Commit f36640a

Merge remote-tracking branch origin/upstream/main
2 parents: ced6a62 + 8c48a76

File tree: 297 files changed, +8359 −13433 lines

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-4361747abfc55e40e929396ed986efe775d745f9
+d03e90c2cd9048e6d9a75285c0355f033cd016fc

.ci/docker/common/install_arm.sh

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+install_arm_prerequiresites() {
+  apt-get update -y
+  apt-get install -y --no-install-recommends \
+    mesa-vulkan-drivers libvulkan1
+  rm -rf /var/lib/apt/lists/*
+}
+
+install_arm_prerequiresites

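The new helper installs the Mesa Vulkan drivers and the Vulkan loader for the Arm CI image. Since it is self-contained, a hypothetical standalone run on a Debian/Ubuntu host would be (root is assumed, because the script calls apt-get without sudo, matching the Docker build environment):

  # Sketch only: run the installer outside the image build
  sudo bash .ci/docker/common/install_arm.sh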
.ci/docker/ubuntu/Dockerfile

Lines changed: 3 additions & 0 deletions

@@ -83,6 +83,9 @@ RUN if [ -n "${ANDROID_NDK_VERSION}" ]; then bash ./install_android.sh; fi
 RUN rm install_android.sh
 
 ARG ARM_SDK
+COPY ./common/install_arm.sh install_arm.sh
+RUN if [ -n "${ARM_SDK}" ]; then bash ./install_arm.sh; fi
+RUN rm install_arm.sh
 
 ARG ZEPHYR_SDK
 COPY ./common/install_zephyr.sh install_zephyr.sh

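The install only fires when the ARM_SDK build argument is non-empty (any value works). A hypothetical image build enabling it, with the build-context path inferred from the COPY ./common/... lines, might look like:

  # Sketch only: build the CI image with the Arm prerequisites baked in
  docker build --build-arg ARM_SDK=yes -f .ci/docker/ubuntu/Dockerfile .ci/docker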
.ci/scripts/export_model_cuda_artifact.sh renamed to .ci/scripts/export_model_artifact.sh

Lines changed: 56 additions & 19 deletions

@@ -5,19 +5,21 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# Export model to CUDA format with optional quantization
+# Export model to CUDA/Metal format with optional quantization
 
 show_help() {
   cat << EOF
-Usage: export_model_cuda_artifact.sh <hf_model> [quant_name] [output_dir]
+Usage: export_model_artifact.sh <device> <hf_model> [quant_name] [output_dir]
 
-Export a HuggingFace model to CUDA format with optional quantization.
+Export a HuggingFace model to CUDA/Metal format with optional quantization.
 
 Arguments:
+  device      cuda or metal (required)
+
   hf_model    HuggingFace model ID (required)
               Supported models:
               - mistralai/Voxtral-Mini-3B-2507
-              - openai/whisper-small
+              - openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
               - google/gemma-3-4b-it
 
   quant_name  Quantization type (optional, default: non-quantized)
@@ -29,9 +31,9 @@ Arguments:
   output_dir  Output directory for artifacts (optional, default: current directory)
 
 Examples:
-  export_model_cuda_artifact.sh "openai/whisper-small"
-  export_model_cuda_artifact.sh "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
-  export_model_cuda_artifact.sh "google/gemma-3-4b-it" "non-quantized" "./output"
+  export_model_artifact.sh metal "openai/whisper-small"
+  export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
+  export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output"
 EOF
 }
 
@@ -48,9 +50,22 @@ fi
 
 set -eux
 
-HF_MODEL="$1"
-QUANT_NAME="${2:-non-quantized}"
-OUTPUT_DIR="${3:-.}"
+DEVICE="$1"
+HF_MODEL="$2"
+QUANT_NAME="${3:-non-quantized}"
+OUTPUT_DIR="${4:-.}"
+
+case "$DEVICE" in
+  cuda)
+    ;;
+  metal)
+    ;;
+  *)
+    echo "Error: Unsupported device '$DEVICE'"
+    echo "Supported devices: cuda, metal"
+    exit 1
+    ;;
+esac
 
 # Determine model configuration based on HF model ID
 case "$HF_MODEL" in
@@ -62,15 +77,23 @@ case "$HF_MODEL" in
     PREPROCESSOR_FEATURE_SIZE="128"
     PREPROCESSOR_OUTPUT="voxtral_preprocessor.pte"
     ;;
-  openai/whisper-small)
+  openai/whisper-*)
     MODEL_NAME="whisper"
     TASK="automatic-speech-recognition"
     MAX_SEQ_LEN=""
    EXTRA_PIP="librosa"
-    PREPROCESSOR_FEATURE_SIZE="80"
     PREPROCESSOR_OUTPUT="whisper_preprocessor.pte"
+    if [[ "$HF_MODEL" == *"large-v3"* ]]; then
+      PREPROCESSOR_FEATURE_SIZE="128"
+    else
+      PREPROCESSOR_FEATURE_SIZE="80"
+    fi
     ;;
   google/gemma-3-4b-it)
+    if [ "$DEVICE" = "metal" ]; then
+      echo "Error: Export for device 'metal' is not yet tested for model '$HF_MODEL'"
+      exit 1
+    fi
     MODEL_NAME="gemma3"
     TASK="multimodal-text-to-text"
     MAX_SEQ_LEN="64"
@@ -80,7 +103,7 @@ case "$HF_MODEL" in
     ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it"
     exit 1
     ;;
 esac
@@ -91,9 +114,17 @@ case "$QUANT_NAME" in
     EXTRA_ARGS=""
     ;;
   quantized-int4-tile-packed)
+    if [ "$DEVICE" = "metal" ]; then
+      echo "Error: Metal backend does not yet support quantization '$QUANT_NAME'"
+      exit 1
+    fi
     EXTRA_ARGS="--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
     ;;
   quantized-int4-weight-only)
+    if [ "$DEVICE" = "metal" ]; then
+      echo "Error: Metal backend does not yet support quantization '$QUANT_NAME'"
+      exit 1
+    fi
     EXTRA_ARGS="--qlinear_encoder 4w"
     ;;
   *)
@@ -114,12 +145,18 @@ MAX_SEQ_LEN_ARG=""
 if [ -n "$MAX_SEQ_LEN" ]; then
   MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"
 fi
+
+DEVICE_ARG=""
+if [ "$DEVICE" = "cuda" ]; then
+  DEVICE_ARG="--device cuda"
+fi
+
 optimum-cli export executorch \
   --model "$HF_MODEL" \
   --task "$TASK" \
-  --recipe "cuda" \
+  --recipe "$DEVICE" \
   --dtype bfloat16 \
-  --device cuda \
+  ${DEVICE_ARG} \
   ${MAX_SEQ_LEN_ARG} \
   ${EXTRA_ARGS} \
   --output_dir ./
@@ -133,18 +170,18 @@ if [ -n "$PREPROCESSOR_OUTPUT" ]; then
 fi
 
 test -f model.pte
-test -f aoti_cuda_blob.ptd
+test -f aoti_${DEVICE}_blob.ptd
 if [ -n "$PREPROCESSOR_OUTPUT" ]; then
   test -f $PREPROCESSOR_OUTPUT
 fi
 echo "::endgroup::"
 
 echo "::group::Store $MODEL_NAME Artifacts"
 mkdir -p "${OUTPUT_DIR}"
-cp model.pte "${OUTPUT_DIR}/"
-cp aoti_cuda_blob.ptd "${OUTPUT_DIR}/"
+mv model.pte "${OUTPUT_DIR}/"
+mv aoti_${DEVICE}_blob.ptd "${OUTPUT_DIR}/"
 if [ -n "$PREPROCESSOR_OUTPUT" ]; then
-  cp $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/"
+  mv $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/"
 fi
 ls -al "${OUTPUT_DIR}"
 echo "::endgroup::"
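With the device now the first positional argument, a couple of hypothetical invocations of the renamed script (model and quantization names come from its own help text; the ./artifacts output directory is illustrative):

  # Whisper large-v3 on Metal exercises the new 128-bin preprocessor branch
  bash .ci/scripts/export_model_artifact.sh metal "openai/whisper-large-v3" "non-quantized" ./artifacts

  # Voxtral on CUDA with tile-packed int4 quantization
  bash .ci/scripts/export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" ./artifacts

On success each run leaves model.pte and aoti_<device>_blob.ptd (plus a preprocessor .pte where applicable) in the output directory; note the artifacts are now moved there rather than copied.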

.ci/scripts/test_llama.sh

Lines changed: 8 additions & 9 deletions

@@ -171,15 +171,14 @@ cmake_build_llama_runner() {
   git submodule update --init
   popd
   dir="examples/models/llama"
-  retry cmake \
-    -DEXECUTORCH_BUILD_TESTS=ON \
-    -DBUILD_TESTING=OFF \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
-    -Bcmake-out/${dir} \
-    ${dir}
-  cmake --build cmake-out/${dir} -j9 --config "$CMAKE_BUILD_TYPE"
-
+  if [[ "$CMAKE_BUILD_TYPE" == "Debug" ]]; then
+    PRESET="llama-debug"
+  else
+    PRESET="llama-release"
+  fi
+  pushd "${dir}"
+  cmake --workflow --preset "${PRESET}"
+  popd
 }
 
 cleanup_files() {
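cmake --workflow --preset <name> runs the steps that a workflow preset chains together in the project's CMakePresets.json, replacing the hand-rolled configure-then-build pair deleted above. Assuming llama-release also names matching configure and build presets (not shown in this diff), a rough two-step equivalent would be:

  # Sketch only: the explicit steps a workflow preset typically bundles
  cmake --preset llama-release          # configure step
  cmake --build --preset llama-release  # build step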

.ci/scripts/test_llama_lora.sh

Lines changed: 1 addition & 4 deletions

@@ -12,10 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
-  retry cmake --preset llm \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release
-  cmake --build cmake-out -j9 --target install --config Release
+  cmake --workflow llm-release
 }
 
 cmake_build_llama_runner() {
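Here the preset name is passed to cmake --workflow without --preset, a shorthand accepted by newer CMake releases; the long-form spelling, matching the usage in test_llama.sh above, would be:

  cmake --workflow --preset llm-release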

.ci/scripts/test_model_cuda_e2e.sh renamed to .ci/scripts/test_model_e2e.sh

Lines changed: 35 additions & 34 deletions

@@ -5,19 +5,21 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# Test CUDA model end-to-end, need to run .ci/scripts/export_model_cuda_artifact.sh first
+# Test CUDA/Metal model end-to-end, need to run .ci/scripts/export_model_artifact.sh first
 
 show_help() {
   cat << EOF
-Usage: test_model_cuda_e2e.sh <hf_model> <quant_name> [model_dir]
+Usage: test_model_e2e.sh <device> <hf_model> <quant_name> [model_dir]
 
-Build and run end-to-end tests for CUDA models.
+Build and run end-to-end tests for CUDA/Metal models.
 
 Arguments:
+  device      cuda or metal (required)
+
   hf_model    HuggingFace model ID (required)
               Supported models:
               - mistralai/Voxtral-Mini-3B-2507
-              - openai/whisper-small
+              - openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
               - google/gemma-3-4b-it
 
   quant_name  Quantization type (required)
@@ -27,12 +29,12 @@ Arguments:
               - quantized-int4-weight-only
 
   model_dir   Directory containing model artifacts (optional, default: current directory)
-              Expected files: model.pte, aoti_cuda_blob.ptd
+              Expected files: model.pte, aoti_cuda_blob.ptd/aoti_metal_blob.ptd
               Tokenizers and test files will be downloaded to this directory
 
 Examples:
-  test_model_cuda_e2e.sh "openai/whisper-small" "non-quantized"
-  test_model_cuda_e2e.sh "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
+  test_model_e2e.sh metal "openai/whisper-small" "non-quantized"
+  test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
 EOF
 }
 
@@ -55,20 +57,21 @@ fi
 
 set -eux
 
-HF_MODEL="$1"
-QUANT_NAME="$2"
+DEVICE="$1"
+HF_MODEL="$2"
+QUANT_NAME="$3"
 # Download tokenizers, audio, and image files to this directory
-MODEL_DIR="${3:-.}"
+MODEL_DIR="${4:-.}"
 
 echo "Testing model: $HF_MODEL (quantization: $QUANT_NAME)"
 
-# Make sure model.pte and aoti_cuda_blob.ptd exist
+# Make sure model.pte and aoti_${DEVICE}_blob.ptd exist
 if [ ! -f "$MODEL_DIR/model.pte" ]; then
   echo "Error: model.pte not found in $MODEL_DIR"
   exit 1
 fi
-if [ ! -f "$MODEL_DIR/aoti_cuda_blob.ptd" ]; then
-  echo "Error: aoti_cuda_blob.ptd not found in $MODEL_DIR"
+if [ ! -f "$MODEL_DIR/aoti_${DEVICE}_blob.ptd" ]; then
+  echo "Error: aoti_${DEVICE}_blob.ptd not found in $MODEL_DIR"
   exit 1
 fi
 # Locate EXECUTORCH_ROOT from the directory of this script
@@ -91,13 +94,13 @@ case "$HF_MODEL" in
     AUDIO_FILE="poem.wav"
     IMAGE_PATH=""
     ;;
-  openai/whisper-small)
-    MODEL_NAME="whisper"
+  openai/whisper-*)
+    MODEL_NAME="${HF_MODEL#openai/}"
     RUNNER_TARGET="whisper_runner"
     RUNNER_PATH="whisper"
     EXPECTED_OUTPUT="Mr. Quilter is the apostle of the middle classes"
     PREPROCESSOR="whisper_preprocessor.pte"
-    TOKENIZER_URL="https://huggingface.co/openai/whisper-small/resolve/main" # @lint-ignore
+    TOKENIZER_URL="https://huggingface.co/${HF_MODEL}/resolve/main" # @lint-ignore
     TOKENIZER_FILE=""
     AUDIO_URL=""
     AUDIO_FILE="output.wav"
@@ -117,7 +120,7 @@ case "$HF_MODEL" in
     ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it"
     exit 1
     ;;
 esac
@@ -142,7 +145,7 @@ fi
 # Download test files
 if [ "$AUDIO_URL" != "" ]; then
   curl -L $AUDIO_URL -o ${MODEL_DIR}/$AUDIO_FILE
-elif [ "$MODEL_NAME" = "whisper" ]; then
+elif [[ "$MODEL_NAME" == *whisper* ]]; then
   conda install -y -c conda-forge "ffmpeg<8"
   pip install datasets soundfile torchcodec
   python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
@@ -152,35 +155,33 @@ ls -al
 echo "::endgroup::"
 
 echo "::group::Build $MODEL_NAME Runner"
-cmake --preset llm \
-  -DEXECUTORCH_BUILD_CUDA=ON \
-  -DCMAKE_INSTALL_PREFIX=cmake-out \
-  -DCMAKE_BUILD_TYPE=Release \
-  -Bcmake-out -S.
-cmake --build cmake-out -j$(nproc) --target install --config Release
-
-cmake -DEXECUTORCH_BUILD_CUDA=ON \
-  -DCMAKE_BUILD_TYPE=Release \
-  -Sexamples/models/$RUNNER_PATH \
-  -Bcmake-out/examples/models/$RUNNER_PATH/
-cmake --build cmake-out/examples/models/$RUNNER_PATH --target $RUNNER_TARGET --config Release
+
+if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ]; then
+  echo "Error: Unsupported device '$DEVICE'. Must be 'cuda' or 'metal'."
+  exit 1
+fi
+
+MAKE_TARGET="${RUNNER_PATH}-${DEVICE}"
+make "${MAKE_TARGET}"
 echo "::endgroup::"
 
 echo "::group::Run $MODEL_NAME Runner"
 set +e
-export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+if [ "$DEVICE" = "cuda" ]; then
+  export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+fi
 
 # Build runner command with common arguments
 RUNNER_BIN="cmake-out/examples/models/$RUNNER_PATH/$RUNNER_TARGET"
-RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd --temperature 0"
+RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --data_path ${MODEL_DIR}/aoti_${DEVICE}_blob.ptd --temperature 0"
 
 # Add model-specific arguments
 case "$MODEL_NAME" in
   voxtral)
     RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
     ;;
-  whisper)
-    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
+  whisper-*)
+    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR --model_name ${MODEL_NAME}"
    ;;
   gemma3)
     RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --image_path $IMAGE_PATH"
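For reference, a hypothetical end-to-end pairing of the two renamed scripts, where the test step consumes the model.pte and aoti_<device>_blob.ptd that the export step writes (arguments taken from the help text above):

  bash .ci/scripts/export_model_artifact.sh cuda "openai/whisper-small" "non-quantized" ./model_output
  bash .ci/scripts/test_model_e2e.sh cuda "openai/whisper-small" "non-quantized" ./model_output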
0 commit comments

Comments
 (0)