Commit f36640a

Merge remote-tracking branch origin/upstream/main
2 parents: ced6a62 + 8c48a76

File tree: 297 files changed, +8359 −13433 lines

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-4361747abfc55e40e929396ed986efe775d745f9
+d03e90c2cd9048e6d9a75285c0355f033cd016fc

.ci/docker/common/install_arm.sh

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+install_arm_prerequiresites() {
+  apt-get update -y
+  apt-get install -y --no-install-recommends \
+    mesa-vulkan-drivers libvulkan1
+  rm -rf /var/lib/apt/lists/*
+}
+
+install_arm_prerequiresites

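The new helper installs the Mesa Vulkan drivers and the Vulkan loader for the Arm CI image. Since it is self-contained, a hypothetical standalone run on a Debian/Ubuntu host would be (root is assumed, because the script calls apt-get without sudo, matching the Docker build environment):

  # Sketch only: run the installer outside the image build
  sudo bash .ci/docker/common/install_arm.sh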
.ci/docker/ubuntu/Dockerfile

Lines changed: 3 additions & 0 deletions

@@ -83,6 +83,9 @@ RUN if [ -n "${ANDROID_NDK_VERSION}" ]; then bash ./install_android.sh; fi
 RUN rm install_android.sh
 
 ARG ARM_SDK
+COPY ./common/install_arm.sh install_arm.sh
+RUN if [ -n "${ARM_SDK}" ]; then bash ./install_arm.sh; fi
+RUN rm install_arm.sh
 
 ARG ZEPHYR_SDK
 COPY ./common/install_zephyr.sh install_zephyr.sh

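The install only fires when the ARM_SDK build argument is non-empty (any value works). A hypothetical image build enabling it, with the build-context path inferred from the COPY ./common/... lines, might look like:

  # Sketch only: build the CI image with the Arm prerequisites baked in
  docker build --build-arg ARM_SDK=yes -f .ci/docker/ubuntu/Dockerfile .ci/docker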
.ci/scripts/export_model_cuda_artifact.sh renamed to .ci/scripts/export_model_artifact.sh

Lines changed: 56 additions & 19 deletions

@@ -5,19 +5,21 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# Export model to CUDA format with optional quantization
+# Export model to CUDA/Metal format with optional quantization
 
 show_help() {
   cat << EOF
-Usage: export_model_cuda_artifact.sh <hf_model> [quant_name] [output_dir]
+Usage: export_model_artifact.sh <device> <hf_model> [quant_name] [output_dir]
 
-Export a HuggingFace model to CUDA format with optional quantization.
+Export a HuggingFace model to CUDA/Metal format with optional quantization.
 
 Arguments:
+  device      cuda or metal (required)
+
   hf_model    HuggingFace model ID (required)
               Supported models:
               - mistralai/Voxtral-Mini-3B-2507
-              - openai/whisper-small
+              - openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
               - google/gemma-3-4b-it
 
   quant_name  Quantization type (optional, default: non-quantized)
@@ -29,9 +31,9 @@ Arguments:
   output_dir  Output directory for artifacts (optional, default: current directory)
 
 Examples:
-  export_model_cuda_artifact.sh "openai/whisper-small"
-  export_model_cuda_artifact.sh "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
-  export_model_cuda_artifact.sh "google/gemma-3-4b-it" "non-quantized" "./output"
+  export_model_artifact.sh metal "openai/whisper-small"
+  export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
+  export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output"
 EOF
 }
 
@@ -48,9 +50,22 @@ fi
 
 set -eux
 
-HF_MODEL="$1"
-QUANT_NAME="${2:-non-quantized}"
-OUTPUT_DIR="${3:-.}"
+DEVICE="$1"
+HF_MODEL="$2"
+QUANT_NAME="${3:-non-quantized}"
+OUTPUT_DIR="${4:-.}"
+
+case "$DEVICE" in
+  cuda)
+    ;;
+  metal)
+    ;;
+  *)
+    echo "Error: Unsupported device '$DEVICE'"
+    echo "Supported devices: cuda, metal"
+    exit 1
+    ;;
+esac
 
 # Determine model configuration based on HF model ID
 case "$HF_MODEL" in
@@ -62,15 +77,23 @@ case "$HF_MODEL" in
     PREPROCESSOR_FEATURE_SIZE="128"
     PREPROCESSOR_OUTPUT="voxtral_preprocessor.pte"
     ;;
-  openai/whisper-small)
+  openai/whisper-*)
     MODEL_NAME="whisper"
     TASK="automatic-speech-recognition"
     MAX_SEQ_LEN=""
    EXTRA_PIP="librosa"
-    PREPROCESSOR_FEATURE_SIZE="80"
     PREPROCESSOR_OUTPUT="whisper_preprocessor.pte"
+    if [[ "$HF_MODEL" == *"large-v3"* ]]; then
+      PREPROCESSOR_FEATURE_SIZE="128"
+    else
+      PREPROCESSOR_FEATURE_SIZE="80"
+    fi
     ;;
   google/gemma-3-4b-it)
+    if [ "$DEVICE" = "metal" ]; then
+      echo "Error: Export for device 'metal' is not yet tested for model '$HF_MODEL'"
+      exit 1
+    fi
     MODEL_NAME="gemma3"
     TASK="multimodal-text-to-text"
     MAX_SEQ_LEN="64"
@@ -80,7 +103,7 @@ case "$HF_MODEL" in
     ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it"
     exit 1
     ;;
 esac
@@ -91,9 +114,17 @@ case "$QUANT_NAME" in
     EXTRA_ARGS=""
     ;;
   quantized-int4-tile-packed)
+    if [ "$DEVICE" = "metal" ]; then
+      echo "Error: Metal backend does not yet support quantization '$QUANT_NAME'"
+      exit 1
+    fi
     EXTRA_ARGS="--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
     ;;
   quantized-int4-weight-only)
+    if [ "$DEVICE" = "metal" ]; then
+      echo "Error: Metal backend does not yet support quantization '$QUANT_NAME'"
+      exit 1
+    fi
     EXTRA_ARGS="--qlinear_encoder 4w"
     ;;
   *)
@@ -114,12 +145,18 @@ MAX_SEQ_LEN_ARG=""
 if [ -n "$MAX_SEQ_LEN" ]; then
   MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"
 fi
+
+DEVICE_ARG=""
+if [ "$DEVICE" = "cuda" ]; then
+  DEVICE_ARG="--device cuda"
+fi
+
 optimum-cli export executorch \
   --model "$HF_MODEL" \
   --task "$TASK" \
-  --recipe "cuda" \
+  --recipe "$DEVICE" \
   --dtype bfloat16 \
-  --device cuda \
+  ${DEVICE_ARG} \
   ${MAX_SEQ_LEN_ARG} \
   ${EXTRA_ARGS} \
   --output_dir ./
@@ -133,18 +170,18 @@ if [ -n "$PREPROCESSOR_OUTPUT" ]; then
 fi
 
 test -f model.pte
-test -f aoti_cuda_blob.ptd
+test -f aoti_${DEVICE}_blob.ptd
 if [ -n "$PREPROCESSOR_OUTPUT" ]; then
   test -f $PREPROCESSOR_OUTPUT
 fi
 echo "::endgroup::"
 
 echo "::group::Store $MODEL_NAME Artifacts"
 mkdir -p "${OUTPUT_DIR}"
-cp model.pte "${OUTPUT_DIR}/"
-cp aoti_cuda_blob.ptd "${OUTPUT_DIR}/"
+mv model.pte "${OUTPUT_DIR}/"
+mv aoti_${DEVICE}_blob.ptd "${OUTPUT_DIR}/"
 if [ -n "$PREPROCESSOR_OUTPUT" ]; then
-  cp $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/"
+  mv $PREPROCESSOR_OUTPUT "${OUTPUT_DIR}/"
 fi
 ls -al "${OUTPUT_DIR}"
 echo "::endgroup::"
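With the device now the first positional argument, a couple of hypothetical invocations of the renamed script (model and quantization names come from its own help text; the ./artifacts output directory is illustrative):

  # Whisper large-v3 on Metal exercises the new 128-bin preprocessor branch
  bash .ci/scripts/export_model_artifact.sh metal "openai/whisper-large-v3" "non-quantized" ./artifacts

  # Voxtral on CUDA with tile-packed int4 quantization
  bash .ci/scripts/export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" ./artifacts

On success each run leaves model.pte and aoti_<device>_blob.ptd (plus a preprocessor .pte where applicable) in the output directory; note the artifacts are now moved there rather than copied.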

.ci/scripts/test_llama.sh

Lines changed: 8 additions & 9 deletions

@@ -171,15 +171,14 @@ cmake_build_llama_runner() {
   git submodule update --init
   popd
   dir="examples/models/llama"
-  retry cmake \
-    -DEXECUTORCH_BUILD_TESTS=ON \
-    -DBUILD_TESTING=OFF \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
-    -Bcmake-out/${dir} \
-    ${dir}
-  cmake --build cmake-out/${dir} -j9 --config "$CMAKE_BUILD_TYPE"
-
+  if [[ "$CMAKE_BUILD_TYPE" == "Debug" ]]; then
+    PRESET="llama-debug"
+  else
+    PRESET="llama-release"
+  fi
+  pushd "${dir}"
+  cmake --workflow --preset "${PRESET}"
+  popd
 }
 
 cleanup_files() {
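cmake --workflow --preset <name> runs the steps that a workflow preset chains together in the project's CMakePresets.json, replacing the hand-rolled configure-then-build pair deleted above. Assuming llama-release also names matching configure and build presets (not shown in this diff), a rough two-step equivalent would be:

  # Sketch only: the explicit steps a workflow preset typically bundles
  cmake --preset llama-release          # configure step
  cmake --build --preset llama-release  # build step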

.ci/scripts/test_llama_lora.sh

Lines changed: 1 addition & 4 deletions

@@ -12,10 +12,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
-  retry cmake --preset llm \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Release
-  cmake --build cmake-out -j9 --target install --config Release
+  cmake --workflow llm-release
 }
 
 cmake_build_llama_runner() {
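Here the preset name is passed to cmake --workflow without --preset, a shorthand accepted by newer CMake releases; the long-form spelling, matching the usage in test_llama.sh above, would be:

  cmake --workflow --preset llm-release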

.ci/scripts/test_model_cuda_e2e.sh renamed to .ci/scripts/test_model_e2e.sh

Lines changed: 35 additions & 34 deletions

@@ -5,19 +5,21 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-# Test CUDA model end-to-end, need to run .ci/scripts/export_model_cuda_artifact.sh first
+# Test CUDA/Metal model end-to-end, need to run .ci/scripts/export_model_artifact.sh first
 
 show_help() {
   cat << EOF
-Usage: test_model_cuda_e2e.sh <hf_model> <quant_name> [model_dir]
+Usage: test_model_e2e.sh <device> <hf_model> <quant_name> [model_dir]
 
-Build and run end-to-end tests for CUDA models.
+Build and run end-to-end tests for CUDA/Metal models.
 
 Arguments:
+  device      cuda or metal (required)
+
   hf_model    HuggingFace model ID (required)
               Supported models:
               - mistralai/Voxtral-Mini-3B-2507
-              - openai/whisper-small
+              - openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
               - google/gemma-3-4b-it
 
   quant_name  Quantization type (required)
@@ -27,12 +29,12 @@ Arguments:
               - quantized-int4-weight-only
 
   model_dir   Directory containing model artifacts (optional, default: current directory)
-              Expected files: model.pte, aoti_cuda_blob.ptd
+              Expected files: model.pte, aoti_cuda_blob.ptd/aoti_metal_blob.ptd
               Tokenizers and test files will be downloaded to this directory
 
 Examples:
-  test_model_cuda_e2e.sh "openai/whisper-small" "non-quantized"
-  test_model_cuda_e2e.sh "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
+  test_model_e2e.sh metal "openai/whisper-small" "non-quantized"
+  test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
 EOF
 }
 
@@ -55,20 +57,21 @@ fi
 
 set -eux
 
-HF_MODEL="$1"
-QUANT_NAME="$2"
+DEVICE="$1"
+HF_MODEL="$2"
+QUANT_NAME="$3"
 # Download tokenizers, audio, and image files to this directory
-MODEL_DIR="${3:-.}"
+MODEL_DIR="${4:-.}"
 
 echo "Testing model: $HF_MODEL (quantization: $QUANT_NAME)"
 
-# Make sure model.pte and aoti_cuda_blob.ptd exist
+# Make sure model.pte and aoti_${DEVICE}_blob.ptd exist
 if [ ! -f "$MODEL_DIR/model.pte" ]; then
   echo "Error: model.pte not found in $MODEL_DIR"
   exit 1
 fi
-if [ ! -f "$MODEL_DIR/aoti_cuda_blob.ptd" ]; then
-  echo "Error: aoti_cuda_blob.ptd not found in $MODEL_DIR"
+if [ ! -f "$MODEL_DIR/aoti_${DEVICE}_blob.ptd" ]; then
+  echo "Error: aoti_${DEVICE}_blob.ptd not found in $MODEL_DIR"
   exit 1
 fi
 # Locate EXECUTORCH_ROOT from the directory of this script
@@ -91,13 +94,13 @@ case "$HF_MODEL" in
     AUDIO_FILE="poem.wav"
     IMAGE_PATH=""
     ;;
-  openai/whisper-small)
-    MODEL_NAME="whisper"
+  openai/whisper-*)
+    MODEL_NAME="${HF_MODEL#openai/}"
     RUNNER_TARGET="whisper_runner"
     RUNNER_PATH="whisper"
     EXPECTED_OUTPUT="Mr. Quilter is the apostle of the middle classes"
     PREPROCESSOR="whisper_preprocessor.pte"
-    TOKENIZER_URL="https://huggingface.co/openai/whisper-small/resolve/main" # @lint-ignore
+    TOKENIZER_URL="https://huggingface.co/${HF_MODEL}/resolve/main" # @lint-ignore
     TOKENIZER_FILE=""
     AUDIO_URL=""
     AUDIO_FILE="output.wav"
@@ -117,7 +120,7 @@ case "$HF_MODEL" in
     ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper-small, google/gemma-3-4b-it"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it"
     exit 1
     ;;
 esac
@@ -142,7 +145,7 @@ fi
 # Download test files
 if [ "$AUDIO_URL" != "" ]; then
   curl -L $AUDIO_URL -o ${MODEL_DIR}/$AUDIO_FILE
-elif [ "$MODEL_NAME" = "whisper" ]; then
+elif [[ "$MODEL_NAME" == *whisper* ]]; then
   conda install -y -c conda-forge "ffmpeg<8"
   pip install datasets soundfile torchcodec
   python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
@@ -152,35 +155,33 @@ ls -al
 echo "::endgroup::"
 
 echo "::group::Build $MODEL_NAME Runner"
-cmake --preset llm \
-  -DEXECUTORCH_BUILD_CUDA=ON \
-  -DCMAKE_INSTALL_PREFIX=cmake-out \
-  -DCMAKE_BUILD_TYPE=Release \
-  -Bcmake-out -S.
-cmake --build cmake-out -j$(nproc) --target install --config Release
-
-cmake -DEXECUTORCH_BUILD_CUDA=ON \
-  -DCMAKE_BUILD_TYPE=Release \
-  -Sexamples/models/$RUNNER_PATH \
-  -Bcmake-out/examples/models/$RUNNER_PATH/
-cmake --build cmake-out/examples/models/$RUNNER_PATH --target $RUNNER_TARGET --config Release
+
+if [ "$DEVICE" != "cuda" ] && [ "$DEVICE" != "metal" ]; then
+  echo "Error: Unsupported device '$DEVICE'. Must be 'cuda' or 'metal'."
+  exit 1
+fi
+
+MAKE_TARGET="${RUNNER_PATH}-${DEVICE}"
+make "${MAKE_TARGET}"
 echo "::endgroup::"
 
 echo "::group::Run $MODEL_NAME Runner"
 set +e
-export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+if [ "$DEVICE" = "cuda" ]; then
+  export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
+fi
 
 # Build runner command with common arguments
 RUNNER_BIN="cmake-out/examples/models/$RUNNER_PATH/$RUNNER_TARGET"
-RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd --temperature 0"
+RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --data_path ${MODEL_DIR}/aoti_${DEVICE}_blob.ptd --temperature 0"
 
 # Add model-specific arguments
 case "$MODEL_NAME" in
   voxtral)
     RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
     ;;
-  whisper)
-    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR"
+  whisper-*)
+    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --audio_path ${MODEL_DIR}/$AUDIO_FILE --processor_path ${MODEL_DIR}/$PREPROCESSOR --model_name ${MODEL_NAME}"
    ;;
   gemma3)
     RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --image_path $IMAGE_PATH"
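For reference, a hypothetical end-to-end pairing of the two renamed scripts, where the test step consumes the model.pte and aoti_<device>_blob.ptd that the export step writes (arguments taken from the help text above):

  bash .ci/scripts/export_model_artifact.sh cuda "openai/whisper-small" "non-quantized" ./model_output
  bash .ci/scripts/test_model_e2e.sh cuda "openai/whisper-small" "non-quantized" ./model_output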
0 commit comments

Comments
 (0)