Commit d3a9389
Update base for Update on "Arm backend: Add 16A8W support for view and transpose operations"
Add 16A8W quantization support for view and transpose operations in the ExecuTorch ARM backend. This follows the pattern established for the linear, mul, sigmoid, tanh, and slice operations, extending int16 support to view and transpose.

Changes:
- Add INT16 dtype validation support in op_transpose.py
- Add a test_view_tensor_16a8w_tosa_INT test function
- Enable test_view.py in the test targets configuration

The 16A8W configuration uses 16-bit activations with 8-bit weights, enabling higher precision for activations while maintaining weight efficiency.

Differential Revision: [D80511313](https://our.internmc.facebook.com/intern/diff/D80511313/)

cc digantdesai freddan80 per zingo oscarandersson8218

[ghstack-poisoned]
2 parents 01db40e + 0b78412 commit d3a9389
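For readers unfamiliar with the pattern the commit message describes: extending an operator to 16A8W largely amounts to letting INT16 activations through the operator's dtype validation while weights remain INT8. Below is a minimal, self-contained sketch of that kind of check; the helper name, op name, and accepted dtype set are illustrative assumptions for this page, not the actual code added to op_transpose.py.

import torch

def validate_valid_dtype(op_name, tensors, valid_dtypes):
    # Hypothetical stand-in for the ARM backend's dtype check; the real helper
    # may differ in name and signature. Rejects any tensor whose dtype falls
    # outside the supported set.
    for t in tensors:
        if t.dtype not in valid_dtypes:
            raise ValueError(
                f"{op_name}: unsupported dtype {t.dtype}; "
                f"expected one of {valid_dtypes}"
            )

# 16A8W: activations may now be int16 (alongside int8); weights stay int8.
activation = torch.zeros(2, 3, dtype=torch.int16)
validate_valid_dtype(
    "aten.permute_copy",  # hypothetical op name for a transpose-style op
    [activation],
    [torch.int8, torch.int16, torch.int32],
)

Under this reading, the change in op_transpose.py would presumably add the INT16 entry to the operator's accepted activation dtypes, with the new test exercising that path end to end.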

395 files changed: +24109 −3892 lines
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-e7152ff8a6a929a0db7f3f4a72a5b6d471769cd3
+4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e

.ci/scripts/setup-qnn-deps.sh

Lines changed: 1 addition & 1 deletion
@@ -11,4 +11,4 @@ source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_q
 
 setup_libcpp 12
 setup_android_ndk
-install_qnn
+install_qnn
.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright (c) Samsung Electronics Co. LTD
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+
+download_ai_lite_core() {
+  API_BASE="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download"
+  API_KEY="kn10SoSY3hkC-9Qny5TqD2mnqVrlupv3krnjLeBt5cY"
+
+  VERSION="0.5"
+  OS_NAME="Ubuntu 22.04"
+  OUT_FILE="/tmp/exynos-ai-litecore-v${VERSION}.tar.gz"
+  TARGET_PATH="/tmp/exynos_ai_lite_core"
+
+  mkdir -p ${TARGET_PATH}
+  # Presigned issue URL
+  JSON_RESP=$(curl -sS -G \
+    --location --fail --retry 3 \
+    -H "apikey: ${API_KEY}" \
+    --data-urlencode "version=${VERSION}" \
+    --data-urlencode "os=${OS_NAME}" \
+    "${API_BASE}")
+
+  DOWNLOAD_URL=$(echo "$JSON_RESP" | sed -n 's/.*"data":[[:space:]]*"\([^"]*\)".*/\1/p')
+
+  if [[ -z "$DOWNLOAD_URL" ]]; then
+    echo "Failed to extract download URL"
+    echo "$JSON_RESP"
+    exit 1
+  fi
+
+  # Download LiteCore
+  curl -sS -L --fail --retry 3 \
+    --output "$OUT_FILE" \
+    "$DOWNLOAD_URL"
+
+  echo "Download done: $OUT_FILE"
+
+  tar -C "${TARGET_PATH}" --strip-components=1 -xzvf "${OUT_FILE}"
+
+  export EXYNOS_AI_LITECORE_ROOT=${TARGET_PATH}
+  export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${EXYNOS_AI_LITECORE_ROOT}/lib/x86_64-linux
+}
+
+install_enn_backend() {
+  NDK_INSTALLATION_DIR=/opt/ndk
+  rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
+  ANDROID_NDK_VERSION=r27b
+
+  pushd .
+  cd /tmp
+  curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
+  unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
+
+  # Print the content for manual verification
+  ls -lah "android-ndk-${ANDROID_NDK_VERSION}"
+  sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}"
+  popd
+  # build Exynos backend
+  export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
+  bash backends/samsung/build.sh --build all
+  # set env variable
+  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
+  export PYTHONPATH=${PYTHONPATH:-}:${EXECUTORCH_ROOT}/..
+}
+
+AI_LITE_CORE_VERSION=0.5.0
+
+download_ai_lite_core ${AI_LITE_CORE_VERSION}
+install_enn_backend

.ci/scripts/test_backend_linux.sh

Lines changed: 1 addition & 3 deletions
@@ -30,10 +30,8 @@ if [[ "$FLOW" == *qnn* ]]; then
   # Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
   # cleaned up.
   PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
-  PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-  PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+  PYTHON_EXECUTABLE=python source .ci/scripts/build-qnn-sdk.sh
   QNN_X86_LIB_DIR=`realpath build-x86/lib/`
-  QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
   export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
 
   # TODO Get SDK root from install scripts

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 0 additions & 2 deletions
@@ -9,8 +9,6 @@ set -euxo pipefail
 
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
-# Source QNN configuration
-source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/qnn_config.sh"
 # Download QNN_SDK. If already downloaded, export environment path
 source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
 install_qnn
.ci/scripts/test_torchao_huggingface_checkpoints.sh

Lines changed: 139 additions & 0 deletions
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# -------------------------
+# Args / flags
+# -------------------------
+TEST_WITH_RUNNER=0
+MODEL_NAME=""
+
+# Parse args
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 <model_name> [--test_with_runner]"
+  echo "Supported model_name values: qwen3_4b, phi_4_mini"
+  exit 1
+fi
+
+MODEL_NAME="$1"
+shift
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --test_with_runner)
+      TEST_WITH_RUNNER=1
+      ;;
+    -h|--help)
+      echo "Usage: $0 <model_name> [--test_with_runner]"
+      echo "  model_name: qwen3_4b | phi_4_mini"
+      echo "  --test_with_runner: build ET + run llama_main to sanity-check the export"
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+MODEL_OUT=model.pte
+
+case "$MODEL_NAME" in
+  qwen3_4b)
+    echo "Running Qwen3-4B export..."
+    HF_MODEL_DIR=$(hf download pytorch/Qwen3-4B-INT8-INT4)
+    EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB
+    $PYTHON_EXECUTABLE -m executorch.examples.models.qwen3.convert_weights \
+      $HF_MODEL_DIR \
+      pytorch_model_converted.bin
+
+    $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \
+      --model "qwen3_4b" \
+      --checkpoint pytorch_model_converted.bin \
+      --params examples/models/qwen3/config/4b_config.json \
+      --output_name $MODEL_OUT \
+      -kv \
+      --use_sdpa_with_kv_cache \
+      -X \
+      --xnnpack-extended-ops \
+      --max_context_length 1024 \
+      --max_seq_length 1024 \
+      --dtype fp32 \
+      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
+    ;;
+
+  phi_4_mini)
+    echo "Running Phi-4-mini export..."
+    HF_MODEL_DIR=$(hf download pytorch/Phi-4-mini-instruct-INT8-INT4)
+    EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB
+    $PYTHON_EXECUTABLE -m executorch.examples.models.phi_4_mini.convert_weights \
+      $HF_MODEL_DIR \
+      pytorch_model_converted.bin
+
+    $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \
+      --model "phi_4_mini" \
+      --checkpoint pytorch_model_converted.bin \
+      --params examples/models/phi_4_mini/config/config.json \
+      --output_name $MODEL_OUT \
+      -kv \
+      --use_sdpa_with_kv_cache \
+      -X \
+      --xnnpack-extended-ops \
+      --max_context_length 1024 \
+      --max_seq_length 1024 \
+      --dtype fp32 \
+      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
+    ;;
+
+  *)
+    echo "Error: unsupported model_name '$MODEL_NAME'"
+    echo "Supported values: qwen3_4b, phi_4_mini"
+    exit 1
+    ;;
+esac
+
+# Check file size
+MODEL_SIZE=$(stat --printf="%s" $MODEL_OUT 2>/dev/null || stat -f%z $MODEL_OUT)
+if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then
+  echo "Error: model size $MODEL_SIZE is greater than expected upper bound $EXPECTED_MODEL_SIZE_UPPER_BOUND"
+  exit 1
+fi
+
+# Install ET with CMake
+if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
+  echo "[runner] Building and testing llama_main ..."
+  cmake -DPYTHON_EXECUTABLE=python \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DEXECUTORCH_ENABLE_LOGGING=1 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
+    -Bcmake-out .
+  cmake --build cmake-out -j16 --config Release --target install
+
+  # Install llama runner
+  cmake -DPYTHON_EXECUTABLE=python \
+    -DCMAKE_BUILD_TYPE=Release \
+    -Bcmake-out/examples/models/llama \
+    examples/models/llama
+  cmake --build cmake-out/examples/models/llama -j16 --config Release
+
+  # Run the model
+  ./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time,"
+fi
+
+# Clean up
+rm -f pytorch_model_converted.bin "$MODEL_OUT"

.github/workflows/android-perf.yml

Lines changed: 2 additions & 2 deletions
@@ -292,7 +292,7 @@ jobs:
   export.output_name="${OUT_ET_MODEL_NAME}.pte"
   ls -lh "${OUT_ET_MODEL_NAME}.pte"
 elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
-  export QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072
+  export QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724
   export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
   export PYTHONPATH=$(pwd)/..
@@ -432,7 +432,7 @@ jobs:
 PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
 mkdir -p aar-out
-PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
+PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
 mkdir -p extension/benchmark/android/benchmark/app/libs
 cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
 pushd extension/benchmark/android/benchmark

.github/workflows/pull.yml

Lines changed: 45 additions & 0 deletions
@@ -866,6 +866,41 @@ jobs:
     PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh mobilenetv2
 
+  test-samsung-models-linux:
+    name: test-samsung-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-gcc9
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -ex
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Setup python
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Setup Samsung SDK (AI Lite Core) and install enn backend
+        source .ci/scripts/setup-samsung-linux-deps.sh
+
+        # Test models serially
+        models="mv2 ic3 resnet18 resnet50"
+        for model in $models; do
+          python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
+        done
+
+        # Test ops
+        python -m unittest discover -s backends/samsung/test/ops -p "test_*.py"
+
   test-vulkan-models-linux:
     name: test-vulkan-models-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -929,7 +964,17 @@ jobs:
         CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
         .ci/scripts/setup-linux.sh --build-tool "cmake"
 
+        # Custom operator tests
         PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
+        ./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
+        ./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
+        ./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
+        ./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
+
+        # Run e2e testing for selected operators. More operators will be tested via this
+        # route in the future.
+        python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
+        python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"
 
   nxp-build-test:
     name: nxp-build-test

.github/workflows/trunk.yml

Lines changed: 34 additions & 6 deletions
@@ -8,9 +8,6 @@ on:
   tags:
     - ciflow/trunk/*
   pull_request:
-    paths:
-      - .ci/docker/ci_commit_pins/pytorch.txt
-      - .ci/scripts/**
   workflow_dispatch:
 
 concurrency:
@@ -585,6 +582,37 @@ jobs:
       # Test llama2
       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
 
+  test-torchao-huggingface-checkpoints:
+    name: test-torchao-huggingface-checkpoints
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      matrix:
+        model: [qwen3_4b, phi_4_mini]
+        include:
+          - model: qwen3_4b
+            test_with_runner: true
+          - model: phi_4_mini
+            test_with_runner: false
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        pip install -U "huggingface_hub[cli]"
+
+        bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.test_with_runner && '--test_with_runner' || '' }}
+
   # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
   # test-llava-runner-macos:
   #   name: test-llava-runner-macos
@@ -993,13 +1021,13 @@ jobs:
       timeout: 60
       script: |
         conda init powershell
-
+
         powershell -Command "& {
           Set-PSDebug -Trace 1
          \$ErrorActionPreference = 'Stop'
          \$PSNativeCommandUseErrorActionPreference = \$true
 
-          .ci/scripts/setup-windows.ps1
+          .ci/scripts/setup-windows.ps1
 
           powershell .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
-        }"
+        }"
