Commit d3a9389
Update base for Update on "Arm backend: Add 16A8W support for view and transpose operations"
Add 16A8W quantization support for view and transpose operations in the ExecuTorch ARM backend. This follows the pattern established for the linear, mul, sigmoid, tanh, and slice operations, extending int16 support to view and transpose.

Changes:
- Add INT16 dtype validation support in op_transpose.py
- Add a test_view_tensor_16a8w_tosa_INT test function
- Enable test_view.py in the test targets configuration

The 16A8W configuration uses 16-bit activations with 8-bit weights, enabling higher precision for activations while maintaining weight efficiency.

Differential Revision: [D80511313](https://our.internmc.facebook.com/intern/diff/D80511313/)

cc digantdesai freddan80 per zingo oscarandersson8218

[ghstack-poisoned]
2 parents 01db40e + 0b78412 commit d3a9389
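For readers unfamiliar with the pattern the commit message describes: extending an operator to 16A8W largely amounts to letting INT16 activations through the operator's dtype validation while weights remain INT8. Below is a minimal, self-contained sketch of that kind of check; the helper name, op name, and accepted dtype set are illustrative assumptions for this page, not the actual code added to op_transpose.py.

import torch

def validate_valid_dtype(op_name, tensors, valid_dtypes):
    # Hypothetical stand-in for the ARM backend's dtype check; the real helper
    # may differ in name and signature. Rejects any tensor whose dtype falls
    # outside the supported set.
    for t in tensors:
        if t.dtype not in valid_dtypes:
            raise ValueError(
                f"{op_name}: unsupported dtype {t.dtype}; "
                f"expected one of {valid_dtypes}"
            )

# 16A8W: activations may now be int16 (alongside int8); weights stay int8.
activation = torch.zeros(2, 3, dtype=torch.int16)
validate_valid_dtype(
    "aten.permute_copy",  # hypothetical op name for a transpose-style op
    [activation],
    [torch.int8, torch.int16, torch.int32],
)

Under this reading, the change in op_transpose.py would presumably add the INT16 entry to the operator's accepted activation dtypes, with the new test exercising that path end to end.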

395 files changed: +24109 −3892 lines
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-e7152ff8a6a929a0db7f3f4a72a5b6d471769cd3
+4d4abec80f03cd8fdefe1d9cb3a60d3690cd777e

.ci/scripts/setup-qnn-deps.sh

Lines changed: 1 addition & 1 deletion
@@ -11,4 +11,4 @@ source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_q
 
 setup_libcpp 12
 setup_android_ndk
-install_qnn
+install_qnn
.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright (c) Samsung Electronics Co. LTD
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -ex
+
+
+download_ai_lite_core() {
+  API_BASE="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download"
+  API_KEY="kn10SoSY3hkC-9Qny5TqD2mnqVrlupv3krnjLeBt5cY"
+
+  VERSION="0.5"
+  OS_NAME="Ubuntu 22.04"
+  OUT_FILE="/tmp/exynos-ai-litecore-v${VERSION}.tar.gz"
+  TARGET_PATH="/tmp/exynos_ai_lite_core"
+
+  mkdir -p ${TARGET_PATH}
+  # Presigned issue URL
+  JSON_RESP=$(curl -sS -G \
+    --location --fail --retry 3 \
+    -H "apikey: ${API_KEY}" \
+    --data-urlencode "version=${VERSION}" \
+    --data-urlencode "os=${OS_NAME}" \
+    "${API_BASE}")
+
+  DOWNLOAD_URL=$(echo "$JSON_RESP" | sed -n 's/.*"data":[[:space:]]*"\([^"]*\)".*/\1/p')
+
+  if [[ -z "$DOWNLOAD_URL" ]]; then
+    echo "Failed to extract download URL"
+    echo "$JSON_RESP"
+    exit 1
+  fi
+
+  # Download LiteCore
+  curl -sS -L --fail --retry 3 \
+    --output "$OUT_FILE" \
+    "$DOWNLOAD_URL"
+
+  echo "Download done: $OUT_FILE"
+
+  tar -C "${TARGET_PATH}" --strip-components=1 -xzvf "${OUT_FILE}"
+
+  export EXYNOS_AI_LITECORE_ROOT=${TARGET_PATH}
+  export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${EXYNOS_AI_LITECORE_ROOT}/lib/x86_64-linux
+}
+
+install_enn_backend() {
+  NDK_INSTALLATION_DIR=/opt/ndk
+  rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}"
+  ANDROID_NDK_VERSION=r27b
+
+  pushd .
+  cd /tmp
+  curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
+  unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip"
+
+  # Print the content for manual verification
+  ls -lah "android-ndk-${ANDROID_NDK_VERSION}"
+  sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}"
+  popd
+  # build Exynos backend
+  export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
+  bash backends/samsung/build.sh --build all
+  # set env variable
+  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
+  export PYTHONPATH=${PYTHONPATH:-}:${EXECUTORCH_ROOT}/..
+}
+
+AI_LITE_CORE_VERSION=0.5.0
+
+download_ai_lite_core ${AI_LITE_CORE_VERSION}
+install_enn_backend

.ci/scripts/test_backend_linux.sh

Lines changed: 1 addition & 3 deletions
@@ -30,10 +30,8 @@ if [[ "$FLOW" == *qnn* ]]; then
   # Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
   # cleaned up.
   PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
-  PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-  PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+  PYTHON_EXECUTABLE=python source .ci/scripts/build-qnn-sdk.sh
   QNN_X86_LIB_DIR=`realpath build-x86/lib/`
-  QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
   export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
 
   # TODO Get SDK root from install scripts

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 0 additions & 2 deletions
@@ -9,8 +9,6 @@ set -euxo pipefail
 
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
-# Source QNN configuration
-source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/qnn_config.sh"
 # Download QNN_SDK. If already downloaded, export environment path
 source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
 install_qnn
.ci/scripts/test_torchao_huggingface_checkpoints.sh

Lines changed: 139 additions & 0 deletions
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# -------------------------
+# Args / flags
+# -------------------------
+TEST_WITH_RUNNER=0
+MODEL_NAME=""
+
+# Parse args
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 <model_name> [--test_with_runner]"
+  echo "Supported model_name values: qwen3_4b, phi_4_mini"
+  exit 1
+fi
+
+MODEL_NAME="$1"
+shift
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --test_with_runner)
+      TEST_WITH_RUNNER=1
+      ;;
+    -h|--help)
+      echo "Usage: $0 <model_name> [--test_with_runner]"
+      echo "  model_name: qwen3_4b | phi_4_mini"
+      echo "  --test_with_runner: build ET + run llama_main to sanity-check the export"
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+  shift
+done
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+MODEL_OUT=model.pte
+
+case "$MODEL_NAME" in
+  qwen3_4b)
+    echo "Running Qwen3-4B export..."
+    HF_MODEL_DIR=$(hf download pytorch/Qwen3-4B-INT8-INT4)
+    EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB
+    $PYTHON_EXECUTABLE -m executorch.examples.models.qwen3.convert_weights \
+      $HF_MODEL_DIR \
+      pytorch_model_converted.bin
+
+    $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \
+      --model "qwen3_4b" \
+      --checkpoint pytorch_model_converted.bin \
+      --params examples/models/qwen3/config/4b_config.json \
+      --output_name $MODEL_OUT \
+      -kv \
+      --use_sdpa_with_kv_cache \
+      -X \
+      --xnnpack-extended-ops \
+      --max_context_length 1024 \
+      --max_seq_length 1024 \
+      --dtype fp32 \
+      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
+    ;;
+
+  phi_4_mini)
+    echo "Running Phi-4-mini export..."
+    HF_MODEL_DIR=$(hf download pytorch/Phi-4-mini-instruct-INT8-INT4)
+    EXPECTED_MODEL_SIZE_UPPER_BOUND=$((3 * 1024 * 1024 * 1024)) # 3GB
+    $PYTHON_EXECUTABLE -m executorch.examples.models.phi_4_mini.convert_weights \
+      $HF_MODEL_DIR \
+      pytorch_model_converted.bin
+
+    $PYTHON_EXECUTABLE -m executorch.examples.models.llama.export_llama \
+      --model "phi_4_mini" \
+      --checkpoint pytorch_model_converted.bin \
+      --params examples/models/phi_4_mini/config/config.json \
+      --output_name $MODEL_OUT \
+      -kv \
+      --use_sdpa_with_kv_cache \
+      -X \
+      --xnnpack-extended-ops \
+      --max_context_length 1024 \
+      --max_seq_length 1024 \
+      --dtype fp32 \
+      --metadata '{"get_bos_id":199999, "get_eos_ids":[200020,199999]}'
+    ;;
+
+  *)
+    echo "Error: unsupported model_name '$MODEL_NAME'"
+    echo "Supported values: qwen3_4b, phi_4_mini"
+    exit 1
+    ;;
+esac
+
+# Check file size
+MODEL_SIZE=$(stat --printf="%s" $MODEL_OUT 2>/dev/null || stat -f%z $MODEL_OUT)
+if [[ $MODEL_SIZE -gt $EXPECTED_MODEL_SIZE_UPPER_BOUND ]]; then
+  echo "Error: model size $MODEL_SIZE is greater than expected upper bound $EXPECTED_MODEL_SIZE_UPPER_BOUND"
+  exit 1
+fi
+
+# Install ET with CMake
+if [[ "$TEST_WITH_RUNNER" -eq 1 ]]; then
+  echo "[runner] Building and testing llama_main ..."
+  cmake -DPYTHON_EXECUTABLE=python \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DEXECUTORCH_ENABLE_LOGGING=1 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
+    -Bcmake-out .
+  cmake --build cmake-out -j16 --config Release --target install
+
+  # Install llama runner
+  cmake -DPYTHON_EXECUTABLE=python \
+    -DCMAKE_BUILD_TYPE=Release \
+    -Bcmake-out/examples/models/llama \
+    examples/models/llama
+  cmake --build cmake-out/examples/models/llama -j16 --config Release
+
+  # Run the model
+  ./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path="${HF_MODEL_DIR}/tokenizer.json" --prompt="Once upon a time,"
+fi
+
+# Clean up
+rm -f pytorch_model_converted.bin "$MODEL_OUT"

.github/workflows/android-perf.yml

Lines changed: 2 additions & 2 deletions
@@ -292,7 +292,7 @@ jobs:
   export.output_name="${OUT_ET_MODEL_NAME}.pte"
   ls -lh "${OUT_ET_MODEL_NAME}.pte"
 elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
-  export QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072
+  export QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724
   export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
   export PYTHONPATH=$(pwd)/..
@@ -432,7 +432,7 @@ jobs:
 PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
 mkdir -p aar-out
-PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
+PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.250724 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
 mkdir -p extension/benchmark/android/benchmark/app/libs
 cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
 pushd extension/benchmark/android/benchmark

.github/workflows/pull.yml

Lines changed: 45 additions & 0 deletions
@@ -866,6 +866,41 @@ jobs:
     PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh mobilenetv2
 
+  test-samsung-models-linux:
+    name: test-samsung-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-gcc9
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -ex
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Setup python
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Setup Samsung SDK (AI Lite Core) and install enn backend
+        source .ci/scripts/setup-samsung-linux-deps.sh
+
+        # Test models serially
+        models="mv2 ic3 resnet18 resnet50"
+        for model in $models; do
+          python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
+        done
+
+        # Test ops
+        python -m unittest discover -s backends/samsung/test/ops -p "test_*.py"
+
   test-vulkan-models-linux:
     name: test-vulkan-models-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -929,7 +964,17 @@ jobs:
         CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
         .ci/scripts/setup-linux.sh --build-tool "cmake"
 
+        # Custom operator tests
         PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
+        ./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
+        ./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
+        ./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
+        ./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
+
+        # Run e2e testing for selected operators. More operators will be tested via this
+        # route in the future.
+        python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
+        python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"
 
   nxp-build-test:
     name: nxp-build-test

.github/workflows/trunk.yml

Lines changed: 34 additions & 6 deletions
@@ -8,9 +8,6 @@ on:
   tags:
     - ciflow/trunk/*
   pull_request:
-    paths:
-      - .ci/docker/ci_commit_pins/pytorch.txt
-      - .ci/scripts/**
   workflow_dispatch:
 
 concurrency:
@@ -585,6 +582,37 @@ jobs:
       # Test llama2
       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
 
+  test-torchao-huggingface-checkpoints:
+    name: test-torchao-huggingface-checkpoints
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      matrix:
+        model: [qwen3_4b, phi_4_mini]
+        include:
+          - model: qwen3_4b
+            test_with_runner: true
+          - model: phi_4_mini
+            test_with_runner: false
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        pip install -U "huggingface_hub[cli]"
+
+        bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.test_with_runner && '--test_with_runner' || '' }}
+
   # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
   # test-llava-runner-macos:
   #   name: test-llava-runner-macos
@@ -993,13 +1021,13 @@ jobs:
       timeout: 60
       script: |
         conda init powershell
-
+
         powershell -Command "& {
           Set-PSDebug -Trace 1
          \$ErrorActionPreference = 'Stop'
          \$PSNativeCommandUseErrorActionPreference = \$true
 
-          .ci/scripts/setup-windows.ps1
+          .ci/scripts/setup-windows.ps1
 
           powershell .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
-        }"
+        }"
