Skip to content

Commit 64b0337

Browse files
authored
Merge branch 'main' into change-1119290
2 parents 829d167 + d00279d commit 64b0337

File tree

273 files changed

+11673
-1647
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

273 files changed

+11673
-1647
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
53a2908a10f414a2f85caa06703a26a40e873869
1+
cf9d09490c7f6685ec68d5db3acf2e0d73c54d00

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ download_ai_lite_core() {
1313
API_BASE="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download"
1414
API_KEY=$SAMSUNG_AI_LITECORE_KEY
1515

16-
VERSION="0.5"
16+
VERSION="0.7"
1717
OS_NAME="Ubuntu 22.04"
1818
OUT_FILE="/tmp/exynos-ai-litecore-v${VERSION}.tar.gz"
1919
TARGET_PATH="/tmp/exynos_ai_lite_core"
@@ -62,7 +62,7 @@ install_enn_backend() {
6262
export PYTHONPATH=${PYTHONPATH:-}:${EXECUTORCH_ROOT}/..
6363
}
6464

65-
AI_LITE_CORE_VERSION=0.5.0
65+
AI_LITE_CORE_VERSION=0.7.0
6666

6767
download_ai_lite_core ${AI_LITE_CORE_VERSION}
6868
install_enn_backend

.ci/scripts/test_ios_ci.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ say() {
3636

3737
say "Cloning the Demo App"
3838

39+
git config --global http.postBuffer 524288000
3940
git clone --depth 1 https://github.com/meta-pytorch/executorch-examples.git
4041

4142
say "Installing CoreML Backend Requirements"

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \
3838
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
3939
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
4040
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
41+
-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
4142
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
4243
-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
4344
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \

.github/workflows/cuda.yml

Lines changed: 131 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ jobs:
8787
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90-
test-voxtral-cuda-e2e:
91-
name: test-voxtral-cuda-e2e
90+
export-voxtral-cuda-artifact:
91+
name: export-voxtral-cuda-artifact
9292
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9393
permissions:
9494
id-token: write
@@ -104,6 +104,7 @@ jobs:
104104
gpu-arch-version: 12.6
105105
use-custom-docker-registry: false
106106
submodules: recursive
107+
upload-artifact: voxtral-cuda-export
107108
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
108109
script: |
109110
set -eux
@@ -118,6 +119,7 @@ jobs:
118119
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
119120
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
120121
pip install mistral-common librosa
122+
pip list
121123
echo "::endgroup::"
122124
123125
echo "::group::Export Voxtral"
@@ -129,43 +131,152 @@ jobs:
129131
--device cuda \
130132
--max_seq_len 1024 \
131133
--output_dir ./
134+
python -m executorch.extension.audio.mel_spectrogram \
135+
--feature_size 128 \
136+
--stack_output \
137+
--max_audio_len 300 \
138+
--output_file voxtral_preprocessor.pte
139+
140+
test -f model.pte
141+
test -f aoti_cuda_blob.ptd
142+
test -f voxtral_preprocessor.pte
132143
echo "::endgroup::"
133144
134-
echo "::group::Build Voxtral Runner"
145+
echo "::group::Store Voxtral Artifacts"
146+
mkdir -p "${RUNNER_ARTIFACT_DIR}"
147+
cp model.pte "${RUNNER_ARTIFACT_DIR}/"
148+
cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
149+
cp voxtral_preprocessor.pte "${RUNNER_ARTIFACT_DIR}/"
150+
ls -al "${RUNNER_ARTIFACT_DIR}"
151+
echo "::endgroup::"
152+
153+
benchmark-voxtral-cuda:
154+
name: benchmark-voxtral-cuda
155+
needs: export-voxtral-cuda-artifact
156+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
157+
permissions:
158+
id-token: write
159+
contents: read
160+
strategy:
161+
fail-fast: false
162+
with:
163+
timeout: 90
164+
runner: linux.g5.4xlarge.nvidia.gpu
165+
gpu-arch-type: cuda
166+
gpu-arch-version: 12.6
167+
use-custom-docker-registry: false
168+
submodules: recursive
169+
download-artifact: voxtral-cuda-export
170+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
171+
script: |
172+
set -eux
173+
174+
echo "::group::Setup ExecuTorch Requirements"
175+
CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
176+
pip list
177+
echo "::endgroup::"
178+
179+
echo "::group::Prepare Voxtral Artifacts"
180+
cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
181+
cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
182+
ls -al model.pte aoti_cuda_blob.ptd
183+
echo "::endgroup::"
184+
185+
echo "::group::Build Voxtral Benchmark"
135186
cmake -DCMAKE_BUILD_TYPE=Release \
136187
-DEXECUTORCH_BUILD_CUDA=ON \
137188
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
138189
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
190+
-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
139191
-DEXECUTORCH_BUILD_TESTS=ON \
140192
-Bcmake-out .
141193
cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner
142194
echo "::endgroup::"
143195
196+
echo "::group::Run Voxtral Benchmark"
197+
198+
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
199+
cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd
200+
201+
echo "::endgroup::"
202+
203+
test-voxtral-cuda-e2e:
204+
name: test-voxtral-cuda-e2e
205+
needs: export-voxtral-cuda-artifact
206+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
207+
permissions:
208+
id-token: write
209+
contents: read
210+
strategy:
211+
fail-fast: false
212+
with:
213+
timeout: 90
214+
runner: linux.g5.4xlarge.nvidia.gpu
215+
gpu-arch-type: cuda
216+
gpu-arch-version: 12.6
217+
use-custom-docker-registry: false
218+
submodules: recursive
219+
download-artifact: voxtral-cuda-export
220+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
221+
script: |
222+
set -eux
223+
224+
echo "::group::Setup ExecuTorch Requirements"
225+
CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
226+
pip list
227+
echo "::endgroup::"
228+
229+
echo "::group::Prepare Voxtral Artifacts"
230+
cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
231+
cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
232+
cp "${RUNNER_ARTIFACT_DIR}/voxtral_preprocessor.pte" .
233+
TOKENIZER_URL="https://huggingface.co/mistralai/Voxtral-Mini-3B-2507/resolve/main/tekken.json"
234+
curl -L $TOKENIZER_URL -o tekken.json
235+
ls -al model.pte aoti_cuda_blob.ptd voxtral_preprocessor.pte tekken.json
236+
echo "::endgroup::"
237+
238+
echo "::group::Download Test Audio File"
239+
AUDIO_URL="https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
240+
curl -L $AUDIO_URL -o poem.wav
241+
echo "::endgroup::"
242+
243+
echo "::group::Build Voxtral Runner"
244+
cmake --preset llm \
245+
-DEXECUTORCH_BUILD_CUDA=ON \
246+
-DCMAKE_INSTALL_PREFIX=cmake-out \
247+
-DCMAKE_BUILD_TYPE=Release \
248+
-Bcmake-out -S.
249+
cmake --build cmake-out -j$(( $(nproc) - 1 )) --target install --config Release
250+
251+
cmake -DEXECUTORCH_BUILD_CUDA=ON \
252+
-DCMAKE_BUILD_TYPE=Release \
253+
-Sexamples/models/voxtral \
254+
-Bcmake-out/examples/models/voxtral/
255+
cmake --build cmake-out/examples/models/voxtral --target voxtral_runner --config Release
256+
echo "::endgroup::"
257+
144258
echo "::group::Run Voxtral Runner"
145-
# Capture output and allow exit code 139 if we have the expected printout
146259
set +e
147260
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
148-
OUTPUT=$(cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd 2>&1)
261+
OUTPUT=$(cmake-out/examples/models/voxtral/voxtral_runner \
262+
--model_path model.pte \
263+
--data_path aoti_cuda_blob.ptd \
264+
--tokenizer_path tekken.json \
265+
--audio_path poem.wav \
266+
--processor_path voxtral_preprocessor.pte \
267+
--temperature 0 2>&1)
149268
EXIT_CODE=$?
150269
set -e
151270
152271
echo "$OUTPUT"
153272
154-
# Check if the output contains "Run latency (ms):"
155-
if echo "$OUTPUT" | grep -q "Run latency (ms):"; then
156-
echo "Found expected output: 'Run latency (ms):'"
157-
if [ $EXIT_CODE -eq 139 ]; then
158-
echo "Exit code 139 (segfault) detected, but passing since we have the expected output"
159-
exit 0
160-
elif [ $EXIT_CODE -ne 0 ]; then
161-
echo "Unexpected exit code: $EXIT_CODE"
162-
exit $EXIT_CODE
163-
else
164-
echo "Command succeeded with exit code 0"
165-
exit 0
166-
fi
167-
else
168-
echo "Expected output 'Run latency (ms):' not found in output"
273+
if ! echo "$OUTPUT" | grep -iq "poem"; then
274+
echo "Expected output 'poem' not found in output"
169275
exit 1
170276
fi
277+
278+
if [ $EXIT_CODE -ne 0 ]; then
279+
echo "Unexpected exit code: $EXIT_CODE"
280+
exit $EXIT_CODE
281+
fi
171282
echo "::endgroup::"

.github/workflows/pull.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,12 @@ jobs:
935935
python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
936936
done
937937
938+
# Test quant models
939+
model_scripts="deeplab_v3 edsr inception_v3 inception_v4 mobilenet_v2 mobilenet_v3 resnet18 resnet50 vit wav2letter"
940+
for m_script in $model_scripts; do
941+
python -m executorch.examples.samsung.scripts.${m_script} -c e9955 -p A8W8
942+
done
943+
938944
# Test ops
939945
python -m unittest discover -s backends/samsung/test/ops -p "test_*.py"
940946

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ jobs:
346346
elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
347347
setup_script_args="--target-toolchain zephyr"
348348
toolchain_prefix=arm-zephyr-eabi-
349-
threshold="135168" # 132 KiB
349+
threshold="135240" # 132 KiB
350350
toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
351351
else
352352
echo "Fail unsupport OS selection ${{ matrix.os }}"

CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,18 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
266266
executorch_move_interface_include_directories_to_build_time_only(
267267
pthreadpool_interface
268268
)
269+
270+
if(APPLE)
271+
# Use hidden visibility for pthreadpool on Apple platforms to avoid issues
272+
# with pthreadpool symbols from libtorch_cpu taking precedence over the ones
273+
# from the pthreadpool library statically linked in _portable_lib. The
274+
# pthreadpool public APIs are marked as weak by default on some Apple
275+
# platforms, so setting to hidden visibility works around this by not
276+
# putting the symbol in the indirection table. See
277+
# https://github.com/pytorch/executorch/issues/14321 for more details.
278+
target_compile_options(pthreadpool PRIVATE -fvisibility=hidden)
279+
endif()
280+
269281
install(
270282
TARGETS pthreadpool pthreadpool_interface fxdiv
271283
EXPORT ExecuTorchTargets

backends/aoti/CMakeLists.txt

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,8 @@ target_compile_options(aoti_common PUBLIC -fexceptions -frtti -fPIC)
4040
# Ensure symbols are exported properly
4141
target_link_options(aoti_common PUBLIC -Wl,--export-dynamic)
4242

43-
# Link against PyTorch libraries and standard libraries
44-
target_link_libraries(
45-
aoti_common
46-
PUBLIC extension_tensor ${CMAKE_DL_LIBS}
47-
# Link PyTorch libraries for AOTI functions
48-
${TORCH_LIBRARIES}
49-
)
43+
# Link against ExecuTorch libraries and standard libraries
44+
target_link_libraries(aoti_common PUBLIC extension_tensor ${CMAKE_DL_LIBS})
5045
executorch_target_link_options_shared_lib(aoti_common)
5146

5247
install(

backends/aoti/aoti_model_container.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ struct AOTIDelegateHandle {
7777
void* so_handle;
7878
std::string so_path;
7979
AOTInductorModelContainerHandle container_handle;
80+
void* cuda_stream; // cudaStream_t stored as void* to avoid CUDA header
81+
// dependency
8082
};
8183

8284
} // namespace aoti

0 commit comments

Comments
 (0)