Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 101 additions & 3 deletions .github/workflows/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,14 +165,27 @@ jobs:
echo "::endgroup::"

export-gemma3-cuda-artifact:
name: export-gemma3-cuda-artifact
name: export-gemma3-cuda-${{ matrix.quant.name }}
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
secrets: inherit
strategy:
  fail-fast: false
  matrix:
    quant:
      # Each entry describes one export variant: `name` appears in the job
      # name and log-group titles, and `extra_args` is copied into EXTRA_ARGS
      # by the export script.
      # NOTE(review): the step that consumes `artifact` is not visible in
      # this hunk. The names below use the gemma3 prefix so they match what
      # test-gemma3-cuda-e2e downloads, instead of the copy-pasted voxtral
      # names.
      - name: "non-quantized"
        artifact: "gemma3-cuda-export"
        extra_args: ""
      # TODO: enable gemma3 quantization
      # - name: "quantized-int4-tile-packed"
      #   artifact: "gemma3-cuda-quantized-int4-tile-packed"
      #   extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
      # - name: "quantized-int4-weight-only"
      #   artifact: "gemma3-cuda-quantized-int4-weight-only"
      #   # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
      #   extra_args: "--qlinear_encoder 4w"
with:
timeout: 90
secrets-env: EXECUTORCH_HF_TOKEN
Expand All @@ -198,7 +211,8 @@ jobs:
pip list
echo "::endgroup::"

echo "::group::Export Gemma3"
echo "::group::Export Gemma3 (${{ matrix.quant.name }})"
EXTRA_ARGS="${{ matrix.quant.extra_args }}"
optimum-cli export executorch \
--model "google/gemma-3-4b-it" \
--task "multimodal-text-to-text" \
Expand All @@ -212,7 +226,7 @@ jobs:
test -f aoti_cuda_blob.ptd
echo "::endgroup::"

echo "::group::Store Gemma3 Artifacts"
echo "::group::Store Gemma3 Artifacts (${{ matrix.quant.name }})"
mkdir -p "${RUNNER_ARTIFACT_DIR}/"
cp model.pte "${RUNNER_ARTIFACT_DIR}/"
cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
Expand Down Expand Up @@ -407,3 +421,87 @@ jobs:
exit $EXIT_CODE
fi
echo "::endgroup::"

test-gemma3-cuda-e2e:
  # End-to-end check for Gemma3 on CUDA: downloads the artifacts produced by
  # export-gemma3-cuda-artifact, builds the C++ runner with CUDA enabled, runs
  # it on a sample image, and verifies both the output text and the exit code.
  name: test-gemma3-cuda-e2e-${{ matrix.format.name }}
  needs: export-gemma3-cuda-artifact
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  permissions:
    id-token: write
    contents: read
  strategy:
    fail-fast: false
    matrix:
      format:
        - name: "non-quantized"
          artifact: "gemma3-cuda-export"
        # TODO: enable quantized gemma3.
        # - name: "quantized-int4-tile-packed"
        #   artifact: "gemma3-cuda-quantized-int4-tile-packed"
        # - name: "quantized-int4-weight-only"
        #   artifact: "gemma3-cuda-quantized-int4-weight-only"
  with:
    timeout: 90
    runner: linux.g5.4xlarge.nvidia.gpu
    gpu-arch-type: cuda
    gpu-arch-version: 12.6
    use-custom-docker-registry: false
    submodules: recursive
    download-artifact: ${{ matrix.format.artifact }}
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    script: |
      set -eux

      echo "::group::Setup ExecuTorch Requirements"
      ./install_requirements.sh
      pip list
      echo "::endgroup::"

      echo "::group::Prepare Gemma3 Artifacts (${{ matrix.format.name }})"
      cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
      cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
      # NOTE(review): the tokenizer comes from a gemma-3-1b-it repo while the
      # export job uses google/gemma-3-4b-it; presumably the tokenizer is
      # shared across sizes - confirm.
      TOKENIZER_URL="https://huggingface.co/unsloth/gemma-3-1b-it/resolve/main/tokenizer.json"
      # --fail makes curl exit non-zero on an HTTP error instead of saving the
      # error page as tokenizer.json.
      curl --fail -L "$TOKENIZER_URL" -o tokenizer.json
      ls -al model.pte aoti_cuda_blob.ptd tokenizer.json
      IMAGE_PATH="docs/source/_static/img/et-logo.png"
      echo "::endgroup::"

      echo "::group::Build Gemma3 Runner"
      # Build and install ExecuTorch itself into cmake-out first; the runner
      # project below is configured inside that same build tree.
      cmake --preset llm \
            -DEXECUTORCH_BUILD_CUDA=ON \
            -DCMAKE_INSTALL_PREFIX=cmake-out \
            -DCMAKE_BUILD_TYPE=Release \
            -Bcmake-out -S.
      cmake --build cmake-out -j$(( $(nproc) - 1 )) --target install --config Release

      cmake -DEXECUTORCH_BUILD_CUDA=ON \
            -DCMAKE_BUILD_TYPE=Release \
            -Sexamples/models/gemma3 \
            -Bcmake-out/examples/models/gemma3/
      cmake --build cmake-out/examples/models/gemma3 --target gemma3_e2e_runner --config Release
      echo "::endgroup::"

      echo "::group::Run Gemma3 Runner (${{ matrix.format.name }})"
      # Disable errexit so a failing runner still gets its output printed and
      # its exit code inspected below.
      set +e
      # Prepend /opt/conda/lib. The ":+" expansion keeps this safe under
      # `set -u` when LD_LIBRARY_PATH is unset and avoids a trailing empty
      # path entry.
      export LD_LIBRARY_PATH="/opt/conda/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
      OUTPUT=$(cmake-out/examples/models/gemma3/gemma3_e2e_runner \
            --model_path model.pte \
            --data_path aoti_cuda_blob.ptd \
            --tokenizer_path tokenizer.json \
            --image_path "$IMAGE_PATH" \
            --temperature 0 2>&1)
      EXIT_CODE=$?
      set -e

      echo "$OUTPUT"

      # The run passes only if the output mentions "chip" (case-insensitive).
      if ! echo "$OUTPUT" | grep -iq "chip"; then
        echo "Expected output 'chip' not found in output"
        exit 1
      fi

      if [ $EXIT_CODE -ne 0 ]; then
        echo "Unexpected exit code: $EXIT_CODE"
        exit $EXIT_CODE
      fi
      echo "::endgroup::"
124 changes: 124 additions & 0 deletions examples/models/gemma3/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# Standalone CMake project that builds the gemma3 end-to-end runner.
#
cmake_minimum_required(VERSION 3.24)
project(gemma3)

# Repository root, three directories above this file.
set(EXECUTORCH_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../..")

# Shared ExecuTorch CMake helpers used throughout the rest of this file.
include("${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake")

# Record in CMAKE_TOOLCHAIN_IOS whether the active toolchain file is named
# "iOS.cmake" or "ios.toolchain.cmake".
#
# The dots are written as "\\." because "\." inside a quoted CMake argument is
# an identity escape: the regex engine would receive a bare "." that matches
# any character. The variable is expanded inside quotes so an unset
# CMAKE_TOOLCHAIN_FILE is compared as an empty string.
if("${CMAKE_TOOLCHAIN_FILE}" MATCHES "(iOS|ios\\.toolchain)\\.cmake$")
  set(CMAKE_TOOLCHAIN_IOS ON)
else()
  set(CMAKE_TOOLCHAIN_IOS OFF)
endif()

# Default to C++17 unless the caller already chose a standard.
if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17)
endif()

# The parent of the repository root goes on the include path so sources can
# write: #include <executorch/path/to/header.h>
set(_common_include_directories "${EXECUTORCH_ROOT}/..")

# Point find_package() at the gflags package config under third-party/gflags,
# three directories above this project's binary dir (presumably the top-level
# ExecuTorch build tree - confirm).
set(gflags_DIR "${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags")
find_package(gflags REQUIRED)

# Locate the `executorch` package the same way: add that directory to the
# find roots and search both re-rooted and non-rooted paths.
list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_CURRENT_BINARY_DIR}/../../..")
find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
executorch_target_link_options_shared_lib(executorch)

# Single source file for the runner executable.
set(_srcs e2e_runner.cpp)

# Core runtime, flags parsing, and the CPU operator/BLAS libraries.
set(link_libraries
    executorch
    gflags
    optimized_native_cpu_ops_lib
    quantized_ops_lib
    custom_ops
    cpublas
    eigen_blas
)

# Apply the ExecuTorch shared-lib link options helper (from Utils.cmake) to
# each operator library.
foreach(_ops_lib IN ITEMS optimized_native_cpu_ops_lib quantized_ops_lib
                          custom_ops
)
  executorch_target_link_options_shared_lib(${_ops_lib})
endforeach()

# XNNPACK backend: linked only when the installed ExecuTorch provides it.
if(TARGET xnnpack_backend)
  list(APPEND link_libraries xnnpack_backend XNNPACK
       xnnpack-microkernels-prod
  )
  # kleidiai is optional and only linked when its target exists.
  if(TARGET kleidiai)
    list(APPEND link_libraries kleidiai)
  endif()
  executorch_target_link_options_shared_lib(xnnpack_backend)
endif()

# The LLM runner extension is mandatory; stop configuring if the installed
# ExecuTorch does not provide the extension_llm_runner target.
if(NOT TARGET extension_llm_runner)
  message(FATAL_ERROR
          "ExecuTorch must be installed with EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER enabled."
  )
endif()

# On Android, cpuinfo depends on the platform log library.
if(ANDROID)
  list(APPEND link_libraries log)
endif()

# stb_image: a lightweight image-loading library, fetched at a pinned commit.
include(FetchContent)
FetchContent_Declare(
  stb
  GIT_REPOSITORY https://github.com/nothings/stb.git
  GIT_TAG f0569113c93ad095470c54bf34a17b36646bbbb5
)
FetchContent_MakeAvailable(stb)

# Expose the stb headers; deprecated/ is included as well because
# stb_image_resize.h lives there and is kept for internal compatibility.
list(APPEND _common_include_directories
     "${stb_SOURCE_DIR}"
     "${stb_SOURCE_DIR}/deprecated"
)

# ExecuTorch extensions required by the multimodal LLM runner.
list(APPEND link_libraries
     extension_llm_runner
     extension_module
     extension_data_loader
     extension_tensor
     extension_flat_tensor
)

# CUDA backend: requires the CUDA toolkit and links the aoti_cuda library.
# Enabled by passing -DEXECUTORCH_BUILD_CUDA=ON.
if(EXECUTORCH_BUILD_CUDA)
  find_package(CUDAToolkit REQUIRED)
  executorch_target_link_options_shared_lib(aoti_cuda)
  list(APPEND link_libraries aoti_cuda)
endif()

# Tokenizer library used by the runner.
list(APPEND link_libraries tokenizers::tokenizers)

# The end-to-end runner executable, built from the sources listed in _srcs.
add_executable(gemma3_e2e_runner ${_srcs})

# For every build type except Debug, apply the gc-sections helper (defined
# outside this file) and, on non-Apple platforms, pass -s to the linker to
# strip symbols from the binary.
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
  target_link_options_gc_sections(gemma3_e2e_runner)
  if(NOT APPLE)
    target_link_options(gemma3_e2e_runner PRIVATE "LINKER:-s")
  endif()
endif()

# Nothing links against an executable, so its include directories, libraries
# and compile options are build requirements only and are declared PRIVATE.
target_include_directories(
  gemma3_e2e_runner PRIVATE ${_common_include_directories}
)
target_link_libraries(gemma3_e2e_runner PRIVATE ${link_libraries})
# NOTE(review): _common_compile_options is not set anywhere in this file, so
# this adds no options unless an included module defines it - confirm.
target_compile_options(gemma3_e2e_runner PRIVATE ${_common_compile_options})
Loading
Loading