From 3a4fd046fcde308d72ee8484e84de48c4faea0d1 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Thu, 25 Apr 2024 00:43:43 -0700
Subject: [PATCH 1/4] Add build_native.sh and add README.md

Summary:
Added a script to build the C++ runner for ET and AOTI. Updated README.md
to ask users to run it. Made some improvements to build speed by reducing
duplicate build commands. Now we can rely on `install_requirements.sh` to
install all of the C++ libraries needed by the runner.

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml |  2 +-
 README.md                  | 21 ++++++++--
 runner/aoti.cmake          |  2 +-
 runner/et.cmake            | 34 ++++++++--------
 scripts/build_native.sh    | 79 ++++++++++++++++++++++++++++++++++++++
 scripts/install_et.sh      |  3 +-
 scripts/install_utils.sh   | 36 ++++++++++-------
 7 files changed, 140 insertions(+), 37 deletions(-)
 create mode 100755 scripts/build_native.sh

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 3685f8951..cdff2c57d 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -246,7 +246,7 @@ jobs:
         export REPO_NAME=${{ matrix.repo_name }}
         bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
         echo "::endgroup::"
-        
+
         echo "::group::Convert checkpoint"
         bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
         echo "::endgroup::"

diff --git a/README.md b/README.md
index 52a2a25ab..b3d62dcd3 100644
--- a/README.md
+++ b/README.md
@@ -73,11 +73,10 @@ with `python3 torchchat.py remove llama3`.
 * [Run exported .so file via your own C++ application](#run-server)
   * in Chat mode
   * in Generate mode
-* [Export for mobile via ExecuTorch](#export-executorch)
+* [Export for mobile via ExecuTorch](#exporting-for-mobile-via-executorch)
+* [Run exported ExecuTorch file on iOS or Android](#mobile-execution)
   * in Chat mode
   * in Generate mode
-* [Run exported ExecuTorch file on iOS or Android](#run-mobile)
-
 
 ## Running via PyTorch / Python
 
@@ -251,7 +250,7 @@ python3 torchchat.py export stories15M --output-pte-path stories15M.pte
 python3 torchchat.py generate --device cpu --pte-path stories15M.pte --prompt "Hello my name is"
 ```
 
-See below under [Mobile Execution](#run-mobile) if you want to deploy and execute a model in your iOS or Android app.
+See below under [Mobile Execution](#mobile-execution) if you want to deploy and execute a model in your iOS or Android app.
 
 ## Quantization
@@ -276,6 +275,20 @@ Read the [iOS documentation](docs/iOS.md) for more details on iOS.
 
 Read the [Android documentation](docs/Android.md) for more details on Android.
 
+**Build Native Runner Binary**
+
+We provide an end-to-end C++ [runner](runner/run.cpp) that runs the `*.pte` file exported by following the previous [ExecuTorch](#executorch) section. Note that this binary is for demo purposes; please follow the respective documentation to see how to build a similar application on iOS and Android. To build the runner binary on your Mac or Linux:
+
+```bash
+scripts/build_native.sh et
+```
+
+Run:
+
+```bash
+cmake-out/et_run model.pte -z tokenizer.model -i "Once upon a time"
+```
+
 ## Fine-tuned models from torchtune
 
 torchchat supports running inference with models fine-tuned using [torchtune](https://github.com/pytorch/torchtune). To do so, we first need to convert the checkpoints into a format supported by torchchat.
diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index a54fae676..3a612540f 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 IF(DEFINED ENV{TORCHCHAT_ROOT})
   set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
 ELSE()
-  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 ENDIF()
 
 find_package(CUDA)

diff --git a/runner/et.cmake b/runner/et.cmake
index 8bd1cd963..26e3aa178 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -15,24 +15,27 @@ ELSE()
   set(CMAKE_OUT_DIR "cmake-out")
 ENDIF()
 
-MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")
-
 IF(DEFINED ENV{TORCHCHAT_ROOT})
   set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
 ELSE()
-  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 ENDIF()
 
 project(Torchchat)
 
+IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  SET(CMAKE_INSTALL_PREFIX ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install CACHE PATH "Setting it to a default value" FORCE)
+ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+
 include(CMakePrintHelpers)
 include(runner/Utils.cmake)
 
 cmake_print_variables(TORCHCHAT_ROOT)
 
-MESSAGE(STATUS "Looking for excutorch in ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch")
-set(executorch_DIR ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch)
-find_package(executorch CONFIG PATHS ${executorch_DIR})
+MESSAGE(STATUS "Looking for executorch in ${CMAKE_INSTALL_PREFIX}")
+
+find_package(executorch CONFIG HINTS ${CMAKE_INSTALL_PREFIX})
+
 
 if(executorch_FOUND)
   set(_common_include_directories ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src)
@@ -46,25 +49,27 @@ if(executorch_FOUND)
   # Link ET runtime + extensions
   target_link_libraries(
     et_run PRIVATE
+    cpublas
+    cpuinfo
+    eigen_blas
     executorch
+    extension_data_loader
     extension_module
-    ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
     optimized_kernels
-    quantized_kernels
     portable_kernels
-    cpublas
-    eigen_blas
+    pthreadpool
+    quantized_kernels
+    XNNPACK
     # The libraries below need to be whole-archived linked
+    custom_ops
     optimized_native_cpu_ops_lib
     quantized_ops_lib
     xnnpack_backend
-    XNNPACK
-    pthreadpool
-    cpuinfo
   )
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
   target_link_options_shared_lib(quantized_ops_lib)
   target_link_options_shared_lib(xnnpack_backend)
+  target_link_options_shared_lib(custom_ops)
   # Not clear why linking executorch as whole-archive outside android/apple is leading
   # to double registration. Most likely because of linkage issues.
   # Will figure this out later. Until then use this.
@@ -72,9 +77,6 @@ if(executorch_FOUND)
     target_link_options_shared_lib(executorch)
   endif()
 
-  target_link_libraries(et_run PRIVATE
-  "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")
-
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)

diff --git a/scripts/build_native.sh b/scripts/build_native.sh
new file mode 100755
index 000000000..5b6bf41b4
--- /dev/null
+++ b/scripts/build_native.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Simple script to build native aoti and et runner
+# Function to display a help message
+
+set -ex
+
+show_help() {
+cat << EOF
+Usage: ${0##*/} [-h|--help] aoti|et
+This script builds native aoti and et runner for LLM.
+    -h|--help       Display this help and exit
+    aoti            Build native runner for aoti
+    et              Build native runner for et
+EOF
+}
+# Check if no arguments were passed
+if [ $# -eq 0 ]; then
+    echo "No arguments provided"
+    show_help
+    exit 1
+fi
+while (( "$#" )); do
+  case "$1" in
+    -h|--help)
+      show_help
+      exit 0
+      ;;
+    aoti)
+      echo "Building aoti native runner..."
+      TARGET="aoti"
+      shift
+      ;;
+    et)
+      echo "Building et native runner..."
+      TARGET="et"
+      shift
+      ;;
+    *)
+      echo "Invalid option: $1"
+      show_help
+      exit 1
+      ;;
+  esac
+done
+
+if [ -z "${TORCHCHAT_ROOT}" ]; then
+  # Get the absolute path of the current script
+  SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+  # Get the absolute path of the parent directory
+  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")"
+fi
+
+if [ -z "${ET_BUILD_DIR}" ]; then
+  ET_BUILD_DIR="et-build"
+fi
+
+source "$TORCHCHAT_ROOT/scripts/install_utils.sh"
+
+if [[ "$TARGET" == "et" ]]; then
+  pushd ${TORCHCHAT_ROOT}
+  git submodule update --init
+  find_cmake_prefix_path
+  install_pip_dependencies
+  clone_executorch
+  install_executorch_libs false
+  popd
+fi
+
+# CMake commands
+cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
+cmake --build ./cmake-out --target "${TARGET}"_run
+
+printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model -i <prompt>"

diff --git a/scripts/install_et.sh b/scripts/install_et.sh
index afc0cf636..22c3ac80a 100755
--- a/scripts/install_et.sh
+++ b/scripts/install_et.sh
@@ -19,6 +19,5 @@ pushd ${TORCHCHAT_ROOT}
 find_cmake_prefix_path
 install_pip_dependencies
 clone_executorch
-install_executorch_python_libs $ENABLE_ET_PYBIND
-install_executorch
+install_executorch_libs $ENABLE_ET_PYBIND
 popd

diff --git a/scripts/install_utils.sh b/scripts/install_utils.sh
index 89d4c844d..8383efa50 100644
--- a/scripts/install_utils.sh
+++ b/scripts/install_utils.sh
@@ -9,11 +9,7 @@ set -ex pipefail
 
 install_pip_dependencies() {
   echo "Intalling common pip packages"
-
-  pip3 install wheel
-  pip3 install "cmake>=3.19"
-  pip3 install ninja
-  pip3 install zstd
+  pip3 install wheel "cmake>=3.19" ninja zstd
   pushd ${TORCHCHAT_ROOT}
   pip3 install -r ./requirements.txt
   popd
@@ -60,6 +56,15 @@ install_executorch_python_libs() {
   popd
 }
 
+COMMON_CMAKE_ARGS="\
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
+    -DEXECUTORCH_LOG_LEVEL=Info \
+    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_QUANTIZED=ON"
+
 install_executorch() {
   # AOT lib has to be build for model export
   # So by default it is built, and you can explicitly opt-out
@@ -96,20 +101,25 @@ install_executorch() {
   echo "Inside: ${PWD}"
   rm -rf ${CMAKE_OUT_DIR}
   mkdir ${CMAKE_OUT_DIR}
-  cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_ENABLE_LOGGING=ON \
-        -DEXECUTORCH_LOG_LEVEL=Info \
+  cmake ${COMMON_CMAKE_ARGS} \
+        -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
         -DEXECUTORCH_BUILD_CUSTOM_OPS_AOT=${EXECUTORCH_BUILD_CUSTOM_OPS_AOT_VAR} \
         -DEXECUTORCH_BUILD_CUSTOM=${EXECUTORCH_BUILD_CUSTOM_VAR} \
-        -DEXECUTORCH_BUILD_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
         -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DEXECUTORCH_BUILD_QUANTIZED=ON \
         ${CROSS_COMPILE_ARGS} \
         -S . -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build ${CMAKE_OUT_DIR}
   cmake --install ${CMAKE_OUT_DIR} --prefix ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install
   popd
 }
+
+install_executorch_libs() {
+  # Install executorch python and C++ libs
+  export CMAKE_ARGS="\
+    ${COMMON_CMAKE_ARGS} \
+    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DCMAKE_INSTALL_PREFIX=${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install"
+  export CMAKE_BUILD_ARGS="--target install"
+
+  install_executorch_python_libs $1
+}

From 4db23996f2ccc241f5cd5517e3eaf5523aed00ac Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Thu, 25 Apr 2024 00:54:27 -0700
Subject: [PATCH 2/4] Revert custom ops change

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 runner/et.cmake | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/runner/et.cmake b/runner/et.cmake
index 26e3aa178..5c743dce2 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -49,27 +49,25 @@ if(executorch_FOUND)
   # Link ET runtime + extensions
   target_link_libraries(
     et_run PRIVATE
-    cpublas
-    cpuinfo
-    eigen_blas
-    executorch
-    extension_data_loader
-    extension_module
-    optimized_kernels
-    portable_kernels
-    pthreadpool
-    quantized_kernels
-    XNNPACK
-    # The libraries below need to be whole-archived linked
-    custom_ops
-    optimized_native_cpu_ops_lib
-    quantized_ops_lib
-    xnnpack_backend
+    executorch
+    extension_module
+    extension_data_loader
+    optimized_kernels
+    quantized_kernels
+    portable_kernels
+    cpublas
+    eigen_blas
+    # The libraries below need to be whole-archived linked
+    optimized_native_cpu_ops_lib
+    quantized_ops_lib
+    xnnpack_backend
+    XNNPACK
+    pthreadpool
+    cpuinfo
   )
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
   target_link_options_shared_lib(quantized_ops_lib)
   target_link_options_shared_lib(xnnpack_backend)
-  target_link_options_shared_lib(custom_ops)
   # Not clear why linking executorch as whole-archive outside android/apple is leading
   # to double registration. Most likely because of linkage issues.
   # Will figure this out later. Until then use this.
@@ -77,6 +75,8 @@ if(executorch_FOUND)
     target_link_options_shared_lib(executorch)
   endif()
 
+  target_link_libraries(et_run PRIVATE
+  "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)

From 04adc614e88e35799bc61a8ca836f899c28e045e Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Thu, 25 Apr 2024 01:02:22 -0700
Subject: [PATCH 3/4] Add build_native.sh to CI job

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index cdff2c57d..455c85abf 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -838,13 +838,11 @@ jobs:
         pip install -r requirements.txt
 
         export TORCHCHAT_ROOT=${PWD}
-        export ENABLE_ET_PYBIND=false
-        ./scripts/install_et.sh $ENABLE_ET_PYBIND
+        bash scripts/build_native.sh et
 
         python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
         python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
         python3 -c 'import torchaudio;print(f"torchaudio: {torchaudio.__version__, torchaudio.version.git_version}")'
-        cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
-        cmake --build ./cmake-out --target et_run
+
     - name: Download checkpoints
       run: |
@@ -891,8 +889,8 @@ jobs:
         pip install -r requirements.txt
         pip list
 
-        cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
-        cmake --build ./cmake-out --target aoti_run
+        bash scripts/build_native.sh aoti
+
     - name: Download checkpoint
       run: |
         mkdir -p checkpoints/stories15M

From e9396a15452147188ba43cef88e947e0639fa293 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Thu, 25 Apr 2024 12:01:28 -0700
Subject: [PATCH 4/4] Add README for building native runner for aoti

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 README.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/README.md b/README.md
index b3d62dcd3..9d0d7cda8 100644
--- a/README.md
+++ b/README.md
@@ -235,6 +235,20 @@ python3 torchchat.py generate --dso-path stories15M.so --prompt "Hello my name i
 
 NOTE: The exported model will be large. We suggest you quantize the model, explained further down, before deploying the model on device.
 
+**Build Native Runner Binary**
+
+We provide an end-to-end C++ [runner](runner/run.cpp) that runs the `*.so` file exported by following the previous [examples](#aoti-aot-inductor) section. To build the runner binary on your Mac or Linux:
+
+```bash
+scripts/build_native.sh aoti
+```
+
+Run:
+
+```bash
+cmake-out/aoti_run model.so -z tokenizer.model -i "Once upon a time"
+```
+
 ### ExecuTorch
 
 ExecuTorch enables you to optimize your model for execution on a mobile or embedded device, but can also be used on desktop for testing.
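
An illustrative end-to-end flow, stitched together from the commands the patches above document. This is a sketch, not part of the series: it assumes the `stories15M` checkpoint and its `tokenizer.model` live in the working directory as in the README examples, and that export accepts `--output-dso-path` analogously to the `--output-pte-path` usage shown above.

```bash
# Build the ExecuTorch (et) runner; on first run this clones and builds ExecuTorch.
bash scripts/build_native.sh et

# Export a model to a .pte file and run it with the native binary.
python3 torchchat.py export stories15M --output-pte-path stories15M.pte
cmake-out/et_run stories15M.pte -z tokenizer.model -i "Once upon a time"

# The AOTI flavor is analogous: build the runner, export a .so, run it.
bash scripts/build_native.sh aoti
python3 torchchat.py export stories15M --output-dso-path stories15M.so
cmake-out/aoti_run stories15M.so -z tokenizer.model -i "Once upon a time"
```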