diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 07ec56ed99..954386777b 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -246,7 +246,7 @@ jobs:
           export REPO_NAME=${{ matrix.repo_name }}
           bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
           echo "::endgroup::"
-
+
           echo "::group::Convert checkpoint"
           bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
           echo "::endgroup::"
@@ -838,13 +838,11 @@ jobs:
           pip install -r requirements.txt

           export TORCHCHAT_ROOT=${PWD}
-          export ENABLE_ET_PYBIND=false
-          ./scripts/install_et.sh $ENABLE_ET_PYBIND
+          bash scripts/build_native.sh et
           python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
           python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
           python3 -c 'import torchaudio;print(f"torchaudio: {torchaudio.__version__, torchaudio.version.git_version}")'

-          cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
-          cmake --build ./cmake-out --target et_run
+
       - name: Download checkpoints
         run: |
@@ -891,8 +889,8 @@ jobs:
           pip install -r requirements.txt
           pip list

-          cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
-          cmake --build ./cmake-out --target aoti_run
+          bash scripts/build_native.sh aoti
+
       - name: Download checkpoint
         run: |
           mkdir -p checkpoints/stories15M
diff --git a/README.md b/README.md
index aa91710b84..8d2dd34f2e 100644
--- a/README.md
+++ b/README.md
@@ -73,11 +73,10 @@ with `python3 torchchat.py remove llama3`.
 * [Run exported .so file via your own C++ application](#run-server)
   * in Chat mode
   * in Generate mode
-* [Export for mobile via ExecuTorch](#export-executorch)
+* [Export for mobile via ExecuTorch](#exporting-for-mobile-via-executorch)
+* [Run exported ExecuTorch file on iOS or Android](#mobile-execution)
   * in Chat mode
   * in Generate mode
-* [Run exported ExecuTorch file on iOS or Android](#run-mobile)
-


 ## Running via PyTorch / Python
@@ -235,6 +234,20 @@ python3 torchchat.py generate --dso-path stories15M.so --prompt "Hello my name i

 NOTE: The exported model will be large. We suggest you quantize the model, explained further down, before deploying the model on device.

+**Build Native Runner Binary**
+
+We provide an end-to-end C++ [runner](runner/run.cpp) that runs the `*.so` file exported after following the previous [examples](#aoti-aot-inductor) section. To build the runner binary on your Mac or Linux:
+
+```bash
+scripts/build_native.sh aoti
+```
+
+Run:
+
+```bash
+cmake-out/aoti_run model.so -z tokenizer.model -i "Once upon a time"
+```
+
 ### ExecuTorch

 ExecuTorch enables you to optimize your model for execution on a mobile or embedded device, but can also be used on desktop for testing.
@@ -250,7 +263,7 @@ python3 torchchat.py export stories15M --output-pte-path stories15M.pte
 python3 torchchat.py generate --device cpu --pte-path stories15M.pte --prompt "Hello my name is"
 ```

-See below under [Mobile Execution](#run-mobile) if you want to deploy and execute a model in your iOS or Android app.
+See below under [Mobile Execution](#mobile-execution) if you want to deploy and execute a model in your iOS or Android app.



@@ -265,6 +278,20 @@ Read the [iOS documentation](docs/iOS.md) for more details on iOS.

 Read the [Android documentation](docs/Android.md) for more details on Android.
+**Build Native Runner Binary**
+
+We provide an end-to-end C++ [runner](runner/run.cpp) that runs the `*.pte` file exported after following the previous [ExecuTorch](#executorch) section. Note that this binary is for demonstration purposes only; please follow the respective documentation to see how to build a similar application for iOS and Android. To build the runner binary on your Mac or Linux:
+
+```bash
+scripts/build_native.sh et
+```
+
+Run:
+
+```bash
+cmake-out/et_run model.pte -z tokenizer.model -i "Once upon a time"
+```
+
 ## Fine-tuned models from torchtune

 torchchat supports running inference with models fine-tuned using [torchtune](https://github.com/pytorch/torchtune). To do so, we first need to convert the checkpoints into a format supported by torchchat.
diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index a54fae676f..3a612540f2 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 IF(DEFINED ENV{TORCHCHAT_ROOT})
   set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
 ELSE()
-  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 ENDIF()

 find_package(CUDA)
diff --git a/runner/et.cmake b/runner/et.cmake
index 8bd1cd9638..5c743dce28 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -15,24 +15,27 @@ ELSE()
   set(CMAKE_OUT_DIR "cmake-out")
 ENDIF()

-MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")
-
 IF(DEFINED ENV{TORCHCHAT_ROOT})
   set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
 ELSE()
-  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 ENDIF()

 project(Torchchat)

+IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  SET(CMAKE_INSTALL_PREFIX ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install CACHE PATH "Setting it to a default value" FORCE)
+ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+
 include(CMakePrintHelpers)
 include(runner/Utils.cmake)

 cmake_print_variables(TORCHCHAT_ROOT)

-MESSAGE(STATUS "Looking for excutorch in ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch")
-set(executorch_DIR ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch)
-find_package(executorch CONFIG PATHS ${executorch_DIR})
+MESSAGE(STATUS "Looking for executorch in ${CMAKE_INSTALL_PREFIX}")
+
+find_package(executorch CONFIG HINTS ${CMAKE_INSTALL_PREFIX})
+
 if(executorch_FOUND)
   set(_common_include_directories ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src)

@@ -46,21 +49,21 @@ if(executorch_FOUND)
   # Link ET runtime + extensions
   target_link_libraries(
     et_run PRIVATE
-    executorch
-    extension_module
-    ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
-    optimized_kernels
-    quantized_kernels
-    portable_kernels
-    cpublas
-    eigen_blas
-    # The libraries below need to be whole-archived linked
-    optimized_native_cpu_ops_lib
-    quantized_ops_lib
-    xnnpack_backend
-    XNNPACK
-    pthreadpool
-    cpuinfo
+      executorch
+      extension_module
+      extension_data_loader
+      optimized_kernels
+      quantized_kernels
+      portable_kernels
+      cpublas
+      eigen_blas
+      # The libraries below need to be whole-archived linked
+      optimized_native_cpu_ops_lib
+      quantized_ops_lib
+      xnnpack_backend
+      XNNPACK
+      pthreadpool
+      cpuinfo
   )
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
   target_link_options_shared_lib(quantized_ops_lib)
@@ -73,8 +76,7 @@ if(executorch_FOUND)
   endif()

   target_link_libraries(et_run PRIVATE
-    "$")
-
+    "$")
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)
diff --git a/scripts/build_native.sh b/scripts/build_native.sh
new file mode 100755
index 0000000000..5b6bf41b43
--- /dev/null
+++ b/scripts/build_native.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Simple script to build native aoti and et runner
+# Function to display a help message
+
+set -ex
+
+show_help() {
+cat << EOF
+Usage: ${0##*/} [-h|--help] aoti|et
+This script builds native aoti and et runner for LLM.
+    -h|--help       Display this help and exit
+    aoti            Build native runner for aoti
+    et              Build native runner for et
+EOF
+}
+# Check if no arguments were passed
+if [ $# -eq 0 ]; then
+  echo "No arguments provided"
+  show_help
+  exit 1
+fi
+while (( "$#" )); do
+  case "$1" in
+    -h|--help)
+      show_help
+      exit 0
+      ;;
+    aoti)
+      echo "Building aoti native runner..."
+      TARGET="aoti"
+      shift
+      ;;
+    et)
+      echo "Building et native runner..."
+      TARGET="et"
+      shift
+      ;;
+    *)
+      echo "Invalid option: $1"
+      show_help
+      exit 1
+      ;;
+  esac
+done
+
+if [ -z "${TORCHCHAT_ROOT}" ]; then
+  # Get the absolute path of the current script
+  SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+  # Get the absolute path of the parent directory
+  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")"
+fi
+
+if [ -z "${ET_BUILD_DIR}" ]; then
+  ET_BUILD_DIR="et-build"
+fi
+
+source "$TORCHCHAT_ROOT/scripts/install_utils.sh"
+
+if [[ "$TARGET" == "et" ]]; then
+  pushd ${TORCHCHAT_ROOT}
+  git submodule update --init
+  find_cmake_prefix_path
+  install_pip_dependencies
+  clone_executorch
+  install_executorch_libs false
+  popd
+fi
+
+# CMake commands
+cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
+cmake --build ./cmake-out --target "${TARGET}"_run
+
+printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<so|pte> -z tokenizer.model -i <prompt>\n"
diff --git a/scripts/install_et.sh b/scripts/install_et.sh
index afc0cf6365..22c3ac80ae 100755
--- a/scripts/install_et.sh
+++ b/scripts/install_et.sh
@@ -19,6 +19,5 @@ pushd ${TORCHCHAT_ROOT}
 find_cmake_prefix_path
 install_pip_dependencies
 clone_executorch
-install_executorch_python_libs $ENABLE_ET_PYBIND
-install_executorch
+install_executorch_libs $ENABLE_ET_PYBIND
 popd
diff --git a/scripts/install_utils.sh b/scripts/install_utils.sh
index 89d4c844de..8383efa507 100644
--- a/scripts/install_utils.sh
+++ b/scripts/install_utils.sh
@@ -9,11 +9,7 @@ set -ex pipefail

 install_pip_dependencies() {
   echo "Intalling common pip packages"
-
-  pip3 install wheel
-  pip3 install "cmake>=3.19"
-  pip3 install ninja
-  pip3 install zstd
+  pip3 install wheel "cmake>=3.19" ninja zstd
   pushd ${TORCHCHAT_ROOT}
   pip3 install -r ./requirements.txt
   popd
@@ -60,6 +56,15 @@ install_executorch_python_libs() {
   popd
 }

+COMMON_CMAKE_ARGS="\
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
+    -DEXECUTORCH_LOG_LEVEL=Info \
+    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_QUANTIZED=ON"
+
 install_executorch() {
   # AOT lib has to be build for model export
   # So by default it is built, and you can explicitly opt-out
@@ -96,20 +101,25 @@
     echo "Inside: ${PWD}"
     rm -rf ${CMAKE_OUT_DIR}
     mkdir ${CMAKE_OUT_DIR}
-    cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-      -DCMAKE_BUILD_TYPE=Release \
-      -DEXECUTORCH_ENABLE_LOGGING=ON \
-      -DEXECUTORCH_LOG_LEVEL=Info \
+    cmake ${COMMON_CMAKE_ARGS} \
+      -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
       -DEXECUTORCH_BUILD_CUSTOM_OPS_AOT=${EXECUTORCH_BUILD_CUSTOM_OPS_AOT_VAR} \
       -DEXECUTORCH_BUILD_CUSTOM=${EXECUTORCH_BUILD_CUSTOM_VAR} \
-      -DEXECUTORCH_BUILD_OPTIMIZED=ON \
-      -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-      -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
       -DEXECUTORCH_BUILD_XNNPACK=ON \
-      -DEXECUTORCH_BUILD_QUANTIZED=ON \
       ${CROSS_COMPILE_ARGS} \
       -S . -B ${CMAKE_OUT_DIR} -G Ninja
     cmake --build ${CMAKE_OUT_DIR}
     cmake --install ${CMAKE_OUT_DIR} --prefix ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install
   popd
 }
+
+install_executorch_libs() {
+  # Install executorch python and C++ libs
+  export CMAKE_ARGS="\
+    ${COMMON_CMAKE_ARGS} \
+    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DCMAKE_INSTALL_PREFIX=${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install"
+  export CMAKE_BUILD_ARGS="--target install"
+
+  install_executorch_python_libs $1
+}
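
For reviewers who want to exercise this patch locally, a minimal smoke-test sketch is below. It only chains together commands that appear in the README and CI changes above; the stories15M checkpoint location, the tokenizer path, and the `--output-dso-path` export flag are assumptions for illustration rather than part of the diff, so adjust them to your setup.

```bash
# Hypothetical end-to-end check of the new native runners (paths/flags are assumptions).
export TORCHCHAT_ROOT=${PWD}   # run from the torchchat checkout root

# AOTI flow: export a .so, build the runner via the new script, then run it.
python3 torchchat.py export stories15M --output-dso-path stories15M.so
bash scripts/build_native.sh aoti
cmake-out/aoti_run stories15M.so -z checkpoints/stories15M/tokenizer.model -i "Once upon a time"

# ExecuTorch flow: export a .pte, build the runner (this also clones and builds ExecuTorch), then run it.
python3 torchchat.py export stories15M --output-pte-path stories15M.pte
bash scripts/build_native.sh et
cmake-out/et_run stories15M.pte -z checkpoints/stories15M/tokenizer.model -i "Once upon a time"
```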