From 3a4fd046fcde308d72ee8484e84de48c4faea0d1 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Thu, 25 Apr 2024 00:43:43 -0700
Subject: [PATCH 1/4] Add build_native.sh and add README.md

Summary:
Added a script to build the C++ runner for ET and AOTI. Updated README.md
to ask users to run it. Made some improvements to build speed by reducing
duplicate build commands. Now we can rely on `install_requirements.sh` to
install all of the C++ libraries needed by the runner.

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml |  2 +-
 README.md                  | 21 ++++++++--
 runner/aoti.cmake          |  2 +-
 runner/et.cmake            | 34 ++++++++--------
 scripts/build_native.sh    | 79 ++++++++++++++++++++++++++++++++++++++
 scripts/install_et.sh      |  3 +-
 scripts/install_utils.sh   | 36 ++++++++++-------
 7 files changed, 140 insertions(+), 37 deletions(-)
 create mode 100755 scripts/build_native.sh

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 3685f8951..cdff2c57d 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -246,7 +246,7 @@ jobs:
         export REPO_NAME=${{ matrix.repo_name }}
         bash .ci/scripts/wget_checkpoint.sh ${REPO_NAME} ${{ matrix.resources }}
         echo "::endgroup::"
-        
+
         echo "::group::Convert checkpoint"
         bash .ci/scripts/convert_checkpoint.sh ${REPO_NAME}
         echo "::endgroup::"

diff --git a/README.md b/README.md
index 52a2a25ab..b3d62dcd3 100644
--- a/README.md
+++ b/README.md
@@ -73,11 +73,10 @@ with `python3 torchchat.py remove llama3`.
 * [Run exported .so file via your own C++ application](#run-server)
   * in Chat mode
   * in Generate mode
-* [Export for mobile via ExecuTorch](#export-executorch)
+* [Export for mobile via ExecuTorch](#exporting-for-mobile-via-executorch)
+* [Run exported ExecuTorch file on iOS or Android](#mobile-execution)
   * in Chat mode
   * in Generate mode
-* [Run exported ExecuTorch file on iOS or Android](#run-mobile)
-
 
 ## Running via PyTorch / Python
 
@@ -251,7 +250,7 @@ python3 torchchat.py export stories15M --output-pte-path stories15M.pte
 python3 torchchat.py generate --device cpu --pte-path stories15M.pte --prompt "Hello my name is"
 ```
 
-See below under [Mobile Execution](#run-mobile) if you want to deploy and execute a model in your iOS or Android app.
+See below under [Mobile Execution](#mobile-execution) if you want to deploy and execute a model in your iOS or Android app.
 
 ## Quantization
@@ -276,6 +275,20 @@ Read the [iOS documentation](docs/iOS.md) for more details on iOS.
 
 Read the [Android documentation](docs/Android.md) for more details on Android.
 
+**Build Native Runner Binary**
+
+We provide an end-to-end C++ [runner](runner/run.cpp) that runs the `*.pte` file exported by following the previous [ExecuTorch](#executorch) section. Note that this binary is for demo purposes; please follow the respective documentation to see how to build a similar application on iOS and Android. To build the runner binary on your Mac or Linux:
+
+```bash
+scripts/build_native.sh et
+```
+
+Run:
+
+```bash
+cmake-out/et_run model.pte -z tokenizer.model -i "Once upon a time"
+```
+
 ## Fine-tuned models from torchtune
 
 torchchat supports running inference with models fine-tuned using [torchtune](https://github.com/pytorch/torchtune). To do so, we first need to convert the checkpoints into a format supported by torchchat.
diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index a54fae676..3a612540f 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -3,7 +3,7 @@ set(CMAKE_CXX_STANDARD 17)
 IF(DEFINED ENV{TORCHCHAT_ROOT})
   set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
 ELSE()
-  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 ENDIF()
 
 find_package(CUDA)

diff --git a/runner/et.cmake b/runner/et.cmake
index 8bd1cd963..26e3aa178 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -15,24 +15,27 @@ ELSE()
   set(CMAKE_OUT_DIR "cmake-out")
 ENDIF()
 
-MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")
-
 IF(DEFINED ENV{TORCHCHAT_ROOT})
   set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
 ELSE()
-  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
+  set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 ENDIF()
 
 project(Torchchat)
 
+IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  SET(CMAKE_INSTALL_PREFIX ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install CACHE PATH "Setting it to a default value" FORCE)
+ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+
 include(CMakePrintHelpers)
 include(runner/Utils.cmake)
 
 cmake_print_variables(TORCHCHAT_ROOT)
 
-MESSAGE(STATUS "Looking for excutorch in ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch")
-set(executorch_DIR ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch)
-find_package(executorch CONFIG PATHS ${executorch_DIR})
+MESSAGE(STATUS "Looking for executorch in ${CMAKE_INSTALL_PREFIX}")
+
+find_package(executorch CONFIG HINTS ${CMAKE_INSTALL_PREFIX})
+
 
 if(executorch_FOUND)
   set(_common_include_directories ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src)
@@ -46,25 +49,27 @@ if(executorch_FOUND)
   # Link ET runtime + extensions
   target_link_libraries(
     et_run PRIVATE
+    cpublas
+    cpuinfo
+    eigen_blas
     executorch
+    extension_data_loader
     extension_module
-    ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
     optimized_kernels
-    quantized_kernels
     portable_kernels
-    cpublas
-    eigen_blas
+    pthreadpool
+    quantized_kernels
+    XNNPACK
     # The libraries below need to be whole-archived linked
+    custom_ops
     optimized_native_cpu_ops_lib
     quantized_ops_lib
     xnnpack_backend
-    XNNPACK
-    pthreadpool
-    cpuinfo
   )
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
   target_link_options_shared_lib(quantized_ops_lib)
   target_link_options_shared_lib(xnnpack_backend)
+  target_link_options_shared_lib(custom_ops)
   # Not clear why linking executorch as whole-archive outside android/apple is leading
   # to double registration. Most likely because of linkage issues.
   # Will figure this out later. Until then use this.
@@ -72,9 +77,6 @@ if(executorch_FOUND)
     target_link_options_shared_lib(executorch)
   endif()
 
-  target_link_libraries(et_run PRIVATE
-  "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")
-
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)

diff --git a/scripts/build_native.sh b/scripts/build_native.sh
new file mode 100755
index 000000000..5b6bf41b4
--- /dev/null
+++ b/scripts/build_native.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Simple script to build native aoti and et runner
+# Function to display a help message
+
+set -ex
+
+show_help() {
+cat << EOF
+Usage: ${0##*/} [-h|--help] aoti|et
+This script builds native aoti and et runner for LLM.
+    -h|--help       Display this help and exit
+    aoti            Build native runner for aoti
+    et              Build native runner for et
+EOF
+}
+# Check if no arguments were passed
+if [ $# -eq 0 ]; then
+    echo "No arguments provided"
+    show_help
+    exit 1
+fi
+while (( "$#" )); do
+  case "$1" in
+    -h|--help)
+      show_help
+      exit 0
+      ;;
+    aoti)
+      echo "Building aoti native runner..."
+      TARGET="aoti"
+      shift
+      ;;
+    et)
+      echo "Building et native runner..."
+      TARGET="et"
+      shift
+      ;;
+    *)
+      echo "Invalid option: $1"
+      show_help
+      exit 1
+      ;;
+  esac
+done
+
+if [ -z "${TORCHCHAT_ROOT}" ]; then
+  # Get the absolute path of the current script
+  SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+  # Get the absolute path of the parent directory
+  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")"
+fi
+
+if [ -z "${ET_BUILD_DIR}" ]; then
+  ET_BUILD_DIR="et-build"
+fi
+
+source "$TORCHCHAT_ROOT/scripts/install_utils.sh"
+
+if [[ "$TARGET" == "et" ]]; then
+  pushd ${TORCHCHAT_ROOT}
+  git submodule update --init
+  find_cmake_prefix_path
+  install_pip_dependencies
+  clone_executorch
+  install_executorch_libs false
+  popd
+fi
+
+# CMake commands
+cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
+cmake --build ./cmake-out --target "${TARGET}"_run
+
+printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model -i <prompt>"

diff --git a/scripts/install_et.sh b/scripts/install_et.sh
index afc0cf636..22c3ac80a 100755
--- a/scripts/install_et.sh
+++ b/scripts/install_et.sh
@@ -19,6 +19,5 @@ pushd ${TORCHCHAT_ROOT}
 find_cmake_prefix_path
 install_pip_dependencies
 clone_executorch
-install_executorch_python_libs $ENABLE_ET_PYBIND
-install_executorch
+install_executorch_libs $ENABLE_ET_PYBIND
 popd

diff --git a/scripts/install_utils.sh b/scripts/install_utils.sh
index 89d4c844d..8383efa50 100644
--- a/scripts/install_utils.sh
+++ b/scripts/install_utils.sh
@@ -9,11 +9,7 @@ set -ex pipefail
 
 install_pip_dependencies() {
   echo "Intalling common pip packages"
-
-  pip3 install wheel
-  pip3 install "cmake>=3.19"
-  pip3 install ninja
-  pip3 install zstd
+  pip3 install wheel "cmake>=3.19" ninja zstd
   pushd ${TORCHCHAT_ROOT}
   pip3 install -r ./requirements.txt
   popd
@@ -60,6 +56,15 @@ install_executorch_python_libs() {
   popd
 }
 
+COMMON_CMAKE_ARGS="\
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_ENABLE_LOGGING=ON \
+    -DEXECUTORCH_LOG_LEVEL=Info \
+    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_QUANTIZED=ON"
+
 install_executorch() {
   # AOT lib has to be build for model export
   # So by default it is built, and you can explicitly opt-out
@@ -96,20 +101,25 @@ install_executorch() {
   echo "Inside: ${PWD}"
   rm -rf ${CMAKE_OUT_DIR}
   mkdir ${CMAKE_OUT_DIR}
-  cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_ENABLE_LOGGING=ON \
-        -DEXECUTORCH_LOG_LEVEL=Info \
+  cmake ${COMMON_CMAKE_ARGS} \
+        -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
         -DEXECUTORCH_BUILD_CUSTOM_OPS_AOT=${EXECUTORCH_BUILD_CUSTOM_OPS_AOT_VAR} \
         -DEXECUTORCH_BUILD_CUSTOM=${EXECUTORCH_BUILD_CUSTOM_VAR} \
-        -DEXECUTORCH_BUILD_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
         -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DEXECUTORCH_BUILD_QUANTIZED=ON \
         ${CROSS_COMPILE_ARGS} \
         -S . -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build ${CMAKE_OUT_DIR}
   cmake --install ${CMAKE_OUT_DIR} --prefix ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install
   popd
 }
+
+install_executorch_libs() {
+  # Install executorch python and C++ libs
+  export CMAKE_ARGS="\
+    ${COMMON_CMAKE_ARGS} \
+    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DCMAKE_INSTALL_PREFIX=${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install"
+  export CMAKE_BUILD_ARGS="--target install"
+
+  install_executorch_python_libs $1
+}

From 4db23996f2ccc241f5cd5517e3eaf5523aed00ac Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Thu, 25 Apr 2024 00:54:27 -0700
Subject: [PATCH 2/4] Revert custom ops change

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 runner/et.cmake | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/runner/et.cmake b/runner/et.cmake
index 26e3aa178..5c743dce2 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -49,27 +49,25 @@ if(executorch_FOUND)
   # Link ET runtime + extensions
   target_link_libraries(
     et_run PRIVATE
-    cpublas
-    cpuinfo
-    eigen_blas
-    executorch
-    extension_data_loader
-    extension_module
-    optimized_kernels
-    portable_kernels
-    pthreadpool
-    quantized_kernels
-    XNNPACK
-    # The libraries below need to be whole-archived linked
-    custom_ops
-    optimized_native_cpu_ops_lib
-    quantized_ops_lib
-    xnnpack_backend
+    executorch
+    extension_module
+    extension_data_loader
+    optimized_kernels
+    quantized_kernels
+    portable_kernels
+    cpublas
+    eigen_blas
+    # The libraries below need to be whole-archived linked
+    optimized_native_cpu_ops_lib
+    quantized_ops_lib
+    xnnpack_backend
+    XNNPACK
+    pthreadpool
+    cpuinfo
   )
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
   target_link_options_shared_lib(quantized_ops_lib)
   target_link_options_shared_lib(xnnpack_backend)
-  target_link_options_shared_lib(custom_ops)
   # Not clear why linking executorch as whole-archive outside android/apple is leading
   # to double registration. Most likely because of linkage issues.
   # Will figure this out later. Until then use this.
@@ -77,6 +75,8 @@ if(executorch_FOUND)
     target_link_options_shared_lib(executorch)
   endif()
 
+  target_link_libraries(et_run PRIVATE
+  "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)

From 04adc614e88e35799bc61a8ca836f899c28e045e Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Thu, 25 Apr 2024 01:02:22 -0700
Subject: [PATCH 3/4] Add build_native.sh to CI job

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/pull.yml | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index cdff2c57d..455c85abf 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -838,13 +838,11 @@ jobs:
         pip install -r requirements.txt
 
         export TORCHCHAT_ROOT=${PWD}
-        export ENABLE_ET_PYBIND=false
-        ./scripts/install_et.sh $ENABLE_ET_PYBIND
+        bash scripts/build_native.sh et
 
         python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
         python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
         python3 -c 'import torchaudio;print(f"torchaudio: {torchaudio.__version__, torchaudio.version.git_version}")'
-        cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
-        cmake --build ./cmake-out --target et_run
+
     - name: Download checkpoints
       run: |
@@ -891,8 +889,8 @@ jobs:
         pip install -r requirements.txt
         pip list
 
-        cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
-        cmake --build ./cmake-out --target aoti_run
+        bash scripts/build_native.sh aoti
+
     - name: Download checkpoint
       run: |
         mkdir -p checkpoints/stories15M

From e9396a15452147188ba43cef88e947e0639fa293 Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Thu, 25 Apr 2024 12:01:28 -0700
Subject: [PATCH 4/4] Add README for building native runner for aoti

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 README.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/README.md b/README.md
index b3d62dcd3..9d0d7cda8 100644
--- a/README.md
+++ b/README.md
@@ -235,6 +235,20 @@ python3 torchchat.py generate --dso-path stories15M.so --prompt "Hello my name i
 
 NOTE: The exported model will be large. We suggest you quantize the model, explained further down, before deploying the model on device.
 
+**Build Native Runner Binary**
+
+We provide an end-to-end C++ [runner](runner/run.cpp) that runs the `*.so` file exported by following the previous [examples](#aoti-aot-inductor) section. To build the runner binary on your Mac or Linux:
+
+```bash
+scripts/build_native.sh aoti
+```
+
+Run:
+
+```bash
+cmake-out/aoti_run model.so -z tokenizer.model -i "Once upon a time"
+```
+
 ### ExecuTorch
 
 ExecuTorch enables you to optimize your model for execution on a mobile or embedded device, but can also be used on desktop for testing.
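
An illustrative end-to-end flow, stitched together from the commands the patches above document. This is a sketch, not part of the series: it assumes the `stories15M` checkpoint and its `tokenizer.model` live in the working directory as in the README examples, and that export accepts `--output-dso-path` analogously to the `--output-pte-path` usage shown above.

```bash
# Build the ExecuTorch (et) runner; on first run this clones and builds ExecuTorch.
bash scripts/build_native.sh et

# Export a model to a .pte file and run it with the native binary.
python3 torchchat.py export stories15M --output-pte-path stories15M.pte
cmake-out/et_run stories15M.pte -z tokenizer.model -i "Once upon a time"

# The AOTI flavor is analogous: build the runner, export a .so, run it.
bash scripts/build_native.sh aoti
python3 torchchat.py export stories15M --output-dso-path stories15M.so
cmake-out/aoti_run stories15M.so -z tokenizer.model -i "Once upon a time"
```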