Skip to content

Commit

Permalink
Add support for tiktoken and refactored runner structure (pytorch#435)
Browse files Browse the repository at this point in the history
* Add support for tiktoken and refactored runner structure

Summary:

Unified runner and move runner-et/CMakeLists.txt to runner/et.cmake and
runner-aoti/CMakeLists.txt to runner/aoti.cmake.

Added a root level CMakeLists.txt to build a tokenizer library and link
to both targets separately.

In the CLI, we need to specify which target to build (either `et_run` or `aoti_run`):

```
cmake --build ./cmake-out --target et_run/aoti_run
```

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Fix CI

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Fix more CI

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Further fix CI

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Lint

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Update build_android.sh

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Rebase

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

* Fix cmake commands in CI job

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
  • Loading branch information
larryliu0820 authored and malfet committed Jul 17, 2024
1 parent 0b4b56a commit de9c414
Show file tree
Hide file tree
Showing 21 changed files with 788 additions and 145 deletions.
20 changes: 11 additions & 9 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,9 @@ jobs:
runs-on: ${{matrix.runner}}
steps:
- name: Checkout repo
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
submodules: true
- name: Setup Python
uses: actions/setup-python@v2
with:
Expand All @@ -737,8 +739,8 @@ jobs:
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
python3 -c 'import torchaudio;print(f"torchaudio: {torchaudio.__version__, torchaudio.version.git_version}")'
cmake -S ./runner-et -B ./runner-et/cmake-out -G Ninja
cmake --build ./runner-et/cmake-out
cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
cmake --build ./cmake-out --target et_run
- name: Download checkpoints
run: |
Expand All @@ -753,7 +755,7 @@ jobs:
cat ./output_eager
python torchchat.py export stories15M --output-pte-path ./model.pte
./runner-et/cmake-out/run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}" > ./output_et
./cmake-out/et_run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}" > ./output_et
cat ./output_et
echo "Tests complete."
Expand All @@ -770,6 +772,8 @@ jobs:
steps:
- name: Checkout repo
uses: actions/checkout@v3
with:
submodules: true
- name: Setup Python
uses: actions/setup-python@v4
with:
Expand All @@ -783,10 +787,8 @@ jobs:
pip install -r requirements.txt
pip list
cd ${TORCHCHAT_ROOT}/runner-aoti
cmake -Bbuild -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'`
cmake --build build
cd ..
cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
cmake --build ./cmake-out --target aoti_run
- name: Download checkpoint
run: |
mkdir -p checkpoints/stories15M
Expand All @@ -807,7 +809,7 @@ jobs:
python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --output-dso-path /tmp/model.so
./runner-aoti/build/run /tmp/model.so -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}" > ${PWD}/output_aoti
./cmake-out/aoti_run /tmp/model.so -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}" > ${PWD}/output_aoti
cat ${PWD}/output_aoti
echo "Tests complete."
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[submodule "tokenizer/third-party/abseil-cpp"]
path = tokenizer/third-party/abseil-cpp
url = https://github.com/abseil/abseil-cpp.git
[submodule "tokenizer/third-party/re2"]
path = tokenizer/third-party/re2
url = https://github.com/google/re2.git
24 changes: 24 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)
IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

project(Torchchat)

# include tokenizer
add_subdirectory(tokenizer)

# include et_run executable
include(runner/et.cmake)
if(TARGET et_run)
target_link_libraries(et_run PUBLIC tokenizer)
endif()

# include aoti_run executable
include(runner/aoti.cmake)
if(TARGET aoti_run)
target_link_libraries(aoti_run tokenizer)
endif()
17 changes: 0 additions & 17 deletions runner-aoti/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions runner-aoti/run.cpp

This file was deleted.

89 changes: 0 additions & 89 deletions runner-et/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions runner-et/run.cpp

This file was deleted.

File renamed without changes.
21 changes: 21 additions & 0 deletions runner/aoti.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)
IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

find_package(CUDA)

find_package(Torch)
if(Torch_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g ${TORCH_CXX_FLAGS} -fpermissive")

add_executable(aoti_run runner/run.cpp)

target_compile_options(aoti_run PUBLIC -D__AOTI_MODEL__)
target_include_directories(aoti_run PRIVATE ${TORCHCHAT_ROOT}/runner)
target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m)
set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17)
endif()
6 changes: 3 additions & 3 deletions runner-et/build_android.sh → runner/build_android.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ export CMAKE_OUT_DIR="cmake-out-android"
#

build_runner_et() {
rm -rf build/cmake-out-android
rm -rf cmake-out-android
echo "ET BUILD DIR IS ${ET_BUILD_DIR}"
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -S ./runner-et -B build/cmake-out-android -G Ninja
cmake --build build/cmake-out-android/ -j16 --config Release
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -S . -B cmake-out-android -G Ninja
cmake --build cmake-out-android/ -j16 --config Release --target et_run
}

find_cmake_prefix_path
Expand Down
98 changes: 98 additions & 0 deletions runner/et.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)

IF(DEFINED ENV{ET_BUILD_DIR})
set(ET_BUILD_DIR $ENV{ET_BUILD_DIR})
ELSE()
set(ET_BUILD_DIR "et-build")
ENDIF()

MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")

IF(DEFINED ENV{CMAKE_OUT_DIR})
set(CMAKE_OUT_DIR $ENV{CMAKE_OUT_DIR})
ELSE()
set(CMAKE_OUT_DIR "cmake-out")
ENDIF()

MESSAGE(STATUS "Using CMAKE OUT DIR: --[${CMAKE_OUT_DIR}]--")

IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

project(Torchchat)

include(CMakePrintHelpers)
include(runner/Utils.cmake)

cmake_print_variables(TORCHCHAT_ROOT)

MESSAGE(STATUS "Looking for executorch in ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch")
set(executorch_DIR ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch)
find_package(executorch CONFIG PATHS ${executorch_DIR})
if(executorch_FOUND)
set(_common_include_directories ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src)

cmake_print_variables(_common_include_directories)

target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ExecuTorch installation process would do this
add_executable(et_run runner/run.cpp)

target_compile_options(et_run PUBLIC -D__ET__MODEL -D_GLIBCXX_USE_CXX11_ABI=1)

# Link ET runtime + extensions
target_link_libraries(
et_run PRIVATE
executorch
extension_module
${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
optimized_kernels
quantized_kernels
portable_kernels
cpublas
eigen_blas
# The libraries below need to be whole-archived linked
optimized_native_cpu_ops_lib
quantized_ops_lib
xnnpack_backend
XNNPACK
pthreadpool
cpuinfo
)
target_link_options_shared_lib(optimized_native_cpu_ops_lib)
target_link_options_shared_lib(quantized_ops_lib)
target_link_options_shared_lib(xnnpack_backend)
# Not clear why linking executorch as whole-archive outside android/apple is leading
# to double registration. Most likely because of linkage issues.
# Will figure this out later. Until then use this.
if(ANDROID OR APPLE)
target_link_options_shared_lib(executorch)
endif()

target_link_libraries(et_run PRIVATE
"$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")

# This one is needed for cpuinfo where it uses android specific log lib
if(ANDROID)
target_link_libraries(et_run PRIVATE log)
endif()

# Adding target_link_options_shared_lib as commented out below leads to this:
#
# CMake Error at Utils.cmake:22 (target_link_options):
# Cannot specify link options for target
# "/Users/scroy/etorch/torchchat/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a"
# which is not built by this project.
# Call Stack (most recent call first):
# Utils.cmake:30 (macos_kernel_link_options)
# CMakeLists.txt:41 (target_link_options_shared_lib)
#
#target_link_options_shared_lib("${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a") # This one does not get installed by ExecuTorch

# This works on mac, but appears to run into issues on linux
# It is needed to solve:
# E 00:00:00.055965 executorch:method.cpp:536] Missing operator: [8] llama::sdpa_with_kv_cache.out
endif()
23 changes: 18 additions & 5 deletions runner/run.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
/* Inference for Llama-2 Transformer model in pure C++ */

#include <ctype.h>
#include <math.h>
#include <stdint.h>
Expand Down Expand Up @@ -397,7 +396,7 @@ void generate(
}

// encode the (string) prompt into tokens sequence
std::string prompt_str(prompt);
std::string prompt_str = prompt;
std::vector<uint64_t> prompt_tokens = tokenizer->encode(prompt_str, 1, 0);
int num_prompt_tokens = prompt_tokens.size();
if (num_prompt_tokens < 1) {
Expand Down Expand Up @@ -674,9 +673,23 @@ int main(int argc, char* argv[]) {
build_transformer(&transformer, checkpoint_path, vocab_size, steps);

// build the Tokenizer via the tokenizer .bin file
Tokenizer* tokenizer =
new BPETokenizer(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
tokenizer->load(tokenizer_path);
Tokenizer* tokenizer = nullptr;

// Try to load using Tiktoken, if exception then switch to another tokenizer
try {
tokenizer =
new Tiktoken(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
tokenizer->load(tokenizer_path);
} catch (const std::invalid_argument&) {
fprintf(
stderr,
"Failed to load %s into a Tiktoken tokenizer. Trying sentencepiece tokenizer..\n",
tokenizer_path);
delete tokenizer;
tokenizer =
new BPETokenizer(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
tokenizer->load(tokenizer_path);
}

// build the Sampler
Sampler sampler;
Expand Down
Loading

0 comments on commit de9c414

Please sign in to comment.