From 4aca3b1183c8360465b994dd380e4b9eb04ad122 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Wed, 9 Apr 2025 14:18:55 -0700 Subject: [PATCH 01/19] Bump external tokenizer submodule version --- extension/llm/tokenizers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers index d70f5a76055..4167468eff0 160000 --- a/extension/llm/tokenizers +++ b/extension/llm/tokenizers @@ -1 +1 @@ -Subproject commit d70f5a760552d8d3bb288cdd93eebde477bb6eb0 +Subproject commit 4167468eff098f93a431bfbc9ae23e76c8d26ed3 From 13408fb89d30f2d4b6b5953a6c87314b13202b05 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 10 Mar 2025 16:32:58 -0700 Subject: [PATCH 02/19] Add stub impl of json tokenizer to llama runner --- examples/models/llama/runner/CMakeLists.txt | 4 ++ examples/models/llama/runner/runner.cpp | 42 +++++++++++++-------- examples/models/llama/runner/targets.bzl | 1 + extension/llm/tokenizer/hf_tokenizer.cpp | 41 ++++++++++++++++++++ extension/llm/tokenizer/hf_tokenizer.h | 34 +++++++++++++++++ extension/llm/tokenizer/targets.bzl | 17 +++++++++ 6 files changed, 123 insertions(+), 16 deletions(-) create mode 100644 extension/llm/tokenizer/hf_tokenizer.cpp create mode 100644 extension/llm/tokenizer/hf_tokenizer.h diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt index 04fe23e4d82..9c39b35199f 100644 --- a/examples/models/llama/runner/CMakeLists.txt +++ b/examples/models/llama/runner/CMakeLists.txt @@ -47,6 +47,10 @@ list( ) list(APPEND _llama_runner__srcs ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp + ) +list( + APPEND _llama_runner__srcs + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/hf_tokenizer.cpp ) if(CMAKE_TOOLCHAIN_IOS diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index 0ecc611ef6c..0014a476864 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -17,6 +17,7 @@ #include #include +#include namespace example { @@ -77,24 +78,33 @@ Error Runner::load() { return Error::Ok; } ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward")); - // load tokenizer. Assuming tiktoken is the default tokenizer + // Load tokenizer. tokenizer_ = nullptr; - tokenizer_ = get_tiktoken_for_llama(); - ::tokenizers::Error err = tokenizer_->load(tokenizer_path_); - // Rely on tiktoken to throw error if the artifact is incompatible. Then we - // fallback to BPE tokenizer. - if (err != ::tokenizers::Error::Ok) { + // Check if tokenizer_path_ ends with ".json". + if (tokenizer_path_.size() >= 5 && + tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) { + tokenizer_ = std::make_unique(); + tokenizer_->load(tokenizer_path_); ET_LOG( - Info, - "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer", - tokenizer_path_.c_str()); - tokenizer_.reset(); - tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>(); - err = tokenizer_->load(tokenizer_path_); - ET_CHECK_TK_OK_OR_RETURN_ERROR( - err, - "Failed to load %s as a llama2.c tokenizer artifact", - tokenizer_path_.c_str()); + Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str()); + } else { + ::tokenizers::Error err = tokenizer_->load(tokenizer_path_); + tokenizer_ = get_tiktoken_for_llama(); + // Rely on tiktoken to throw error if the artifact is incompatible. Then we + // fallback to BPE tokenizer. + if (err != ::tokenizers::Error::Ok) { + ET_LOG( + Info, + "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer", + tokenizer_path_.c_str()); + tokenizer_.reset(); + tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>(); + err = tokenizer_->load(tokenizer_path_); + ET_CHECK_TK_OK_OR_RETURN_ERROR( + err, + "Failed to load %s as a llama2.c tokenizer artifact", + tokenizer_path_.c_str()); + } } ET_LOG(Info, "Reading metadata from model"); diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl index 60fc1f2b74d..5797c25e6d3 100644 --- a/examples/models/llama/runner/targets.bzl +++ b/examples/models/llama/runner/targets.bzl @@ -49,6 +49,7 @@ def define_common_targets(): "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix, "//executorch/examples/models/llama/tokenizer:tiktoken", "//pytorch/tokenizers:llama2c_tokenizer", + "//pytorch/tokenizers:hf_tokenizer", ] + (_get_operator_lib(aten)) + ([ # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE) # Therefore enable it explicitly for now to avoid failing tests diff --git a/extension/llm/tokenizer/hf_tokenizer.cpp b/extension/llm/tokenizer/hf_tokenizer.cpp new file mode 100644 index 00000000000..c7a32127335 --- /dev/null +++ b/extension/llm/tokenizer/hf_tokenizer.cpp @@ -0,0 +1,41 @@ +#include + +#include +#include +#include +#include + +using ::executorch::runtime::Error; +using ::executorch::runtime::Result; + +namespace executorch { +namespace extension { +namespace llm { + +HfTokenizer::~HfTokenizer() {} + +Error HfTokenizer::load(const std::string& tokenizer_path) { + // Stub implementation for loading the tokenizer. + // TODO: Implement actual loading logic. + return ::executorch::runtime::Error::Ok; +} + +Result> +HfTokenizer::encode(const std::string& input, int8_t bos, int8_t eos) const { + // Stub implementation for encoding. + // TODO: Implement actual encoding logic. + std::vector tokens; + return ::executorch::runtime::Result>(tokens); +} + +Result HfTokenizer::decode(uint64_t prev_token, uint64_t token) + const { + // Stub implementation for decoding. + // TODO: Implement actual decoding logic. + std::string decoded_string; + return ::executorch::runtime::Result(decoded_string); +} + +} // namespace llm +} // namespace extension +} // namespace executorch diff --git a/extension/llm/tokenizer/hf_tokenizer.h b/extension/llm/tokenizer/hf_tokenizer.h new file mode 100644 index 00000000000..eee2d2426b5 --- /dev/null +++ b/extension/llm/tokenizer/hf_tokenizer.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace executorch { +namespace extension { +namespace llm { + +class ET_EXPERIMENTAL HfTokenizer : public Tokenizer { + public: + explicit HfTokenizer(){}; + ~HfTokenizer() override; + + ::executorch::runtime::Error load(const std::string& tokenizer_path) override; + + ::executorch::runtime::Result> + encode(const std::string& input, int8_t bos, int8_t eos) const override; + + ::executorch::runtime::Result decode( + uint64_t prev_token, + uint64_t token) const override; +}; + +} // namespace llm +} // namespace extension +} // namespace executorch diff --git a/extension/llm/tokenizer/targets.bzl b/extension/llm/tokenizer/targets.bzl index 7b545054390..1bdf305094b 100644 --- a/extension/llm/tokenizer/targets.bzl +++ b/extension/llm/tokenizer/targets.bzl @@ -96,3 +96,20 @@ def define_common_targets(): "re2", ], ) + + runtime.cxx_library( + name = "hf_tokenizer", + srcs = [ + "hf_tokenizer.cpp", + ], + exported_headers = [ + "hf_tokenizer.h", + ], + exported_deps = [ + ":tokenizer_header", + "//executorch/runtime/core:core", + ], + visibility = [ + "@EXECUTORCH_CLIENTS", + ], + ) From d790908f565a8512a092745365e66ebdab20949e Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 04:28:27 -0700 Subject: [PATCH 03/19] Cmake --- examples/models/llama/runner/CMakeLists.txt | 15 ++++++++++----- examples/models/llama/runner/runner.cpp | 2 ++ examples/models/llama/runner/targets.bzl | 2 +- extension/llm/runner/CMakeLists.txt | 7 +++++++ 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt index 9c39b35199f..4ee2e6214c2 100644 --- a/examples/models/llama/runner/CMakeLists.txt +++ b/examples/models/llama/runner/CMakeLists.txt @@ -47,10 +47,6 @@ list( ) list(APPEND _llama_runner__srcs ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp - ) -list( - APPEND _llama_runner__srcs - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/hf_tokenizer.cpp ) if(CMAKE_TOOLCHAIN_IOS @@ -77,10 +73,19 @@ add_subdirectory( ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/re2 ${CMAKE_CURRENT_BINARY_DIR}/re2 ) +add_subdirectory( + ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json + ${CMAKE_CURRENT_BINARY_DIR}/json +) +target_include_directories(llama_runner + PRIVATE ${CMAKE_INSTALL_PREFIX}/include + ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/include + ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/src +) set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag}) set(llama_runner_deps executorch extension_data_loader extension_module - extension_tensor re2::re2 + extension_tensor re2::re2 nlohmann_json::nlohmann_json ) target_link_libraries(llama_runner PUBLIC ${llama_runner_deps}) diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index 0014a476864..d81dd40252a 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -82,8 +82,10 @@ Error Runner::load() { tokenizer_ = nullptr; // Check if tokenizer_path_ ends with ".json". if (tokenizer_path_.size() >= 5 && + tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) { tokenizer_ = std::make_unique(); + ET_LOG(Info, "Loading json tokenizer"); tokenizer_->load(tokenizer_path_); ET_LOG( Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str()); diff --git a/examples/models/llama/runner/targets.bzl b/examples/models/llama/runner/targets.bzl index 5797c25e6d3..158202cf55a 100644 --- a/examples/models/llama/runner/targets.bzl +++ b/examples/models/llama/runner/targets.bzl @@ -49,7 +49,7 @@ def define_common_targets(): "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix, "//executorch/examples/models/llama/tokenizer:tiktoken", "//pytorch/tokenizers:llama2c_tokenizer", - "//pytorch/tokenizers:hf_tokenizer", + "//pytorch/tokenizers:hf_tokenizer", ] + (_get_operator_lib(aten)) + ([ # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE) # Therefore enable it explicitly for now to avoid failing tests diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt index 993314ccd63..55d599ff998 100644 --- a/extension/llm/runner/CMakeLists.txt +++ b/extension/llm/runner/CMakeLists.txt @@ -49,6 +49,13 @@ set(runner_deps executorch extension_data_loader extension_module target_link_libraries(extension_llm_runner PUBLIC ${runner_deps}) +target_include_directories( + extension_llm_runner + PUBLIC + ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/include + ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/src +) + target_include_directories( extension_llm_runner INTERFACE ${_common_include_directories} ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include From 97110acff06785e98e707a279481755a165d0299 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 24 Mar 2025 05:03:30 -0700 Subject: [PATCH 04/19] Remove stub --- extension/llm/tokenizer/hf_tokenizer.cpp | 41 ------------------------ extension/llm/tokenizer/hf_tokenizer.h | 34 -------------------- extension/llm/tokenizer/targets.bzl | 17 ---------- 3 files changed, 92 deletions(-) delete mode 100644 extension/llm/tokenizer/hf_tokenizer.cpp delete mode 100644 extension/llm/tokenizer/hf_tokenizer.h diff --git a/extension/llm/tokenizer/hf_tokenizer.cpp b/extension/llm/tokenizer/hf_tokenizer.cpp deleted file mode 100644 index c7a32127335..00000000000 --- a/extension/llm/tokenizer/hf_tokenizer.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include - -#include -#include -#include -#include - -using ::executorch::runtime::Error; -using ::executorch::runtime::Result; - -namespace executorch { -namespace extension { -namespace llm { - -HfTokenizer::~HfTokenizer() {} - -Error HfTokenizer::load(const std::string& tokenizer_path) { - // Stub implementation for loading the tokenizer. - // TODO: Implement actual loading logic. - return ::executorch::runtime::Error::Ok; -} - -Result> -HfTokenizer::encode(const std::string& input, int8_t bos, int8_t eos) const { - // Stub implementation for encoding. - // TODO: Implement actual encoding logic. - std::vector tokens; - return ::executorch::runtime::Result>(tokens); -} - -Result HfTokenizer::decode(uint64_t prev_token, uint64_t token) - const { - // Stub implementation for decoding. - // TODO: Implement actual decoding logic. - std::string decoded_string; - return ::executorch::runtime::Result(decoded_string); -} - -} // namespace llm -} // namespace extension -} // namespace executorch diff --git a/extension/llm/tokenizer/hf_tokenizer.h b/extension/llm/tokenizer/hf_tokenizer.h deleted file mode 100644 index eee2d2426b5..00000000000 --- a/extension/llm/tokenizer/hf_tokenizer.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#pragma once - -#include - -namespace executorch { -namespace extension { -namespace llm { - -class ET_EXPERIMENTAL HfTokenizer : public Tokenizer { - public: - explicit HfTokenizer(){}; - ~HfTokenizer() override; - - ::executorch::runtime::Error load(const std::string& tokenizer_path) override; - - ::executorch::runtime::Result> - encode(const std::string& input, int8_t bos, int8_t eos) const override; - - ::executorch::runtime::Result decode( - uint64_t prev_token, - uint64_t token) const override; -}; - -} // namespace llm -} // namespace extension -} // namespace executorch diff --git a/extension/llm/tokenizer/targets.bzl b/extension/llm/tokenizer/targets.bzl index 1bdf305094b..7b545054390 100644 --- a/extension/llm/tokenizer/targets.bzl +++ b/extension/llm/tokenizer/targets.bzl @@ -96,20 +96,3 @@ def define_common_targets(): "re2", ], ) - - runtime.cxx_library( - name = "hf_tokenizer", - srcs = [ - "hf_tokenizer.cpp", - ], - exported_headers = [ - "hf_tokenizer.h", - ], - exported_deps = [ - ":tokenizer_header", - "//executorch/runtime/core:core", - ], - visibility = [ - "@EXECUTORCH_CLIENTS", - ], - ) From 173308ef09318bb0905d682c2230323146102eca Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Wed, 26 Mar 2025 05:38:51 -0700 Subject: [PATCH 05/19] Scott pr review --- examples/models/llama/runner/runner.cpp | 71 +++++++++++++++---------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index d81dd40252a..9ad82864efc 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -16,8 +16,8 @@ #include #include -#include #include +#include namespace example { @@ -36,6 +36,41 @@ static constexpr auto kMaxContextLen = "get_max_context_len"; static constexpr auto kVocabSize = "get_vocab_size"; static constexpr auto kUseKVCache = "use_kv_cache"; static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache"; + +std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer( + const std::string& tokenizer_path) { + std::unique_ptr<::tokenizers::Tokenizer> tokenizer = nullptr; + ::tokenizers::Error err; + + // First try to load as a json tokenizer. + { + auto tokenizer = std::make_unique(); + if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { + ET_LOG(Info, "Loaded json tokenizer"); + return tokenizer; + } + } + + // Try to load as tiktoken tokenizer. + { + auto tokenizer = get_tiktoken_for_llama(); + if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { + ET_LOG(Info, "Loaded TikToken tokenizer"); + return tokenizer; + } + } + + // Try to load as BPE tokenizer. + { + auto tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>(); + if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { + ET_LOG(Info, "Loaded BPE tokenizer"); + return tokenizer; + } + } + + return nullptr; +} } // namespace Runner::Runner( @@ -78,35 +113,15 @@ Error Runner::load() { return Error::Ok; } ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward")); + // Load tokenizer. - tokenizer_ = nullptr; - // Check if tokenizer_path_ ends with ".json". - if (tokenizer_path_.size() >= 5 && - - tokenizer_path_.compare(tokenizer_path_.size() - 5, 5, ".json") == 0) { - tokenizer_ = std::make_unique(); - ET_LOG(Info, "Loading json tokenizer"); - tokenizer_->load(tokenizer_path_); + tokenizer_ = load_tokenizer(tokenizer_path_); + if (tokenizer_ == nullptr) { ET_LOG( - Info, "Loaded tokenizer %s as HF tokenizer", tokenizer_path_.c_str()); - } else { - ::tokenizers::Error err = tokenizer_->load(tokenizer_path_); - tokenizer_ = get_tiktoken_for_llama(); - // Rely on tiktoken to throw error if the artifact is incompatible. Then we - // fallback to BPE tokenizer. - if (err != ::tokenizers::Error::Ok) { - ET_LOG( - Info, - "Failed to load %s as a Tiktoken artifact, trying BPE tokenizer", - tokenizer_path_.c_str()); - tokenizer_.reset(); - tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>(); - err = tokenizer_->load(tokenizer_path_); - ET_CHECK_TK_OK_OR_RETURN_ERROR( - err, - "Failed to load %s as a llama2.c tokenizer artifact", - tokenizer_path_.c_str()); - } + Error, + "Failed to load %s as a llama2.c tokenizer artifact", + tokenizer_path_.c_str()); + return ::executorch::runtime::Error::InvalidArgument; } ET_LOG(Info, "Reading metadata from model"); From dc8a31aa88e207f371c9adb5a4cc92974e80e4eb Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Tue, 15 Apr 2025 14:14:24 -0700 Subject: [PATCH 06/19] Target link tokenizers library --- examples/models/llama/runner/CMakeLists.txt | 45 ++++++--------------- extension/llm/runner/CMakeLists.txt | 2 +- 2 files changed, 13 insertions(+), 34 deletions(-) diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt index 4ee2e6214c2..0807e6fa422 100644 --- a/examples/models/llama/runner/CMakeLists.txt +++ b/examples/models/llama/runner/CMakeLists.txt @@ -41,14 +41,6 @@ target_include_directories( extension_module INTERFACE ${_common_include_directories} ) -list( - APPEND _llama_runner__srcs - ${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/tiktoken.cpp -) -list(APPEND _llama_runner__srcs - ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp -) - if(CMAKE_TOOLCHAIN_IOS OR ANDROID OR APPLE @@ -60,32 +52,8 @@ else() add_library(llama_runner SHARED ${_llama_runner__srcs}) endif() -# find RE2 for tokenizer, build tiktoken -set(ABSL_ENABLE_INSTALL ON) -set(ABSL_PROPAGATE_CXX_STD ON) -set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE}) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) -add_subdirectory( - ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/abseil-cpp - ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp -) -add_subdirectory( - ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/re2 - ${CMAKE_CURRENT_BINARY_DIR}/re2 -) -add_subdirectory( - ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json - ${CMAKE_CURRENT_BINARY_DIR}/json -) -target_include_directories(llama_runner - PRIVATE ${CMAKE_INSTALL_PREFIX}/include - ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/include - ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/llama.cpp-unicode/src -) -set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag}) - set(llama_runner_deps executorch extension_data_loader extension_module - extension_tensor re2::re2 nlohmann_json::nlohmann_json + extension_tensor ) target_link_libraries(llama_runner PUBLIC ${llama_runner_deps}) @@ -94,6 +62,17 @@ target_include_directories( llama_runner INTERFACE ${_common_include_directories} ) + +# Include tokenizers dependency +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +add_subdirectory( + ${EXECUTORCH_ROOT}/extension/llm/tokenizers + ${CMAKE_CURRENT_BINARY_DIR}/tokenizers +) +target_link_libraries( + llama_runner PUBLIC tokenizers +) + target_include_directories( llama_runner PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt index 55d599ff998..c71d8f319ec 100644 --- a/extension/llm/runner/CMakeLists.txt +++ b/extension/llm/runner/CMakeLists.txt @@ -59,4 +59,4 @@ target_include_directories( target_include_directories( extension_llm_runner INTERFACE ${_common_include_directories} ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include -) + ) From fda4ea52aa673672693987c16b7fc0e1edbf7d97 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Thu, 17 Apr 2025 17:08:49 -0700 Subject: [PATCH 07/19] Remove unused err --- examples/models/llama/runner/runner.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index f6ae6bb9dcd..654855d9ab4 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -41,7 +41,6 @@ static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache"; std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer( const std::string& tokenizer_path) { std::unique_ptr<::tokenizers::Tokenizer> tokenizer = nullptr; - ::tokenizers::Error err; // First try to load as a json tokenizer. { From aac48326494a5d3a979f81139619a79275cd216f Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Fri, 25 Apr 2025 13:16:58 -0700 Subject: [PATCH 08/19] Fix merge error --- examples/models/llama/runner/runner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index 462dde2975f..f58225b5f8b 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -133,7 +133,7 @@ Error Runner::load() { tokenizer_.reset(); // @lint-ignore CLANGTIDY facebook-hte-Deprecated tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>(); - err = tokenizer_->load(tokenizer_path_); + auto err = tokenizer_->load(tokenizer_path_); ET_CHECK_TK_OK_OR_RETURN_ERROR( err, "Failed to load %s as a llama2.c tokenizer artifact", From f0406e20631dfdb8533b79ae3af62075c5cde894 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Sun, 27 Apr 2025 21:37:55 -0700 Subject: [PATCH 09/19] Pin bump tokenizers --- extension/llm/tokenizers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers index 3f9c458586e..0ed8e2e3448 160000 --- a/extension/llm/tokenizers +++ b/extension/llm/tokenizers @@ -1 +1 @@ -Subproject commit 3f9c458586ee576a7ddafb48eb491f117187e178 +Subproject commit 0ed8e2e34486f119a87c15d000080c5e3eea7aea From 7fdb8ae159822f85f5eeef0b9e9e9df9bad1922a Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Sun, 27 Apr 2025 21:38:05 -0700 Subject: [PATCH 10/19] Fix qnn build --- examples/qualcomm/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt index 7b2c43b3f46..994657a2013 100644 --- a/examples/qualcomm/CMakeLists.txt +++ b/examples/qualcomm/CMakeLists.txt @@ -82,6 +82,14 @@ add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/re2 ${CMAKE_CURRENT_BINARY_DIR}/re2 ) +add_subdirectory( + ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/pcre2 + ${CMAKE_CURRENT_BINARY_DIR}/pcre2 +) +add_subdirectory( + ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include + ${CMAKE_CURRENT_BINARY_DIR}/json +) set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag}) # build qnn_executor_runner From cb12e06fd49d3a402cb11b5a0f2eb31f473f0919 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Sun, 27 Apr 2025 22:00:36 -0700 Subject: [PATCH 11/19] Nevermind --- examples/qualcomm/CMakeLists.txt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt index 994657a2013..7b2c43b3f46 100644 --- a/examples/qualcomm/CMakeLists.txt +++ b/examples/qualcomm/CMakeLists.txt @@ -82,14 +82,6 @@ add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/re2 ${CMAKE_CURRENT_BINARY_DIR}/re2 ) -add_subdirectory( - ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/pcre2 - ${CMAKE_CURRENT_BINARY_DIR}/pcre2 -) -add_subdirectory( - ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include - ${CMAKE_CURRENT_BINARY_DIR}/json -) set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag}) # build qnn_executor_runner From 8f0c5320c6cad3d8bc56686ce0e83762779af539 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Sun, 27 Apr 2025 22:27:52 -0700 Subject: [PATCH 12/19] Try to fix qnn --- examples/qualcomm/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt index 7b2c43b3f46..d140fb357e6 100644 --- a/examples/qualcomm/CMakeLists.txt +++ b/examples/qualcomm/CMakeLists.txt @@ -35,7 +35,7 @@ find_package(gflags REQUIRED) set(_common_compile_options -Wno-deprecated-declarations -fPIC) # Let files say "include ". -set(_common_include_directories ${EXECUTORCH_ROOT}/..) +set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/extension/llm/tokenizers/third-party/json/single_include) # # The `__srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}. @@ -67,6 +67,7 @@ target_include_directories( PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include ) # find RE2 for tokenizer From 846951ed54b7c6d99fd387b7e0ddc788d0e73ed5 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Sun, 27 Apr 2025 22:46:39 -0700 Subject: [PATCH 13/19] Fix unicode in qnn --- examples/qualcomm/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt index d140fb357e6..4f338a23044 100644 --- a/examples/qualcomm/CMakeLists.txt +++ b/examples/qualcomm/CMakeLists.txt @@ -68,6 +68,8 @@ target_include_directories( ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/include ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/json/single_include + ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src ) # find RE2 for tokenizer From ac83e12cebde146d317addd4ed8383107e2da1e2 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 28 Apr 2025 08:18:13 -0700 Subject: [PATCH 14/19] Fix qaihub --- .../qualcomm/qaihub_scripts/llama/CMakeLists.txt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt index f96d0169809..16d91013349 100644 --- a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt +++ b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt @@ -27,7 +27,12 @@ list(PREPEND _qaihub_llama2_7b_runner__srcs # build qaihub llama2 7b runner add_executable(qaihub_llama2_7b_runner ${_qaihub_llama2_7b_runner__srcs}) target_include_directories( - qaihub_llama2_7b_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include + qaihub_llama2_7b_runner PUBLIC + ${_common_include_directories} + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src ) target_link_libraries( qaihub_llama2_7b_runner @@ -69,7 +74,12 @@ list( # build qaihub llama3 8b runner add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs}) target_include_directories( - qaihub_llama3_8b_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include + qaihub_llama3_8b_runner PUBLIC + ${_common_include_directories} + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/json/single_include + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/include + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/third-party/llama.cpp-unicode/src ) target_link_libraries( From c90896845825c416aedfcf1825fc3446c9425802 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Mon, 28 Apr 2025 11:10:25 -0700 Subject: [PATCH 15/19] Fix local shadowing --- examples/models/llama/runner/runner.cpp | 36 +++++++++---------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index f58225b5f8b..534738521a5 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -40,33 +40,23 @@ static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache"; std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer( const std::string& tokenizer_path) { - std::unique_ptr<::tokenizers::Tokenizer> tokenizer = nullptr; - - // First try to load as a json tokenizer. - { - auto tokenizer = std::make_unique(); - if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { - ET_LOG(Info, "Loaded json tokenizer"); - return tokenizer; - } + + auto json_tokenizer = std::make_unique(); + if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { + ET_LOG(Info, "Loaded json tokenizer"); + return json_tokenizer; } - // Try to load as tiktoken tokenizer. - { - auto tokenizer = get_tiktoken_for_llama(); - if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { - ET_LOG(Info, "Loaded TikToken tokenizer"); - return tokenizer; - } + auto tiktoken_tokenizer = get_tiktoken_for_llama(); + if (tiktoken_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { + ET_LOG(Info, "Loaded TikToken tokenizer"); + return tiktoken_tokenizer; } - // Try to load as BPE tokenizer. - { - auto tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>(); - if (tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { - ET_LOG(Info, "Loaded BPE tokenizer"); - return tokenizer; - } + auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>(); + if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { + ET_LOG(Info, "Loaded BPE tokenizer"); + return bpe_tokenizer; } return nullptr; From 30d770bd5613f9cf9828015dd94765752d40dd61 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Tue, 29 Apr 2025 00:54:37 -0700 Subject: [PATCH 16/19] Fix duplicate symbol --- extension/llm/tokenizers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers index 0ed8e2e3448..08379114321 160000 --- a/extension/llm/tokenizers +++ b/extension/llm/tokenizers @@ -1 +1 @@ -Subproject commit 0ed8e2e34486f119a87c15d000080c5e3eea7aea +Subproject commit 083791143216ef5bb33086082438146163b6b571 From 6cc40bba79565f87d36b9f9483a7e7d144056179 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Tue, 29 Apr 2025 10:16:43 -0700 Subject: [PATCH 17/19] Bump tokenizers --- extension/llm/tokenizers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers index 08379114321..35d185e0f5e 160000 --- a/extension/llm/tokenizers +++ b/extension/llm/tokenizers @@ -1 +1 @@ -Subproject commit 083791143216ef5bb33086082438146163b6b571 +Subproject commit 35d185e0f5e80c261c4ebf4f4993ff55f2792626 From 194c829efa919c0e095fc317c28881ce6ffea937 Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Tue, 29 Apr 2025 11:50:26 -0700 Subject: [PATCH 18/19] Lint --- examples/models/llama/runner/runner.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/models/llama/runner/runner.cpp b/examples/models/llama/runner/runner.cpp index 534738521a5..ef3681b74bc 100644 --- a/examples/models/llama/runner/runner.cpp +++ b/examples/models/llama/runner/runner.cpp @@ -40,7 +40,6 @@ static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache"; std::unique_ptr<::tokenizers::Tokenizer> load_tokenizer( const std::string& tokenizer_path) { - auto json_tokenizer = std::make_unique(); if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { ET_LOG(Info, "Loaded json tokenizer"); From 4ff5d8b67720f86b363b62bf15b4e6ad0926fbca Mon Sep 17 00:00:00 2001 From: Jack Zhang <32371937+jackzhxng@users.noreply.github.com> Date: Tue, 29 Apr 2025 20:32:41 -0700 Subject: [PATCH 19/19] Bump tokenizer dep one last time --- extension/llm/tokenizers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers index 35d185e0f5e..1621280e058 160000 --- a/extension/llm/tokenizers +++ b/extension/llm/tokenizers @@ -1 +1 @@ -Subproject commit 35d185e0f5e80c261c4ebf4f4993ff55f2792626 +Subproject commit 1621280e0588e4ed1dad12728db10e41c8ebb424