diff --git a/docs/source/llm/getting-started.md b/docs/source/llm/getting-started.md index 58c46bb3f55..3bff5c903f8 100644 --- a/docs/source/llm/getting-started.md +++ b/docs/source/llm/getting-started.md @@ -394,7 +394,7 @@ acceleration and optimization. Any portions of the computation graph not delegated will be executed by the ExecuTorch operator implementations. To delegate the exported model to the specific backend, we need to import its -partitioner as well as edge compile config from Executorch Codebase first, then +partitioner as well as edge compile config from the ExecuTorch codebase first, then call `to_backend` with an instance of partitioner on the `EdgeProgramManager` object `to_edge` function created. @@ -482,7 +482,7 @@ target_link_libraries( ``` Keep the rest of the code the same. For more details refer to -[Exporting to Executorch](https://pytorch.org/executorch/main/llm/getting-started.html#step-1-exporting-to-executorch) +[Exporting to ExecuTorch](https://pytorch.org/executorch/main/llm/getting-started.html#step-1-exporting-to-executorch) and [Invoking the Runtime](https://pytorch.org/executorch/main/llm/getting-started.html#step-2-invoking-the-runtime) for more details diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp index 45b7975333a..2677e6ad804 100644 --- a/examples/models/llama2/runner/runner.cpp +++ b/examples/models/llama2/runner/runner.cpp @@ -10,6 +10,7 @@ // The module takes in a string as input and emits a string as output. 
#include +#include #include #include @@ -76,7 +77,7 @@ Error Runner::load() { append_eos_ = getMetadataHelper("append_eos_to_prompt", false); // Load tokenizer - tokenizer_ = std::make_unique(vocab_size_, bos_id_, eos_id_); + tokenizer_ = std::make_unique(vocab_size_, bos_id_, eos_id_); tokenizer_->load(tokenizer_path_); if (tokenizer_->bos_tok() != bos_id_) { ET_LOG( @@ -105,7 +106,7 @@ Error Runner::load() { } template -T Runner::getMetadataHelper(std::string method_name, T default_val) { +T Runner::getMetadataHelper(const std::string& method_name, T default_val) { T res = default_val; if (model_methods_.count(method_name)) { Result> outputs = module_->execute(method_name); @@ -484,9 +485,9 @@ void Runner::stop() { // explicit instantiation of template methods template int64_t Runner::getMetadataHelper( - std::string method_name, + const std::string& method_name, int64_t default_val); template bool Runner::getMetadataHelper( - std::string method_name, + const std::string& method_name, bool default_val); } // namespace torch::executor diff --git a/examples/models/llama2/runner/runner.h b/examples/models/llama2/runner/runner.h index 08f5e33c47e..4e200d5e6ca 100644 --- a/examples/models/llama2/runner/runner.h +++ b/examples/models/llama2/runner/runner.h @@ -69,7 +69,7 @@ class Runner { private: // metadata template - T getMetadataHelper(std::string method_name, T default_val); + T getMetadataHelper(const std::string& method_name, T default_val); template int32_t logitsToToken(const exec_aten::Tensor& logits_tensor, int64_t pos, T _); diff --git a/examples/models/llama2/tokenizer/tokenizer.cpp b/examples/models/llama2/tokenizer/bpe_tokenizer.cpp similarity index 95% rename from examples/models/llama2/tokenizer/tokenizer.cpp rename to examples/models/llama2/tokenizer/bpe_tokenizer.cpp index 40fc3d5683e..ed7d34aca4d 100644 --- a/examples/models/llama2/tokenizer/tokenizer.cpp +++ b/examples/models/llama2/tokenizer/bpe_tokenizer.cpp @@ -6,7 +6,7 @@ * LICENSE file in the 
root directory of this source tree. */ -#include +#include #include @@ -23,11 +23,11 @@ static int compare_tokens(const void* a, const void* b) { return strcmp(((TokenIndex*)a)->str, ((TokenIndex*)b)->str); } -Tokenizer::Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok) - : initialized_(false), - vocab_size_(vocab_size), - bos_tok_(bos_tok), - eos_tok_(eos_tok), +BPETokenizer::BPETokenizer( + int32_t vocab_size, + uint64_t bos_tok, + uint64_t eos_tok) + : Tokenizer(vocab_size, bos_tok, eos_tok), vocab_(std::make_unique(vocab_size)), vocab_scores_(std::make_unique(vocab_size)), sorted_vocab_(std::make_unique(vocab_size)) { @@ -47,7 +47,7 @@ Tokenizer::Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok) * @param tokenizer_path The path to the tokenizer file. * @return Error */ -Error Tokenizer::load(const std::string& tokenizer_path) { +Error BPETokenizer::load(const std::string& tokenizer_path) { if (initialized_) { ET_LOG(Info, "Tokenizer already initialized"); return Error::Ok; @@ -131,7 +131,7 @@ Error Tokenizer::load(const std::string& tokenizer_path) { return Error::Ok; } -Tokenizer::~Tokenizer() { +BPETokenizer::~BPETokenizer() { for (int i = 0; i < vocab_size_; i++) { delete[] vocab_[i]; } @@ -145,7 +145,7 @@ Tokenizer::~Tokenizer() { * @return Result A pointer to the string representation of the * token. 
*/ -Result Tokenizer::decode(uint64_t prev_token, uint64_t token) { +Result BPETokenizer::decode(uint64_t prev_token, uint64_t token) { if (!initialized_) { ET_LOG(Error, "Tokenizer not initialized"); return Error::NotSupported; @@ -187,7 +187,7 @@ str_lookup(const char* str, TokenIndex* sorted_vocab, int32_t vocab_size) { * @return Result> */ Result> -Tokenizer::encode(const std::string& text, int8_t bos, int8_t eos) { +BPETokenizer::encode(const std::string& text, int8_t bos, int8_t eos) { if (!initialized_) { ET_LOG(Error, "Tokenizer not initialized"); return Error::NotSupported; diff --git a/examples/models/llama2/tokenizer/bpe_tokenizer.h b/examples/models/llama2/tokenizer/bpe_tokenizer.h new file mode 100644 index 00000000000..82e3f396344 --- /dev/null +++ b/examples/models/llama2/tokenizer/bpe_tokenizer.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace torch { +namespace executor { + +struct TokenIndex { + const char* str; + int32_t id; +}; + +class BPETokenizer : public Tokenizer { + public: + explicit BPETokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok); + ~BPETokenizer() override; + + Error load(const std::string& tokenizer_path) override; + + Result> + encode(const std::string& input, int8_t bos, int8_t eos) override; + + Result decode(uint64_t prev_token, uint64_t token) override; + + private: + std::unique_ptr vocab_; + std::unique_ptr vocab_scores_; + std::unique_ptr sorted_vocab_; + unsigned int max_token_length_; + unsigned char byte_pieces_[512]; // stores all single-byte strings +}; +} // namespace executor +} // namespace torch diff --git a/examples/models/llama2/tokenizer/targets.bzl b/examples/models/llama2/tokenizer/targets.bzl index b63f780faa1..b25693558ae 100644 --- a/examples/models/llama2/tokenizer/targets.bzl +++ b/examples/models/llama2/tokenizer/targets.bzl @@ -4,10 +4,11 @@ def define_common_targets(): runtime.cxx_library( name = "tokenizer", srcs = [ - "tokenizer.cpp", + "bpe_tokenizer.cpp", ], exported_headers = [ "tokenizer.h", + "bpe_tokenizer.h", ], exported_deps = [ "//executorch/runtime/core/exec_aten:lib", diff --git a/examples/models/llama2/tokenizer/test/test_tokenizer.cpp b/examples/models/llama2/tokenizer/test/test_tokenizer.cpp index 787f008568c..1d1f83065cf 100644 --- a/examples/models/llama2/tokenizer/test/test_tokenizer.cpp +++ b/examples/models/llama2/tokenizer/test/test_tokenizer.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ +#include #include #include #include @@ -20,7 +21,7 @@ class TokenizerExtensionTest : public Test { public: void SetUp() override { torch::executor::runtime_init(); - tokenizer_ = std::make_unique(32000, 1, 2); + tokenizer_ = std::make_unique(32000, 1, 2); modelPath_ = std::getenv("RESOURCES_PATH") + std::string("/test.bin"); } diff --git a/examples/models/llama2/tokenizer/tokenizer.h b/examples/models/llama2/tokenizer/tokenizer.h index 6b03278eace..5e9f0925823 100644 --- a/examples/models/llama2/tokenizer/tokenizer.h +++ b/examples/models/llama2/tokenizer/tokenizer.h @@ -26,22 +26,21 @@ namespace torch { namespace executor { -struct TokenIndex { - const char* str; - int32_t id; -}; - class Tokenizer { public: - explicit Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok); - ~Tokenizer(); + explicit Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok) + : initialized_(false), + vocab_size_(vocab_size), + bos_tok_(bos_tok), + eos_tok_(eos_tok) {} + virtual ~Tokenizer() {} - Error load(const std::string& tokenizer_path); + virtual Error load(const std::string& tokenizer_path) = 0; - Result> - encode(const std::string& input, int8_t bos, int8_t eos); + virtual Result> + encode(const std::string& input, int8_t bos, int8_t eos) = 0; - Result decode(uint64_t prev_token, uint64_t token); + virtual Result decode(uint64_t prev_token, uint64_t token) = 0; // getters int32_t vocab_size() const { @@ -56,15 +55,10 @@ class Tokenizer { return eos_tok_; } - private: + protected: bool initialized_; const int32_t vocab_size_; uint64_t bos_tok_, eos_tok_; - std::unique_ptr vocab_; - std::unique_ptr vocab_scores_; - std::unique_ptr sorted_vocab_; - unsigned int max_token_length_; - unsigned char byte_pieces_[512]; // stores all single-byte strings }; } // namespace executor