diff --git a/docs/source/llm/getting-started.md b/docs/source/llm/getting-started.md
index 58c46bb3f55..3bff5c903f8 100644
--- a/docs/source/llm/getting-started.md
+++ b/docs/source/llm/getting-started.md
@@ -394,7 +394,7 @@ acceleration and optimization. Any portions of the computation graph not
 delegated will be executed by the ExecuTorch operator implementations.
 
 To delegate the exported model to the specific backend, we need to import its
-partitioner as well as edge compile config from Executorch Codebase first, then
+partitioner as well as edge compile config from the ExecuTorch codebase first, then
 call `to_backend` with an instance of partitioner on the `EdgeProgramManager`
 object `to_edge` function created.
 
@@ -482,7 +482,7 @@ target_link_libraries(
 ```
 
 Keep the rest of the code the same. For more details refer to
-[Exporting to Executorch](https://pytorch.org/executorch/main/llm/getting-started.html#step-1-exporting-to-executorch)
+[Exporting to ExecuTorch](https://pytorch.org/executorch/main/llm/getting-started.html#step-1-exporting-to-executorch)
 and
 [Invoking the Runtime](https://pytorch.org/executorch/main/llm/getting-started.html#step-2-invoking-the-runtime)
 for more details
diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp
index 45b7975333a..2677e6ad804 100644
--- a/examples/models/llama2/runner/runner.cpp
+++ b/examples/models/llama2/runner/runner.cpp
@@ -10,6 +10,7 @@
 // The module takes in a string as input and emits a string as output.
 
 #include <executorch/examples/models/llama2/runner/runner.h>
+#include <executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
 #include <executorch/extension/evalue_util/print_evalue.h>
 #include <executorch/extension/runner_util/managed_tensor.h>
 
@@ -76,7 +77,7 @@ Error Runner::load() {
   append_eos_ = getMetadataHelper("append_eos_to_prompt", false);
 
   // Load tokenizer
-  tokenizer_ = std::make_unique<Tokenizer>(vocab_size_, bos_id_, eos_id_);
+  tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
   tokenizer_->load(tokenizer_path_);
   if (tokenizer_->bos_tok() != bos_id_) {
     ET_LOG(
@@ -105,7 +106,7 @@ Error Runner::load() {
 }
 
 template <typename T>
-T Runner::getMetadataHelper(std::string method_name, T default_val) {
+T Runner::getMetadataHelper(const std::string& method_name, T default_val) {
   T res = default_val;
   if (model_methods_.count(method_name)) {
     Result<std::vector<EValue>> outputs = module_->execute(method_name);
@@ -484,9 +485,9 @@ void Runner::stop() {
 
 // explicit instantiation of template methods
 template int64_t Runner::getMetadataHelper<int64_t>(
-    std::string method_name,
+    const std::string& method_name,
     int64_t default_val);
 template bool Runner::getMetadataHelper<bool>(
-    std::string method_name,
+    const std::string& method_name,
     bool default_val);
 } // namespace torch::executor
diff --git a/examples/models/llama2/runner/runner.h b/examples/models/llama2/runner/runner.h
index 08f5e33c47e..4e200d5e6ca 100644
--- a/examples/models/llama2/runner/runner.h
+++ b/examples/models/llama2/runner/runner.h
@@ -69,7 +69,7 @@ class Runner {
  private:
   // metadata
   template <typename T>
-  T getMetadataHelper(std::string method_name, T default_val);
+  T getMetadataHelper(const std::string& method_name, T default_val);
   template <typename T>
   int32_t
   logitsToToken(const exec_aten::Tensor& logits_tensor, int64_t pos, T _);
diff --git a/examples/models/llama2/tokenizer/tokenizer.cpp b/examples/models/llama2/tokenizer/bpe_tokenizer.cpp
similarity index 95%
rename from examples/models/llama2/tokenizer/tokenizer.cpp
rename to examples/models/llama2/tokenizer/bpe_tokenizer.cpp
index 40fc3d5683e..ed7d34aca4d 100644
--- a/examples/models/llama2/tokenizer/tokenizer.cpp
+++ b/examples/models/llama2/tokenizer/bpe_tokenizer.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/examples/models/llama2/tokenizer/tokenizer.h>
+#include <executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
 
 #include <string>
 
@@ -23,11 +23,11 @@ static int compare_tokens(const void* a, const void* b) {
   return strcmp(((TokenIndex*)a)->str, ((TokenIndex*)b)->str);
 }
 
-Tokenizer::Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok)
-    : initialized_(false),
-      vocab_size_(vocab_size),
-      bos_tok_(bos_tok),
-      eos_tok_(eos_tok),
+BPETokenizer::BPETokenizer(
+    int32_t vocab_size,
+    uint64_t bos_tok,
+    uint64_t eos_tok)
+    : Tokenizer(vocab_size, bos_tok, eos_tok),
       vocab_(std::make_unique<char*[]>(vocab_size)),
       vocab_scores_(std::make_unique<float[]>(vocab_size)),
       sorted_vocab_(std::make_unique<TokenIndex[]>(vocab_size)) {
@@ -47,7 +47,7 @@ Tokenizer::Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok)
  * @param tokenizer_path The path to the tokenizer file.
  * @return Error
  */
-Error Tokenizer::load(const std::string& tokenizer_path) {
+Error BPETokenizer::load(const std::string& tokenizer_path) {
   if (initialized_) {
     ET_LOG(Info, "Tokenizer already initialized");
     return Error::Ok;
@@ -131,7 +131,7 @@ Error Tokenizer::load(const std::string& tokenizer_path) {
   return Error::Ok;
 }
 
-Tokenizer::~Tokenizer() {
+BPETokenizer::~BPETokenizer() {
   for (int i = 0; i < vocab_size_; i++) {
     delete[] vocab_[i];
   }
@@ -145,7 +145,7 @@ Tokenizer::~Tokenizer() {
  * @return Result<std::string> A pointer to the string representation of the
  * token.
  */
-Result<std::string> Tokenizer::decode(uint64_t prev_token, uint64_t token) {
+Result<std::string> BPETokenizer::decode(uint64_t prev_token, uint64_t token) {
   if (!initialized_) {
     ET_LOG(Error, "Tokenizer not initialized");
     return Error::NotSupported;
@@ -187,7 +187,7 @@ str_lookup(const char* str, TokenIndex* sorted_vocab, int32_t vocab_size) {
  * @return Result<std::vector<uint64_t>>
  */
 Result<std::vector<uint64_t>>
-Tokenizer::encode(const std::string& text, int8_t bos, int8_t eos) {
+BPETokenizer::encode(const std::string& text, int8_t bos, int8_t eos) {
   if (!initialized_) {
     ET_LOG(Error, "Tokenizer not initialized");
     return Error::NotSupported;
diff --git a/examples/models/llama2/tokenizer/bpe_tokenizer.h b/examples/models/llama2/tokenizer/bpe_tokenizer.h
new file mode 100644
index 00000000000..82e3f396344
--- /dev/null
+++ b/examples/models/llama2/tokenizer/bpe_tokenizer.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/examples/models/llama2/tokenizer/tokenizer.h>
+#include <cstdint>
+
+namespace torch {
+namespace executor {
+
+struct TokenIndex { // element type of BPETokenizer::sorted_vocab_
+  const char* str; // token text; compare_tokens() orders entries by strcmp on it
+  int32_t id; // presumably the token's vocabulary index -- TODO confirm
+};
+
+class BPETokenizer : public Tokenizer { // concrete impl of the Tokenizer interface
+ public:
+  explicit BPETokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok);
+  ~BPETokenizer() override; // delete[]s each of the vocab_size_ entries of vocab_
+  // Returns Error::Ok immediately (after logging) if initialized_ is already set.
+  Error load(const std::string& tokenizer_path) override;
+  // Returns Error::NotSupported while initialized_ is false.
+  Result<std::vector<uint64_t>>
+  encode(const std::string& input, int8_t bos, int8_t eos) override;
+  // Returns Error::NotSupported while initialized_ is false.
+  Result<std::string> decode(uint64_t prev_token, uint64_t token) override;
+
+ private:
+  std::unique_ptr<char*[]> vocab_; // vocab_size slots; each entry freed in the dtor
+  std::unique_ptr<float[]> vocab_scores_; // vocab_size floats, allocated in the ctor
+  std::unique_ptr<TokenIndex[]> sorted_vocab_; // vocab_size entries, allocated in the ctor
+  unsigned int max_token_length_; // not in the ctor init list; presumably set by load() -- TODO confirm
+  unsigned char byte_pieces_[512]; // stores all single-byte strings
+};
+} // namespace executor
+} // namespace torch
diff --git a/examples/models/llama2/tokenizer/targets.bzl b/examples/models/llama2/tokenizer/targets.bzl
index b63f780faa1..b25693558ae 100644
--- a/examples/models/llama2/tokenizer/targets.bzl
+++ b/examples/models/llama2/tokenizer/targets.bzl
@@ -4,10 +4,11 @@ def define_common_targets():
     runtime.cxx_library(
         name = "tokenizer",
         srcs = [
-            "tokenizer.cpp",
+            "bpe_tokenizer.cpp",
         ],
         exported_headers = [
             "tokenizer.h",
+            "bpe_tokenizer.h",
         ],
         exported_deps = [
             "//executorch/runtime/core/exec_aten:lib",
diff --git a/examples/models/llama2/tokenizer/test/test_tokenizer.cpp b/examples/models/llama2/tokenizer/test/test_tokenizer.cpp
index 787f008568c..1d1f83065cf 100644
--- a/examples/models/llama2/tokenizer/test/test_tokenizer.cpp
+++ b/examples/models/llama2/tokenizer/test/test_tokenizer.cpp
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
 #include <executorch/examples/models/llama2/tokenizer/tokenizer.h>
 #include <executorch/runtime/platform/runtime.h>
 #include <gtest/gtest.h>
@@ -20,7 +21,7 @@ class TokenizerExtensionTest : public Test {
  public:
   void SetUp() override {
     torch::executor::runtime_init();
-    tokenizer_ = std::make_unique<Tokenizer>(32000, 1, 2);
+    tokenizer_ = std::make_unique<BPETokenizer>(32000, 1, 2);
     modelPath_ = std::getenv("RESOURCES_PATH") + std::string("/test.bin");
   }
 
diff --git a/examples/models/llama2/tokenizer/tokenizer.h b/examples/models/llama2/tokenizer/tokenizer.h
index 6b03278eace..5e9f0925823 100644
--- a/examples/models/llama2/tokenizer/tokenizer.h
+++ b/examples/models/llama2/tokenizer/tokenizer.h
@@ -26,22 +26,21 @@
 namespace torch {
 namespace executor {
 
-struct TokenIndex {
-  const char* str;
-  int32_t id;
-};
-
 class Tokenizer {
  public:
-  explicit Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok);
-  ~Tokenizer();
+  explicit Tokenizer(int32_t vocab_size, uint64_t bos_tok, uint64_t eos_tok)
+      : initialized_(false),
+        vocab_size_(vocab_size),
+        bos_tok_(bos_tok),
+        eos_tok_(eos_tok) {}
+  virtual ~Tokenizer() {}
 
-  Error load(const std::string& tokenizer_path);
+  virtual Error load(const std::string& tokenizer_path) = 0;
 
-  Result<std::vector<uint64_t>>
-  encode(const std::string& input, int8_t bos, int8_t eos);
+  virtual Result<std::vector<uint64_t>>
+  encode(const std::string& input, int8_t bos, int8_t eos) = 0;
 
-  Result<std::string> decode(uint64_t prev_token, uint64_t token);
+  virtual Result<std::string> decode(uint64_t prev_token, uint64_t token) = 0;
 
   // getters
   int32_t vocab_size() const {
@@ -56,15 +55,10 @@ class Tokenizer {
     return eos_tok_;
   }
 
- private:
+ protected:
   bool initialized_;
   const int32_t vocab_size_;
   uint64_t bos_tok_, eos_tok_;
-  std::unique_ptr<char*[]> vocab_;
-  std::unique_ptr<float[]> vocab_scores_;
-  std::unique_ptr<TokenIndex[]> sorted_vocab_;
-  unsigned int max_token_length_;
-  unsigned char byte_pieces_[512]; // stores all single-byte strings
 };
 
 } // namespace executor