diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9ad59f75..ccdd5eb1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,6 +68,20 @@ set(SOURCE_FILES
     src/log_surgeon/Constants.hpp
     src/log_surgeon/FileReader.cpp
     src/log_surgeon/FileReader.hpp
+    src/log_surgeon/finite_automata/Capture.hpp
+    src/log_surgeon/finite_automata/Dfa.hpp
+    src/log_surgeon/finite_automata/DfaState.hpp
+    src/log_surgeon/finite_automata/DfaStatePair.hpp
+    src/log_surgeon/finite_automata/Nfa.hpp
+    src/log_surgeon/finite_automata/NfaState.hpp
+    src/log_surgeon/finite_automata/PrefixTree.cpp
+    src/log_surgeon/finite_automata/PrefixTree.hpp
+    src/log_surgeon/finite_automata/RegexAST.hpp
+    src/log_surgeon/finite_automata/RegisterHandler.hpp
+    src/log_surgeon/finite_automata/StateType.hpp
+    src/log_surgeon/finite_automata/TaggedTransition.hpp
+    src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp
+    src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp
     src/log_surgeon/Lalr1Parser.cpp
     src/log_surgeon/Lalr1Parser.hpp
     src/log_surgeon/Lalr1Parser.tpp
@@ -93,20 +107,7 @@ set(SOURCE_FILES
     src/log_surgeon/SchemaParser.hpp
     src/log_surgeon/Token.cpp
     src/log_surgeon/Token.hpp
-    src/log_surgeon/finite_automata/PrefixTree.cpp
-    src/log_surgeon/finite_automata/PrefixTree.hpp
-    src/log_surgeon/finite_automata/RegexAST.hpp
-    src/log_surgeon/finite_automata/Dfa.hpp
-    src/log_surgeon/finite_automata/DfaState.hpp
-    src/log_surgeon/finite_automata/DfaStatePair.hpp
-    src/log_surgeon/finite_automata/Nfa.hpp
-    src/log_surgeon/finite_automata/NfaState.hpp
-    src/log_surgeon/finite_automata/RegisterHandler.hpp
-    src/log_surgeon/finite_automata/StateType.hpp
-    src/log_surgeon/finite_automata/Tag.hpp
-    src/log_surgeon/finite_automata/TaggedTransition.hpp
-    src/log_surgeon/finite_automata/UnicodeIntervalTree.hpp
-    src/log_surgeon/finite_automata/UnicodeIntervalTree.tpp
+    src/log_surgeon/UniqueIdGenerator.hpp
     )
 
 set(LCHIP_INSTALL_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/cmake/log_surgeon)
diff --git a/src/log_surgeon/Lexer.hpp b/src/log_surgeon/Lexer.hpp
index 4f68a168..d99f94f0 100644
--- a/src/log_surgeon/Lexer.hpp
+++ b/src/log_surgeon/Lexer.hpp
@@ -15,6 +15,7 @@
 #include <log_surgeon/finite_automata/DfaState.hpp>
 #include <log_surgeon/finite_automata/Nfa.hpp>
 #include <log_surgeon/finite_automata/RegexAST.hpp>
+#include <log_surgeon/finite_automata/RegisterHandler.hpp>
 #include <log_surgeon/LexicalRule.hpp>
 #include <log_surgeon/ParserInputBuffer.hpp>
 #include <log_surgeon/Token.hpp>
@@ -23,6 +24,10 @@ namespace log_surgeon {
 template <typename TypedNfaState, typename TypedDfaState>
 class Lexer {
 public:
+    using register_id_t = finite_automata::RegisterHandler::register_id_t;
+    using symbol_id_t = uint32_t;
+    using tag_id_t = finite_automata::tag_id_t;
+
     static inline std::vector<uint32_t> const cTokenEndTypes = {(uint32_t)SymbolId::TokenEnd};
     static inline std::vector<uint32_t> const cTokenUncaughtStringTypes
             = {(uint32_t)SymbolId::TokenUncaughtString};
@@ -51,7 +56,8 @@ class Lexer {
     auto get_rule(uint32_t variable_id) -> finite_automata::RegexAST<TypedNfaState>*;
 
     /**
-     * Generate DFA for lexer
+     * Generate DFA for lexer.
+     * @throw std::invalid_argument if `m_rules` contains multiple captures with the same name.
      */
     auto generate() -> void;
 
@@ -122,8 +128,48 @@ class Lexer {
         return m_dfa;
     }
 
-    std::unordered_map<std::string, uint32_t> m_symbol_id;
-    std::unordered_map<uint32_t, std::string> m_id_symbol;
+    [[nodiscard]] auto get_capture_ids_for_var_id(symbol_id_t const var_id
+    ) const -> std::optional<std::vector<symbol_id_t>> {
+        auto const entry{m_var_id_to_capture_ids.find(var_id)};
+        if (m_var_id_to_capture_ids.end() != entry) {
+            return entry->second;  // Copy of the capture ids recorded for this variable.
+        }
+        return std::nullopt;  // `var_id` has no recorded captures.
+    }
+
+    [[nodiscard]] auto get_tag_ids_for_capture_id(symbol_id_t const capture_id
+    ) const -> std::optional<std::pair<tag_id_t, tag_id_t>> {
+        auto const entry{m_capture_id_to_tag_ids.find(capture_id)};
+        if (m_capture_id_to_tag_ids.end() != entry) {
+            return entry->second;  // The (start, end) tag ids recorded for this capture.
+        }
+        return std::nullopt;  // `capture_id` has no recorded tag ids.
+    }
+
+    [[nodiscard]] auto get_register_for_tag_id(tag_id_t const tag_id
+    ) const -> std::optional<register_id_t> {
+        auto const entry{m_tag_to_register_id.find(tag_id)};
+        if (m_tag_to_register_id.end() != entry) {
+            return entry->second;
+        }
+        return std::nullopt;  // No register has been recorded for `tag_id`.
+    }
+
+    // Resolves a capture's (start, end) registers; `std::nullopt` if any mapping is missing.
+    [[nodiscard]] auto get_registers_for_capture(symbol_id_t const capture_id
+    ) const -> std::optional<std::pair<register_id_t, register_id_t>> {
+        if (auto const tag_ids{get_tag_ids_for_capture_id(capture_id)}; tag_ids.has_value()) {
+            auto const start_reg{get_register_for_tag_id(tag_ids->first)};
+            auto const end_reg{get_register_for_tag_id(tag_ids->second)};
+            if (start_reg.has_value() && end_reg.has_value()) {
+                return std::make_pair(start_reg.value(), end_reg.value());
+            }
+        }
+        return std::nullopt;
+    }
+
+    std::unordered_map<std::string, symbol_id_t> m_symbol_id;
+    std::unordered_map<symbol_id_t, std::string> m_id_symbol;
 
 private:
     /**
@@ -148,6 +194,9 @@ class Lexer {
     std::unique_ptr<finite_automata::Dfa<TypedDfaState>> m_dfa;
     bool m_asked_for_more_data{false};
     TypedDfaState const* m_prev_state{nullptr};
+    std::unordered_map<symbol_id_t, std::vector<symbol_id_t>> m_var_id_to_capture_ids;
+    std::unordered_map<symbol_id_t, std::pair<tag_id_t, tag_id_t>> m_capture_id_to_tag_ids;
+    std::unordered_map<tag_id_t, register_id_t> m_tag_to_register_id;
 };
 
 namespace lexers {
diff --git a/src/log_surgeon/Lexer.tpp b/src/log_surgeon/Lexer.tpp
index a4e36f55..2e91575c 100644
--- a/src/log_surgeon/Lexer.tpp
+++ b/src/log_surgeon/Lexer.tpp
@@ -4,6 +4,7 @@
 #include <cassert>
 #include <memory>
 #include <stack>
+#include <stdexcept>
 #include <string>
 #include <vector>
 
@@ -358,17 +359,17 @@ void Lexer<TypedNfaState, TypedDfaState>::add_delimiters(std::vector<uint32_t> c
 
 template <typename TypedNfaState, typename TypedDfaState>
 void Lexer<TypedNfaState, TypedDfaState>::add_rule(
-        uint32_t const& id,
+        symbol_id_t const& var_id,
         std::unique_ptr<finite_automata::RegexAST<TypedNfaState>> rule
 ) {
-    m_rules.emplace_back(id, std::move(rule));
+    m_rules.emplace_back(var_id, std::move(rule));
 }
 
 template <typename TypedNfaState, typename TypedDfaState>
-auto Lexer<TypedNfaState, TypedDfaState>::get_rule(uint32_t const variable_id
+auto Lexer<TypedNfaState, TypedDfaState>::get_rule(symbol_id_t const var_id
 ) -> finite_automata::RegexAST<TypedNfaState>* {
     for (auto const& rule : m_rules) {
-        if (rule.get_variable_id() == variable_id) {
+        if (rule.get_variable_id() == var_id) {
             return rule.get_regex();
         }
     }
@@ -377,8 +378,30 @@ auto Lexer<TypedNfaState, TypedDfaState>::get_rule(uint32_t const variable_id
 
 template <typename TypedNfaState, typename TypedDfaState>
 void Lexer<TypedNfaState, TypedDfaState>::generate() {
-    finite_automata::Nfa<TypedNfaState> nfa{std::move(m_rules)};
-    // TODO: DFA ignores tags. E.g., treats "capture:user=(?<user_id>\d+)" as "capture:user=\d+"
+    for (auto const& rule : m_rules) {
+        for (auto* capture : rule.get_captures()) {
+            std::string const capture_name{capture->get_name()};
+            symbol_id_t capture_id{0};
+            if (m_symbol_id.find(capture_name) == m_symbol_id.end()) {
+                capture_id = m_symbol_id.size();
+                m_symbol_id[capture_name] = capture_id;
+                m_id_symbol[capture_id] = capture_name;
+            } else {
+                throw std::invalid_argument("`m_rules` contains capture names that are not unique."
+                );
+            }
+            m_var_id_to_capture_ids[rule.get_variable_id()].push_back(capture_id);
+        }
+    }
+
+    finite_automata::Nfa<TypedNfaState> nfa{m_rules};
+    for (auto const& [capture, tag_ids] : nfa.get_capture_to_tag_ids()) {
+        std::string capture_name{capture->get_name()};
+        auto capture_id{m_symbol_id[capture_name]};
+        m_capture_id_to_tag_ids.emplace(capture_id, tag_ids);
+    }
+
+    // TODO: DFA ignores captures. E.g., treats "capture:user=(?<user_id>\d+)" as "capture:user=\d+"
     m_dfa = std::make_unique<finite_automata::Dfa<TypedDfaState>>(std::move(nfa));
     auto const* state = m_dfa->get_root();
     for (uint32_t i = 0; i < cSizeOfByte; i++) {
diff --git a/src/log_surgeon/LexicalRule.hpp b/src/log_surgeon/LexicalRule.hpp
index 6ab7e861..c81456be 100644
--- a/src/log_surgeon/LexicalRule.hpp
+++ b/src/log_surgeon/LexicalRule.hpp
@@ -23,6 +23,10 @@ class LexicalRule {
      */
     auto add_to_nfa(finite_automata::Nfa<TypedNfaState>* nfa) const -> void;
 
+    [[nodiscard]] auto get_captures() const -> std::vector<finite_automata::Capture const*> {
+        return m_regex->get_subtree_positive_captures();
+    }
+
     [[nodiscard]] auto get_variable_id() const -> uint32_t { return m_variable_id; }
 
     [[nodiscard]] auto get_regex() const -> finite_automata::RegexAST<TypedNfaState>* {
diff --git a/src/log_surgeon/SchemaParser.cpp b/src/log_surgeon/SchemaParser.cpp
index d36271ca..1960e997 100644
--- a/src/log_surgeon/SchemaParser.cpp
+++ b/src/log_surgeon/SchemaParser.cpp
@@ -9,8 +9,8 @@
 
 #include <log_surgeon/Constants.hpp>
 #include <log_surgeon/FileReader.hpp>
+#include <log_surgeon/finite_automata/Capture.hpp>
 #include <log_surgeon/finite_automata/RegexAST.hpp>
-#include <log_surgeon/finite_automata/Tag.hpp>
 #include <log_surgeon/Lalr1Parser.hpp>
 #include <log_surgeon/Lexer.hpp>
 #include <log_surgeon/utils.hpp>
@@ -167,7 +167,7 @@ static auto regex_capture_rule(NonTerminal const* m) -> std::unique_ptr<ParserAS
     auto& r6 = m->non_terminal_cast(5)->get_parser_ast()->get<unique_ptr<RegexASTByte>>();
     return std::make_unique<ParserValueRegex>(make_unique<RegexASTCaptureByte>(
             std::move(r6),
-            std::make_unique<finite_automata::Tag>(r4->m_name)
+            std::make_unique<finite_automata::Capture>(r4->m_name)
     ));
 }
 
@@ -202,7 +202,7 @@ static auto regex_or_rule(NonTerminal* m) -> unique_ptr<ParserAST> {
 static auto regex_match_zero_or_more_rule(NonTerminal* m) -> unique_ptr<ParserAST> {
     auto& r1 = m->non_terminal_cast(0)->get_parser_ast()->get<unique_ptr<RegexASTByte>>();
 
-    // To handle negative tags we treat `R*` as `R+ | ∅`.
+    // To handle negative captures we treat `R*` as `R+ | ∅`.
     return make_unique<ParserValueRegex>(make_unique<RegexASTOrByte>(
             make_unique<RegexASTEmptyByte>(),
             make_unique<RegexASTMultiplicationByte>(std::move(r1), 1, 0)
@@ -248,7 +248,7 @@ static auto regex_match_range_rule(NonTerminal* m) -> unique_ptr<ParserAST> {
     auto& r1 = m->non_terminal_cast(0)->get_parser_ast()->get<unique_ptr<RegexASTByte>>();
 
     if (0 == min) {
-        // To handle negative tags we treat `R*` as `R+ | ∅`.
+        // To handle negative captures we treat `R*` as `R+ | ∅`.
         return make_unique<ParserValueRegex>(make_unique<RegexASTOrByte>(
                 make_unique<RegexASTEmptyByte>(),
                 make_unique<RegexASTMultiplicationByte>(std::move(r1), 1, max)
diff --git a/src/log_surgeon/UniqueIdGenerator.hpp b/src/log_surgeon/UniqueIdGenerator.hpp
new file mode 100644
index 00000000..47ab1490
--- /dev/null
+++ b/src/log_surgeon/UniqueIdGenerator.hpp
@@ -0,0 +1,26 @@
+#ifndef LOG_SURGEON_UNIQUEIDGENERATOR_HPP
+#define LOG_SURGEON_UNIQUEIDGENERATOR_HPP
+
+#include <cstdint>
+
+namespace log_surgeon {
+/**
+ * Hands out sequential `uint32_t` ids, starting from 0. Each generator instance keeps its own
+ * counter, so ids are only unique among the values returned by the same instance.
+ */
+class UniqueIdGenerator {
+public:
+    UniqueIdGenerator() = default;
+
+    /**
+     * @return The current id; the internal counter is then incremented. The counter is an unsigned
+     * 32-bit value, so it wraps around to 0 after 2^32 ids have been generated.
+     */
+    [[nodiscard]] auto generate_id() -> uint32_t { return m_current_id++; }
+
+private:
+    uint32_t m_current_id{0};
+};
+}  // namespace log_surgeon
+
+#endif  // LOG_SURGEON_UNIQUEIDGENERATOR_HPP
diff --git a/src/log_surgeon/finite_automata/Tag.hpp b/src/log_surgeon/finite_automata/Capture.hpp
similarity index 55%
rename from src/log_surgeon/finite_automata/Tag.hpp
rename to src/log_surgeon/finite_automata/Capture.hpp
index 3a3b4d7f..84480eab 100644
--- a/src/log_surgeon/finite_automata/Tag.hpp
+++ b/src/log_surgeon/finite_automata/Capture.hpp
@@ -1,14 +1,14 @@
-#ifndef LOG_SURGEON_FINITE_AUTOMATA_TAG
-#define LOG_SURGEON_FINITE_AUTOMATA_TAG
+#ifndef LOG_SURGEON_FINITE_AUTOMATA_CAPTURE
+#define LOG_SURGEON_FINITE_AUTOMATA_CAPTURE
 
 #include <string>
 #include <string_view>
 #include <utility>
 
 namespace log_surgeon::finite_automata {
-class Tag {
+class Capture {
 public:
-    explicit Tag(std::string name) : m_name{std::move(name)} {}
+    explicit Capture(std::string name) : m_name{std::move(name)} {}
 
     [[nodiscard]] auto get_name() const -> std::string_view { return m_name; }
 
@@ -17,4 +17,4 @@ class Tag {
 };
 }  // namespace log_surgeon::finite_automata
 
-#endif  // LOG_SURGEON_FINITE_AUTOMATA_TAG
+#endif  // LOG_SURGEON_FINITE_AUTOMATA_CAPTURE
diff --git a/src/log_surgeon/finite_automata/Dfa.hpp b/src/log_surgeon/finite_automata/Dfa.hpp
index baceaec3..ecafa7e8 100644
--- a/src/log_surgeon/finite_automata/Dfa.hpp
+++ b/src/log_surgeon/finite_automata/Dfa.hpp
@@ -2,12 +2,16 @@
 #define LOG_SURGEON_FINITE_AUTOMATA_DFA_HPP
 
 #include <cstdint>
+#include <map>
 #include <memory>
 #include <set>
+#include <stack>
 #include <vector>
 
+#include <log_surgeon/Constants.hpp>
 #include <log_surgeon/finite_automata/DfaStatePair.hpp>
 #include <log_surgeon/finite_automata/Nfa.hpp>
+#include <log_surgeon/finite_automata/RegisterHandler.hpp>
 
 namespace log_surgeon::finite_automata {
 template <typename TypedDfaState>
@@ -38,6 +42,7 @@ class Dfa {
 
 private:
     std::vector<std::unique_ptr<TypedDfaState>> m_states;
+    RegisterHandler m_register_handler;
 };
 
 template <typename TypedDfaState>
@@ -74,7 +79,7 @@ Dfa<TypedDfaState>::Dfa(Nfa<TypedNfaState> nfa) {
         }
         auto next_dfa_state
                 = [&dfa_states, &create_dfa_state](StateSet const& set) -> TypedDfaState* {
-            TypedDfaState* state;
+            TypedDfaState* state{nullptr};
             auto it = dfa_states.find(set);
             if (it == dfa_states.end()) {
                 state = create_dfa_state(set);
diff --git a/src/log_surgeon/finite_automata/Nfa.hpp b/src/log_surgeon/finite_automata/Nfa.hpp
index 8eaaaadd..30d0266c 100644
--- a/src/log_surgeon/finite_automata/Nfa.hpp
+++ b/src/log_surgeon/finite_automata/Nfa.hpp
@@ -15,14 +15,23 @@
 #include <log_surgeon/Constants.hpp>
 #include <log_surgeon/finite_automata/NfaState.hpp>
 #include <log_surgeon/LexicalRule.hpp>
+#include <log_surgeon/UniqueIdGenerator.hpp>
 
 namespace log_surgeon::finite_automata {
+/**
+ * Represents an NFA (non-deterministic finite automaton) for recognizing a language based on the
+ * set of rules used during initialization. Currently used as an intermediate model for generating
+ * the DFA.
+ *
+ * Currently we assume all capture groups have unique names.
+ * @tparam TypedNfaState
+ */
 template <typename TypedNfaState>
 class Nfa {
 public:
     using StateVec = std::vector<TypedNfaState*>;
 
-    explicit Nfa(std::vector<LexicalRule<TypedNfaState>> rules);
+    explicit Nfa(std::vector<LexicalRule<TypedNfaState>> const& rules);
 
     /**
      * Creates a unique_ptr for an NFA state with no tagged transitions and adds it to `m_states`.
@@ -30,40 +39,28 @@ class Nfa {
      */
     [[nodiscard]] auto new_state() -> TypedNfaState*;
 
-    /**
-     * Creates a unique_ptr for an NFA state with a positive tagged end transition and adds it to
-     * `m_states`.
-     * @param tag
-     * @param dest_state
-     * @return A new state with a positive tagged end transition to `dest_state`.
-     */
-    [[nodiscard]] auto new_state_with_positive_tagged_end_transition(
-            Tag const* tag,
-            TypedNfaState const* dest_state
-    ) -> TypedNfaState*;
-
     /**
      * Creates a unique_ptr for an NFA state with a negative tagged transition and adds it to
      * `m_states`.
-     * @param tags
+     * @param captures
      * @param dest_state
      * @return TypedNfaState*
      */
     [[nodiscard]] auto new_state_with_negative_tagged_transition(
-            std::vector<Tag const*> tags,
+            std::vector<Capture const*> const& captures,
             TypedNfaState const* dest_state
     ) -> TypedNfaState*;
 
     /**
      * Creates the start and end states for a capture group.
-     * @param tag The tag associated with the capture group.
+     * @param capture The capture associated with the capture group.
      * @param dest_state
      * @return A pair of states:
      * - A new state with a positive tagged start transition from `m_root`.
      * - A new state with a positive tagged end transition to `dest_state`.
      */
     [[nodiscard]] auto new_start_and_end_states_with_positive_tagged_transitions(
-            Tag const* tag,
+            Capture const* capture,
             TypedNfaState const* dest_state
     ) -> std::pair<TypedNfaState*, TypedNfaState*>;
 
@@ -86,23 +83,61 @@ class Nfa {
 
     auto get_root() -> TypedNfaState* { return m_root; }
 
+    [[nodiscard]] auto get_capture_to_tag_ids(
+    ) const -> std::unordered_map<Capture const*, std::pair<tag_id_t, tag_id_t>> const& {
+        return m_capture_to_tag_ids;  // Returned by reference so the map is not copied per call.
+    }
+
 private:
+    /**
+     * Creates start and end tags for the specified capture if they don't currently exist.
+     * @param capture
+     * @return The start and end tags corresponding to `capture`.
+     */
+    auto get_or_create_capture_tags(Capture const* capture) -> std::pair<tag_id_t, tag_id_t>;
+
+    /**
+     * Creates a `unique_ptr` for an NFA state with a positive tagged end transition and adds it to
+     * `m_states`.
+     * @param tag_id
+     * @param dest_state
+     * @return A new state with a positive tagged end transition to `dest_state`.
+     */
+    [[nodiscard]] auto new_state_with_positive_tagged_end_transition(
+            tag_id_t tag_id,
+            TypedNfaState const* dest_state
+    ) -> TypedNfaState*;
+
     std::vector<std::unique_ptr<TypedNfaState>> m_states;
+    // TODO: Lexer currently enforces unique naming across capture groups. However, this limits use
+    // cases. Possibly initialize this in the lexer and pass it in during construction.
+    std::unordered_map<Capture const*, std::pair<tag_id_t, tag_id_t>> m_capture_to_tag_ids;
     TypedNfaState* m_root;
-    // Store the rules locally as they contain information needed by the NFA. E.g., transitions in
-    // the NFA point to tags in the rule ASTs.
-    std::vector<LexicalRule<TypedNfaState>> m_rules;
+    UniqueIdGenerator m_unique_id_generator;
 };
 
 template <typename TypedNfaState>
-Nfa<TypedNfaState>::Nfa(std::vector<LexicalRule<TypedNfaState>> rules)
-        : m_root{new_state()},
-          m_rules{std::move(rules)} {
-    for (auto const& rule : m_rules) {
+Nfa<TypedNfaState>::Nfa(std::vector<LexicalRule<TypedNfaState>> const& rules)
+        : m_root{new_state()} {
+    for (auto const& rule : rules) {
         rule.add_to_nfa(this);
     }
 }
 
+template <typename TypedNfaState>
+auto Nfa<TypedNfaState>::get_or_create_capture_tags(Capture const* capture
+) -> std::pair<tag_id_t, tag_id_t> {
+    // Reuse the tag pair if this capture has already been assigned one.
+    if (auto const it{m_capture_to_tag_ids.find(capture)}; m_capture_to_tag_ids.end() != it) {
+        return it->second;
+    }
+    auto const start_tag{m_unique_id_generator.generate_id()};  // Start id comes first.
+    auto const end_tag{m_unique_id_generator.generate_id()};
+    auto const tag_pair{std::make_pair(start_tag, end_tag)};
+    m_capture_to_tag_ids.emplace(capture, tag_pair);
+    return tag_pair;
+}
+
 template <typename TypedNfaState>
 auto Nfa<TypedNfaState>::new_state() -> TypedNfaState* {
     m_states.emplace_back(std::make_unique<TypedNfaState>());
@@ -111,31 +146,38 @@ auto Nfa<TypedNfaState>::new_state() -> TypedNfaState* {
 
 template <typename TypedNfaState>
 auto Nfa<TypedNfaState>::new_state_with_positive_tagged_end_transition(
-        Tag const* tag,
+        tag_id_t const tag_id,
         TypedNfaState const* dest_state
 ) -> TypedNfaState* {
-    m_states.emplace_back(std::make_unique<TypedNfaState>(tag, dest_state));
+    m_states.emplace_back(std::make_unique<TypedNfaState>(tag_id, dest_state));
     return m_states.back().get();
 }
 
 template <typename TypedNfaState>
 auto Nfa<TypedNfaState>::new_state_with_negative_tagged_transition(
-        std::vector<Tag const*> tags,
+        std::vector<Capture const*> const& captures,
         TypedNfaState const* dest_state
 ) -> TypedNfaState* {
+    std::vector<tag_id_t> tags;
+    for (auto const capture : captures) {
+        auto [start_tag, end_tag]{get_or_create_capture_tags(capture)};
+        tags.push_back(start_tag);
+        tags.push_back(end_tag);
+    }
+
     m_states.emplace_back(std::make_unique<TypedNfaState>(std::move(tags), dest_state));
     return m_states.back().get();
 }
 
 template <typename TypedNfaState>
 auto Nfa<TypedNfaState>::new_start_and_end_states_with_positive_tagged_transitions(
-        Tag const* tag,
+        Capture const* capture,
         TypedNfaState const* dest_state
 ) -> std::pair<TypedNfaState*, TypedNfaState*> {
+    auto [start_tag, end_tag]{get_or_create_capture_tags(capture)};
     auto* start_state = new_state();
-    m_root->add_positive_tagged_start_transition(tag, start_state);
-
-    auto* end_state = new_state_with_positive_tagged_end_transition(tag, dest_state);
+    m_root->add_positive_tagged_start_transition(start_tag, start_state);
+    auto* end_state{new_state_with_positive_tagged_end_transition(end_tag, dest_state)};
     return {start_state, end_state};
 }
 
diff --git a/src/log_surgeon/finite_automata/NfaState.hpp b/src/log_surgeon/finite_automata/NfaState.hpp
index 590c1607..a3b46ba0 100644
--- a/src/log_surgeon/finite_automata/NfaState.hpp
+++ b/src/log_surgeon/finite_automata/NfaState.hpp
@@ -18,6 +18,7 @@
 #include <log_surgeon/finite_automata/UnicodeIntervalTree.hpp>
 
 namespace log_surgeon::finite_automata {
+
 template <StateType state_type>
 class NfaState;
 
@@ -31,11 +32,12 @@ class NfaState {
 
     NfaState() = default;
 
-    NfaState(Tag const* tag, NfaState const* dest_state)
-            : m_positive_tagged_end_transition{PositiveTaggedTransition{tag, dest_state}} {}
+    NfaState(tag_id_t tag_id, NfaState const* dest_state)
+            : m_positive_tagged_end_transition{PositiveTaggedTransition{tag_id, dest_state}} {}
 
-    NfaState(std::vector<Tag const*> tags, NfaState const* dest_state)
-            : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tags), dest_state}} {}
+    NfaState(std::vector<tag_id_t> tag_ids, NfaState const* dest_state)
+            : m_negative_tagged_transition{NegativeTaggedTransition{std::move(tag_ids), dest_state}
+              } {}
 
     auto set_accepting(bool accepting) -> void { m_accepting = accepting; }
 
@@ -49,8 +51,9 @@ class NfaState {
         return m_matching_variable_id;
     }
 
-    auto add_positive_tagged_start_transition(Tag const* tag, NfaState const* dest_state) -> void {
-        m_positive_tagged_start_transitions.emplace_back(tag, dest_state);
+    auto add_positive_tagged_start_transition(tag_id_t const tag_id, NfaState const* dest_state)
+            -> void {
+        m_positive_tagged_start_transitions.emplace_back(tag_id, dest_state);
     }
 
     [[nodiscard]] auto get_positive_tagged_start_transitions(
diff --git a/src/log_surgeon/finite_automata/PrefixTree.hpp b/src/log_surgeon/finite_automata/PrefixTree.hpp
index e2de78aa..fc047d2a 100644
--- a/src/log_surgeon/finite_automata/PrefixTree.hpp
+++ b/src/log_surgeon/finite_automata/PrefixTree.hpp
@@ -12,11 +12,12 @@ namespace log_surgeon::finite_automata {
  * Represents a prefix tree to store register data during TDFA simulation. Each node in the tree
  * stores a single position in the lexed string. Each path from the root to an index corresponds to
  * a sequence of positions for an individual tag:
- * - Positive position node: Indicates the tag was matched at the position.
- * - Negative position node: Indicates the tag was unmatched. If a negative node is the entire path,
- *   it indicates the tag was never matched. If the negative tag is along a path containing positive
- *   nodes, it functions as a placeholder. This can be useful for nested capture groups, to maintain
- *   a one-to-one mapping between the contained capture group and the enclosing capture group.
+ * - Positive position node: Indicates the tag was matched at the position.
+ * - Negative position node: Indicates the tag was unmatched. If a negative node is the entire
+ *   path, it indicates the tag was never matched. If the negative node is along a path containing
+ *   positive nodes, it functions as a placeholder. This can be useful for nested capture groups, to
+ *   maintain a one-to-one mapping between the contained capture group and the enclosing capture
+ *   group.
  */
 class PrefixTree {
 public:
diff --git a/src/log_surgeon/finite_automata/RegexAST.hpp b/src/log_surgeon/finite_automata/RegexAST.hpp
index bb55f62d..eada30b1 100644
--- a/src/log_surgeon/finite_automata/RegexAST.hpp
+++ b/src/log_surgeon/finite_automata/RegexAST.hpp
@@ -20,7 +20,7 @@
 #include <fmt/xchar.h>
 
 #include <log_surgeon/Constants.hpp>
-#include <log_surgeon/finite_automata/Tag.hpp>
+#include <log_surgeon/finite_automata/Capture.hpp>
 #include <log_surgeon/finite_automata/UnicodeIntervalTree.hpp>
 
 namespace log_surgeon::finite_automata {
@@ -30,12 +30,12 @@ class Nfa;
 // TODO: rename `RegexAST` to `RegexASTNode`
 /**
  * Base class for a Regex AST node.
- * Unique integer tags are used to differentiate each capture group node. Every node will maintain
- * two sets of tags:
- * 1. `m_subtree_positive_tags`: the set of tags matched by all capture groups within the subtree
- *    rooted at this node.
- * 2. `m_negative_tags`: the set of tags that are guaranteed to be unmatched when traversing this
- *    node, as the alternative path contains these tags.
+ * Unique capture pointers are used to differentiate each capture group node. Every node will
+ * maintain two sets of captures:
+ * 1. `m_subtree_positive_captures`: the set of captures matched by all capture groups within the
+ *    subtree rooted at this node.
+ * 2. `m_negative_captures`: the set of captures that are guaranteed to be unmatched when traversing
+ *    this node, as the alternative path contains these captures.
  *
  * ASTs built using this class are assumed to be constructed in a bottom-up manner, where all
  * descendant nodes are created first.
@@ -83,24 +83,26 @@ class RegexAST {
      */
     [[nodiscard]] virtual auto serialize() const -> std::u32string = 0;
 
-    [[nodiscard]] auto get_subtree_positive_tags() const -> std::vector<Tag const*> const& {
-        return m_subtree_positive_tags;
+    [[nodiscard]] auto get_subtree_positive_captures() const -> std::vector<Capture const*> const& {
+        return m_subtree_positive_captures;
     }
 
-    auto set_subtree_positive_tags(std::vector<Tag const*> subtree_positive_tags) -> void {
-        m_subtree_positive_tags = std::move(subtree_positive_tags);
+    auto set_subtree_positive_captures(std::vector<Capture const*> subtree_positive_captures
+    ) -> void {
+        m_subtree_positive_captures = std::move(subtree_positive_captures);
     }
 
-    auto add_subtree_positive_tags(std::vector<Tag const*> const& subtree_positive_tags) -> void {
-        m_subtree_positive_tags.insert(
-                m_subtree_positive_tags.end(),
-                subtree_positive_tags.cbegin(),
-                subtree_positive_tags.cend()
+    auto add_subtree_positive_captures(std::vector<Capture const*> const& subtree_positive_captures
+    ) -> void {
+        m_subtree_positive_captures.insert(
+                m_subtree_positive_captures.end(),
+                subtree_positive_captures.cbegin(),
+                subtree_positive_captures.cend()
         );
     }
 
-    auto set_negative_tags(std::vector<Tag const*> negative_tags) -> void {
-        m_negative_tags = std::move(negative_tags);
+    auto set_negative_captures(std::vector<Capture const*> negative_captures) -> void {
+        m_negative_captures = std::move(negative_captures);
     }
 
     /**
@@ -110,11 +112,13 @@ class RegexAST {
      */
     auto
     add_to_nfa_with_negative_tags(Nfa<TypedNfaState>* nfa, TypedNfaState* end_state) const -> void {
-        // Handle negative tags as:
-        // root --(regex)--> state_with_negative_tagged_transition --(negative tags)--> end_state
-        if (false == m_negative_tags.empty()) {
-            auto* state_with_negative_tagged_transition
-                    = nfa->new_state_with_negative_tagged_transition(m_negative_tags, end_state);
+        // Handle negative captures as:
+        // root --(regex)--> state_with_negative_tagged_transition --(negative captures)-->
+        // end_state
+        if (false == m_negative_captures.empty()) {
+            auto* state_with_negative_tagged_transition{
+                    nfa->new_state_with_negative_tagged_transition(m_negative_captures, end_state)
+            };
             add_to_nfa(nfa, state_with_negative_tagged_transition);
         } else {
             add_to_nfa(nfa, end_state);
@@ -127,27 +131,30 @@ class RegexAST {
     RegexAST(RegexAST&& rhs) noexcept = delete;
     auto operator=(RegexAST&& rhs) noexcept -> RegexAST& = delete;
 
-    [[nodiscard]] auto serialize_negative_tags() const -> std::u32string {
-        if (m_negative_tags.empty()) {
+    [[nodiscard]] auto serialize_negative_captures() const -> std::u32string {
+        if (m_negative_captures.empty()) {
             return U"";
         }
 
-        auto const transformed_negative_tags
-                = m_negative_tags | std::ranges::views::transform([](Tag const* tag) {
-                      return fmt::format("<~{}>", tag->get_name());
-                  });
-        auto const negative_tags_string
-                = fmt::format("{}", fmt::join(transformed_negative_tags, ""));
+        auto const transformed_negative_captures{
+                m_negative_captures | std::ranges::views::transform([](Capture const* capture) {
+                    return fmt::format("<~{}>", capture->get_name());
+                })
+        };
+        auto const negative_captures_string{
+                fmt::format("{}", fmt::join(transformed_negative_captures, ""))
+        };
 
         return fmt::format(
                 U"{}",
-                std::u32string(negative_tags_string.begin(), negative_tags_string.end())
+                std::u32string(negative_captures_string.begin(), negative_captures_string.end())
         );
     }
 
 private:
-    std::vector<Tag const*> m_subtree_positive_tags;
-    std::vector<Tag const*> m_negative_tags;
+    std::vector<uint32_t> m_subtree_capture_ids;
+    std::vector<Capture const*> m_subtree_positive_captures;
+    std::vector<Capture const*> m_negative_captures;
 };
 
 /**
@@ -624,7 +631,7 @@ class RegexASTMultiplication : public RegexAST<TypedNfaState> {
 /**
  * Represents a capture group AST node.
  * NOTE:
- * - `m_tag` is always expected to be non-null.
+ * - `m_capture` is always expected to be non-null.
  * - `m_group_regex_ast` is always expected to be non-null.
  * @tparam TypedNfaState Specifies the type of transition (bytes or UTF-8 characters).
  */
@@ -635,24 +642,26 @@ class RegexASTCapture : public RegexAST<TypedNfaState> {
 
     /**
      * @param group_regex_ast
-     * @param tag
-     * @throw std::invalid_argument if `group_regex_ast` or `tag` are `nullptr`.
+     * @param capture
+     * @throw std::invalid_argument if `group_regex_ast` or `capture` are `nullptr`.
      */
     RegexASTCapture(
             std::unique_ptr<RegexAST<TypedNfaState>> group_regex_ast,
-            std::unique_ptr<Tag> tag
+            std::unique_ptr<Capture> capture
     )
             : m_group_regex_ast{(
                       nullptr == group_regex_ast
                               ? throw std::invalid_argument("Group regex AST cannot be null")
                               : std::move(group_regex_ast)
               )},
-              m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null")
-                                   : std::move(tag)} {
-        RegexAST<TypedNfaState>::set_subtree_positive_tags(
-                m_group_regex_ast->get_subtree_positive_tags()
+              m_capture{
+                      nullptr == capture ? throw std::invalid_argument("Capture cannot be null")
+                                         : std::move(capture)
+              } {
+        RegexAST<TypedNfaState>::set_subtree_positive_captures(
+                m_group_regex_ast->get_subtree_positive_captures()
         );
-        RegexAST<TypedNfaState>::add_subtree_positive_tags({m_tag.get()});
+        RegexAST<TypedNfaState>::add_subtree_positive_captures({m_capture.get()});
     }
 
     RegexASTCapture(RegexASTCapture const& rhs)
@@ -660,8 +669,8 @@ class RegexASTCapture : public RegexAST<TypedNfaState> {
               m_group_regex_ast{
                       std::unique_ptr<RegexAST<TypedNfaState>>(rhs.m_group_regex_ast->clone())
               },
-              m_tag{std::make_unique<Tag>(*rhs.m_tag)} {
-        RegexAST<TypedNfaState>::set_subtree_positive_tags(rhs.get_subtree_positive_tags());
+              m_capture{std::make_unique<Capture>(*rhs.m_capture)} {
+        RegexAST<TypedNfaState>::set_subtree_positive_captures(rhs.get_subtree_positive_captures());
     }
 
     /**
@@ -701,7 +710,7 @@ class RegexASTCapture : public RegexAST<TypedNfaState> {
 
     [[nodiscard]] auto serialize() const -> std::u32string override;
 
-    [[nodiscard]] auto get_group_name() const -> std::string_view { return m_tag->get_name(); }
+    [[nodiscard]] auto get_group_name() const -> std::string_view { return m_capture->get_name(); }
 
     [[nodiscard]] auto get_group_regex_ast(
     ) const -> std::unique_ptr<RegexAST<TypedNfaState>> const& {
@@ -710,12 +719,12 @@ class RegexASTCapture : public RegexAST<TypedNfaState> {
 
 private:
     std::unique_ptr<RegexAST<TypedNfaState>> m_group_regex_ast;
-    std::unique_ptr<Tag> m_tag;
+    std::unique_ptr<Capture> m_capture;
 };
 
 template <typename TypedNfaState>
 [[nodiscard]] auto RegexASTEmpty<TypedNfaState>::serialize() const -> std::u32string {
-    return fmt::format(U"{}", RegexAST<TypedNfaState>::serialize_negative_tags());
+    return fmt::format(U"{}", RegexAST<TypedNfaState>::serialize_negative_captures());
 }
 
 template <typename TypedNfaState>
@@ -732,7 +741,7 @@ template <typename TypedNfaState>
     return fmt::format(
             U"{}{}",
             static_cast<char32_t>(m_character),
-            RegexAST<TypedNfaState>::serialize_negative_tags()
+            RegexAST<TypedNfaState>::serialize_negative_captures()
     );
 }
 
@@ -763,7 +772,7 @@ template <typename TypedNfaState>
     return fmt::format(
             U"{}{}",
             std::u32string(digits_string.begin(), digits_string.end()),
-            RegexAST<TypedNfaState>::serialize_negative_tags()
+            RegexAST<TypedNfaState>::serialize_negative_captures()
     );
 }
 
@@ -774,10 +783,11 @@ RegexASTOr<TypedNfaState>::RegexASTOr(
 )
         : m_left(std::move(left)),
           m_right(std::move(right)) {
-    m_left->set_negative_tags(m_right->get_subtree_positive_tags());
-    m_right->set_negative_tags(m_left->get_subtree_positive_tags());
-    RegexAST<TypedNfaState>::set_subtree_positive_tags(m_left->get_subtree_positive_tags());
-    RegexAST<TypedNfaState>::add_subtree_positive_tags(m_right->get_subtree_positive_tags());
+    m_left->set_negative_captures(m_right->get_subtree_positive_captures());
+    m_right->set_negative_captures(m_left->get_subtree_positive_captures());
+    RegexAST<TypedNfaState>::set_subtree_positive_captures(m_left->get_subtree_positive_captures());
+    RegexAST<TypedNfaState>::add_subtree_positive_captures(m_right->get_subtree_positive_captures()
+    );
 }
 
 template <typename TypedNfaState>
@@ -793,7 +803,7 @@ template <typename TypedNfaState>
             U"({})|({}){}",
             nullptr != m_left ? m_left->serialize() : U"null",
             nullptr != m_right ? m_right->serialize() : U"null",
-            RegexAST<TypedNfaState>::serialize_negative_tags()
+            RegexAST<TypedNfaState>::serialize_negative_captures()
     );
 }
 
@@ -804,8 +814,9 @@ RegexASTCat<TypedNfaState>::RegexASTCat(
 )
         : m_left(std::move(left)),
           m_right(std::move(right)) {
-    RegexAST<TypedNfaState>::set_subtree_positive_tags(m_left->get_subtree_positive_tags());
-    RegexAST<TypedNfaState>::add_subtree_positive_tags(m_right->get_subtree_positive_tags());
+    RegexAST<TypedNfaState>::set_subtree_positive_captures(m_left->get_subtree_positive_captures());
+    RegexAST<TypedNfaState>::add_subtree_positive_captures(m_right->get_subtree_positive_captures()
+    );
 }
 
 template <typename TypedNfaState>
@@ -825,7 +836,7 @@ template <typename TypedNfaState>
             U"{}{}{}",
             nullptr != m_left ? m_left->serialize() : U"null",
             nullptr != m_right ? m_right->serialize() : U"null",
-            RegexAST<TypedNfaState>::serialize_negative_tags()
+            RegexAST<TypedNfaState>::serialize_negative_captures()
     );
 }
 
@@ -838,7 +849,8 @@ RegexASTMultiplication<TypedNfaState>::RegexASTMultiplication(
         : m_operand(std::move(operand)),
           m_min(min),
           m_max(max) {
-    RegexAST<TypedNfaState>::set_subtree_positive_tags(m_operand->get_subtree_positive_tags());
+    RegexAST<TypedNfaState>::set_subtree_positive_captures(m_operand->get_subtree_positive_captures(
+    ));
 }
 
 template <typename TypedNfaState>
@@ -887,7 +899,7 @@ template <typename TypedNfaState>
             nullptr != m_operand ? m_operand->serialize() : U"null",
             std::u32string(min_string.begin(), min_string.end()),
             is_infinite() ? U"inf" : std::u32string(max_string.begin(), max_string.end()),
-            RegexAST<TypedNfaState>::serialize_negative_tags()
+            RegexAST<TypedNfaState>::serialize_negative_captures()
     );
 }
 
@@ -900,7 +912,7 @@ auto RegexASTCapture<TypedNfaState>::add_to_nfa(Nfa<TypedNfaState>* nfa, TypedNf
     //         +---------------------+
     //         |       `m_root`      |
     //         +---------------------+
-    //                    | `m_tag` start
+    //                    | `m_capture` start
     //                    | (positive tagged start transition)
     //                    v
     //         +---------------------+
@@ -913,13 +925,13 @@ auto RegexASTCapture<TypedNfaState>::add_to_nfa(Nfa<TypedNfaState>* nfa, TypedNf
     //         | `m_group_regex_ast` |
     //         |    (nested NFA)     |
     //         +---------------------+
-    //                    | `m_negative_tags`
+    //                    | `m_negative_captures`
     //                    | (negative tagged transition)
     //                    v
     //         +---------------------+
     //         | `capture_end_state` |
     //         +---------------------+
-    //                    | `m_tag` end
+    //                    | `m_capture` end
     //                    | (positive tagged end transition)
     //                    v
     //         +---------------------+
@@ -927,7 +939,7 @@ auto RegexASTCapture<TypedNfaState>::add_to_nfa(Nfa<TypedNfaState>* nfa, TypedNf
     //         +---------------------+
     auto [capture_start_state, capture_end_state]
             = nfa->new_start_and_end_states_with_positive_tagged_transitions(
-                    m_tag.get(),
+                    m_capture.get(),
                     dest_state
             );
 
@@ -939,12 +951,14 @@ auto RegexASTCapture<TypedNfaState>::add_to_nfa(Nfa<TypedNfaState>* nfa, TypedNf
 
 template <typename TypedNfaState>
 [[nodiscard]] auto RegexASTCapture<TypedNfaState>::serialize() const -> std::u32string {
-    auto const tag_name_u32 = std::u32string(m_tag->get_name().cbegin(), m_tag->get_name().cend());
+    auto const capture_name_u32{
+            std::u32string(m_capture->get_name().cbegin(), m_capture->get_name().cend())
+    };
     return fmt::format(
             U"({})<{}>{}",
             m_group_regex_ast->serialize(),
-            tag_name_u32,
-            RegexAST<TypedNfaState>::serialize_negative_tags()
+            capture_name_u32,
+            RegexAST<TypedNfaState>::serialize_negative_captures()
     );
 }
 
@@ -1100,7 +1114,7 @@ template <typename TypedNfaState>
             U"[{}{}]{}",
             m_negate ? U"^" : U"",
             ranges_serialized,
-            RegexAST<TypedNfaState>::serialize_negative_tags()
+            RegexAST<TypedNfaState>::serialize_negative_captures()
     );
 }
 }  // namespace log_surgeon::finite_automata
diff --git a/src/log_surgeon/finite_automata/RegisterHandler.hpp b/src/log_surgeon/finite_automata/RegisterHandler.hpp
index d61240e3..be56cb47 100644
--- a/src/log_surgeon/finite_automata/RegisterHandler.hpp
+++ b/src/log_surgeon/finite_automata/RegisterHandler.hpp
@@ -2,6 +2,7 @@
 #define LOG_SURGEON_FINITE_AUTOMATA_REGISTER_HANDLER_HPP
 
 #include <cstddef>
+#include <cstdint>
 #include <vector>
 
 #include <log_surgeon/finite_automata/PrefixTree.hpp>
@@ -17,6 +18,8 @@ namespace log_surgeon::finite_automata {
  */
 class RegisterHandler {
 public:
+    using register_id_t = uint32_t;
+
     auto add_register(
             PrefixTree::id_t const prefix_tree_parent_node_id,
             PrefixTree::position_t const position
diff --git a/src/log_surgeon/finite_automata/TaggedTransition.hpp b/src/log_surgeon/finite_automata/TaggedTransition.hpp
index 43315b2a..e2e44d36 100644
--- a/src/log_surgeon/finite_automata/TaggedTransition.hpp
+++ b/src/log_surgeon/finite_automata/TaggedTransition.hpp
@@ -10,24 +10,18 @@
 
 #include <fmt/format.h>
 
-#include <log_surgeon/finite_automata/Tag.hpp>
-
 namespace log_surgeon::finite_automata {
+using tag_id_t = std::uint32_t;
+
 /**
- * Represents an NFA transition indicating that a capture group has been matched.
- * NOTE: `m_tag` is always expected to be non-null.
+ * Represents an NFA transition indicating that a tag has been matched.
  * @tparam TypedNfaState Specifies the type of transition (bytes or UTF-8 characters).
  */
 template <typename TypedNfaState>
 class PositiveTaggedTransition {
 public:
-    /**
-     * @param tag
-     * @param dest_state
-     * @throw std::invalid_argument if `tag` is `nullptr`.
-     */
-    PositiveTaggedTransition(Tag const* tag, TypedNfaState const* dest_state)
-            : m_tag{nullptr == tag ? throw std::invalid_argument("Tag cannot be null") : tag},
+    PositiveTaggedTransition(tag_id_t const tag_id, TypedNfaState const* dest_state)
+            : m_tag_id{tag_id},
               m_dest_state{dest_state} {}
 
     [[nodiscard]] auto get_dest_state() const -> TypedNfaState const* { return m_dest_state; }
@@ -43,34 +37,23 @@ class PositiveTaggedTransition {
         if (state_id_it == state_ids.end()) {
             return std::nullopt;
         }
-        return fmt::format("{}[{}]", state_id_it->second, m_tag->get_name());
+        return fmt::format("{}[{}]", state_id_it->second, m_tag_id);
     }
 
 private:
-    Tag const* m_tag;
+    tag_id_t m_tag_id;
     TypedNfaState const* m_dest_state;
 };
 
 /**
- * Represents an NFA transition indicating that a capture group has been unmatched.
- * NOTE: All tags in `m_tags` are always expected to be non-null.
+ * Represents an NFA transition indicating that a tag has been unmatched.
  * @tparam TypedNfaState Specifies the type of transition (bytes or UTF-8 characters).
  */
 template <typename TypedNfaState>
 class NegativeTaggedTransition {
 public:
-    /**
-     * @param tags
-     * @param dest_state
-     * @throw std::invalid_argument if any elements in `tags` is `nullptr`.
-     */
-    NegativeTaggedTransition(std::vector<Tag const*> tags, TypedNfaState const* dest_state)
-            : m_tags{[&tags] {
-                  if (std::ranges::any_of(tags, [](Tag const* tag) { return nullptr == tag; })) {
-                      throw std::invalid_argument("Tags cannot contain null elements");
-                  }
-                  return std::move(tags);
-              }()},
+    NegativeTaggedTransition(std::vector<tag_id_t> tag_ids, TypedNfaState const* dest_state)
+            : m_tag_ids{std::move(tag_ids)},
               m_dest_state{dest_state} {}
 
     [[nodiscard]] auto get_dest_state() const -> TypedNfaState const* { return m_dest_state; }
@@ -86,14 +69,11 @@ class NegativeTaggedTransition {
         if (state_id_it == state_ids.end()) {
             return std::nullopt;
         }
-
-        auto const tag_names = m_tags | std::ranges::views::transform(&Tag::get_name);
-
-        return fmt::format("{}[{}]", state_id_it->second, fmt::join(tag_names, ","));
+        return fmt::format("{}[{}]", state_id_it->second, fmt::join(m_tag_ids, ","));
     }
 
 private:
-    std::vector<Tag const*> m_tags;
+    std::vector<tag_id_t> m_tag_ids;
     TypedNfaState const* m_dest_state;
 };
 }  // namespace log_surgeon::finite_automata
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 652ecebc..edf87095 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,14 +2,14 @@ set(
         SOURCES_LOG_SURGEON
         ../src/log_surgeon/FileReader.cpp
         ../src/log_surgeon/FileReader.hpp
+        ../src/log_surgeon/finite_automata/Capture.hpp
+        ../src/log_surgeon/finite_automata/Nfa.hpp
+        ../src/log_surgeon/finite_automata/NfaState.hpp
         ../src/log_surgeon/finite_automata/PrefixTree.cpp
         ../src/log_surgeon/finite_automata/PrefixTree.hpp
         ../src/log_surgeon/finite_automata/RegexAST.hpp
-        ../src/log_surgeon/finite_automata/Nfa.hpp
-        ../src/log_surgeon/finite_automata/NfaState.hpp
         ../src/log_surgeon/finite_automata/RegisterHandler.hpp
         ../src/log_surgeon/finite_automata/StateType.hpp
-        ../src/log_surgeon/finite_automata/Tag.hpp
         ../src/log_surgeon/finite_automata/TaggedTransition.hpp
         ../src/log_surgeon/Lalr1Parser.cpp
         ../src/log_surgeon/Lalr1Parser.hpp
@@ -22,9 +22,17 @@ set(
         ../src/log_surgeon/SchemaParser.hpp
         ../src/log_surgeon/Token.cpp
         ../src/log_surgeon/Token.hpp
+        ../src/log_surgeon/UniqueIdGenerator.hpp
 )
 
-set(SOURCES_TESTS test-lexer.cpp test-nfa.cpp test-prefix-tree.cpp test-register-handler.cpp test-tag.cpp)
+set(
+        SOURCES_TESTS
+        test-lexer.cpp
+        test-nfa.cpp
+        test-prefix-tree.cpp
+        test-register-handler.cpp
+        test-capture.cpp
+)
 
 add_executable(unit-test ${SOURCES_LOG_SURGEON} ${SOURCES_TESTS})
 target_link_libraries(unit-test PRIVATE Catch2::Catch2WithMain log_surgeon::log_surgeon)
diff --git a/tests/test-capture.cpp b/tests/test-capture.cpp
new file mode 100644
index 00000000..28c2b2df
--- /dev/null
+++ b/tests/test-capture.cpp
@@ -0,0 +1,34 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <log_surgeon/finite_automata/Capture.hpp>
+
+using log_surgeon::finite_automata::Capture;
+
+TEST_CASE("Capture operations", "[Capture]") {
+    SECTION("Basic name retrieval works correctly") {
+        Capture const capture{"uID"};
+        REQUIRE("uID" == capture.get_name());
+    }
+
+    SECTION("Empty capture name is handled correctly") {
+        Capture const empty_capture{""};
+        REQUIRE(empty_capture.get_name().empty());
+    }
+
+    SECTION("Special characters in capture names are preserved") {
+        Capture const special_capture{"user.id-123_@"};
+        REQUIRE("user.id-123_@" == special_capture.get_name());
+    }
+
+    SECTION("Assignment operator works correctly") {
+        Capture assign_capture{"target"};
+        assign_capture = Capture{"new_source"};  // Assigns from a temporary.
+        REQUIRE("new_source" == assign_capture.get_name());
+    }
+
+    SECTION("Move constructor works correctly") {
+        Capture original_capture{"source"};
+        Capture moved_capture{std::move(original_capture)};
+        REQUIRE("source" == moved_capture.get_name());
+    }
+}
diff --git a/tests/test-lexer.cpp b/tests/test-lexer.cpp
index 48b2185c..d93a8ea0 100644
--- a/tests/test-lexer.cpp
+++ b/tests/test-lexer.cpp
@@ -6,12 +6,18 @@
 
 #include <catch2/catch_test_macros.hpp>
 
+#include <log_surgeon/Constants.hpp>
 #include <log_surgeon/finite_automata/Nfa.hpp>
 #include <log_surgeon/finite_automata/RegexAST.hpp>
 #include <log_surgeon/Schema.hpp>
 #include <log_surgeon/SchemaParser.hpp>
 
+using log_surgeon::lexers::ByteLexer;
+using log_surgeon::Schema;
+using log_surgeon::SchemaAST;
+using log_surgeon::SymbolId;
 using std::codecvt_utf8;
+using std::make_unique;
 using std::string;
 using std::string_view;
 using std::u32string;
@@ -49,9 +55,25 @@ auto test_regex_ast(string_view var_schema, u32string const& expected_serialized
  */
 [[nodiscard]] auto u32string_to_string(u32string const& u32_str) -> string;
 
+/**
+ * Initializes the lexer with the constant delimiters and the given schema.
+ * @param schema_ast Contains the variables to add to the lexer.
+ * @param lexer Returns the initialized lexer.
+ */
+auto initialize_lexer(std::unique_ptr<SchemaAST> schema_ast, ByteLexer& lexer) -> void;
+
+/**
+ * Scans the given input to ensure the correct behavior.
+ * @param lexer The lexer to scan the input with.
+ * @param input The input to test.
+ * @param symbol The expected symbol to match.
+ */
+auto test_scanning_input(ByteLexer& lexer, std::string_view input, std::string_view symbol)
+        -> void;
+
 auto test_regex_ast(string_view const var_schema, u32string const& expected_serialized_ast)
         -> void {
-    log_surgeon::Schema schema;
+    Schema schema;
     schema.add_variable(var_schema, -1);
 
     auto const schema_ast = schema.release_schema_ast_ptr();
@@ -67,11 +89,73 @@ auto u32string_to_string(u32string const& u32_str) -> string {
     wstring_convert<codecvt_utf8<char32_t>, char32_t> converter;
     return converter.to_bytes(u32_str.data(), u32_str.data() + u32_str.size());
 }
+
+auto initialize_lexer(std::unique_ptr<SchemaAST> schema_ast, ByteLexer& lexer) -> void {
+    vector<uint32_t> const cDelimiters{' ', '\n'};
+    lexer.add_delimiters(cDelimiters);
+
+    vector<uint32_t> delimiters;
+    for (uint32_t i{0}; i < log_surgeon::cSizeOfByte; i++) {
+        if (lexer.is_delimiter(i)) {
+            delimiters.push_back(i);
+        }
+    }
+
+    lexer.m_symbol_id[log_surgeon::cTokenEnd] = static_cast<uint32_t>(SymbolId::TokenEnd);
+    lexer.m_symbol_id[log_surgeon::cTokenUncaughtString]
+            = static_cast<uint32_t>(SymbolId::TokenUncaughtString);
+    lexer.m_id_symbol[static_cast<uint32_t>(SymbolId::TokenEnd)] = log_surgeon::cTokenEnd;
+    lexer.m_id_symbol[static_cast<uint32_t>(SymbolId::TokenUncaughtString)]
+            = log_surgeon::cTokenUncaughtString;
+
+    for (auto const& schema_var : schema_ast->m_schema_vars) {
+        auto* rule{dynamic_cast<SchemaVarAST*>(schema_var.get())};
+        REQUIRE(nullptr != rule);  // Fail the test instead of dereferencing a failed cast.
+        // For log-specific lexing: modify variable regex to contain a delimiter at the start.
+        rule->m_regex_ptr = make_unique<RegexASTCatByte>(
+                make_unique<RegexASTGroupByte>(delimiters),
+                std::move(rule->m_regex_ptr)
+        );
+        if (!lexer.m_symbol_id.contains(rule->m_name)) {
+            lexer.m_symbol_id[rule->m_name] = lexer.m_symbol_id.size();
+            lexer.m_id_symbol[lexer.m_symbol_id[rule->m_name]] = rule->m_name;
+        }
+        lexer.add_rule(lexer.m_symbol_id[rule->m_name], std::move(rule->m_regex_ptr));
+    }
+    lexer.generate();
+}
+
+auto test_scanning_input(ByteLexer& lexer, std::string_view input, std::string_view symbol)
+        -> void {
+    lexer.reset();
+
+    log_surgeon::ParserInputBuffer input_buffer;
+    string token_string{input};
+    input_buffer.set_storage(token_string.data(), token_string.size(), 0, true);
+    lexer.prepend_start_of_file_char(input_buffer);
+
+    log_surgeon::Token token;
+    auto error_code{lexer.scan(input_buffer, token)};
+    REQUIRE(log_surgeon::ErrorCode::Success == error_code);
+    REQUIRE(nullptr != token.m_type_ids_ptr);
+    REQUIRE(1 == token.m_type_ids_ptr->size());
+    REQUIRE(symbol == lexer.m_id_symbol[token.m_type_ids_ptr->at(0)]);
+    REQUIRE(input == token.to_string_view());
+
+    error_code = lexer.scan(input_buffer, token);
+    REQUIRE(log_surgeon::ErrorCode::Success == error_code);
+    REQUIRE(nullptr != token.m_type_ids_ptr);
+    REQUIRE(1 == token.m_type_ids_ptr->size());
+    REQUIRE(log_surgeon::cTokenEnd == lexer.m_id_symbol[token.m_type_ids_ptr->at(0)]);
+    REQUIRE(token.to_string_view().empty());
+
+    // TODO: add check for register values when simulation is implemented.
+}
 }  // namespace
 
 TEST_CASE("Test the Schema class", "[Schema]") {
     SECTION("Add a number variable to schema") {
-        log_surgeon::Schema schema;
+        Schema schema;
         string const var_name = "myNumber";
         string const var_schema = var_name + string(":") + string("123");
         schema.add_variable(string_view(var_schema), -1);
@@ -89,7 +173,7 @@ TEST_CASE("Test the Schema class", "[Schema]") {
     }
 
     SECTION("Add a capture variable to schema") {
-        log_surgeon::Schema schema;
+        Schema schema;
         std::string const var_name = "capture";
         string const var_schema = var_name + string(":") + string("u(?<uID>[0-9]+)");
         schema.add_variable(var_schema, -1);
@@ -208,3 +292,59 @@ TEST_CASE("Test the Schema class", "[Schema]") {
         );
     }
 }
+
+TEST_CASE("Test basic Lexer", "[Lexer]") {
+    constexpr string_view cVarName{"myVar"};
+    constexpr string_view cVarSchema{"myVar:123"};
+    constexpr string_view cTokenString1{"123"};
+    constexpr string_view cTokenString2{"234"};
+
+    Schema schema;
+    schema.add_variable(cVarSchema, -1);
+
+    ByteLexer lexer;
+    initialize_lexer(schema.release_schema_ast_ptr(), lexer);
+
+    test_scanning_input(lexer, cTokenString1, cVarName);
+    test_scanning_input(lexer, cTokenString2, log_surgeon::cTokenUncaughtString);
+}
+
+TEST_CASE("Test Lexer with capture groups", "[Lexer]") {
+    constexpr string_view cVarName{"myVar"};
+    constexpr string_view cCaptureName{"uid"};
+    constexpr string_view cVarSchema{"myVar:userID=(?<uid>123)"};
+    constexpr string_view cTokenString1{"userID=123"};
+    constexpr string_view cTokenString2{"userID=234"};
+    constexpr string_view cTokenString3{"123"};
+
+    Schema schema;
+    schema.add_variable(cVarSchema, -1);
+
+    ByteLexer lexer;
+    initialize_lexer(schema.release_schema_ast_ptr(), lexer);
+
+    string const var_name{cVarName};
+    auto const var_id{lexer.m_symbol_id.find(var_name)};
+    REQUIRE(lexer.m_symbol_id.end() != var_id);
+
+    string const capture_name{cCaptureName};
+    auto const capture_id{lexer.m_symbol_id.find(capture_name)};
+    REQUIRE(lexer.m_symbol_id.end() != capture_id);
+
+    // The variable must map to exactly one capture, and it must be `uid`.
+    auto capture_ids{lexer.get_capture_ids_for_var_id(var_id->second)};
+    REQUIRE(capture_ids.has_value());
+    REQUIRE(1 == capture_ids.value().size());
+    REQUIRE(capture_id->second == capture_ids.value()[0]);
+
+    auto tag_ids{lexer.get_tag_ids_for_capture_id(capture_ids.value()[0])};
+    REQUIRE(tag_ids.has_value());
+    REQUIRE(std::make_pair(0u, 1u) == tag_ids.value());
+
+    // TODO: add check for get_register_for_tag_id and get_registers_for_capture when
+    // determinization is implemented.
+
+    test_scanning_input(lexer, cTokenString1, cVarName);
+    test_scanning_input(lexer, cTokenString2, log_surgeon::cTokenUncaughtString);
+    test_scanning_input(lexer, cTokenString3, log_surgeon::cTokenUncaughtString);
+}
diff --git a/tests/test-nfa.cpp b/tests/test-nfa.cpp
index 719a168e..2c4e3477 100644
--- a/tests/test-nfa.cpp
+++ b/tests/test-nfa.cpp
@@ -44,9 +44,10 @@ TEST_CASE("Test NFA", "[NFA]") {
     auto& capture_rule_ast = dynamic_cast<SchemaVarAST&>(*schema_ast->m_schema_vars[0]);
     vector<ByteLexicalRule> rules;
     rules.emplace_back(0, std::move(capture_rule_ast.m_regex_ptr));
-    ByteNfa const nfa{std::move(rules)};
+    ByteNfa const nfa{rules};
 
     // Compare against expected output
+    // Capture order (with tag IDs): letter1(0,1), letter2(2,3), letter(4,5), containerID(6,7)
     string expected_serialized_nfa = "0:byte_transitions={A-->1,Z-->2},"
                                      "epsilon_transitions={},"
                                      "positive_tagged_start_transitions={},"
@@ -54,18 +55,17 @@ TEST_CASE("Test NFA", "[NFA]") {
                                      "negative_tagged_transition={}\n";
     expected_serialized_nfa += "1:byte_transitions={},"
                                "epsilon_transitions={},"
-                               "positive_tagged_start_transitions={3[letter]},"
+                               "positive_tagged_start_transitions={3[4]},"
                                "positive_tagged_end_transitions={},"
                                "negative_tagged_transition={}\n";
-    expected_serialized_nfa
-            += "2:byte_transitions={},"
-               "epsilon_transitions={},"
-               "positive_tagged_start_transitions={},"
-               "positive_tagged_end_transitions={},"
-               "negative_tagged_transition={4[letter1,letter2,letter,containerID]}\n";
+    expected_serialized_nfa += "2:byte_transitions={},"
+                               "epsilon_transitions={},"
+                               "positive_tagged_start_transitions={},"
+                               "positive_tagged_end_transitions={},"
+                               "negative_tagged_transition={4[0,1,2,3,4,5,6,7]}\n";
     expected_serialized_nfa += "3:byte_transitions={},"
                                "epsilon_transitions={},"
-                               "positive_tagged_start_transitions={5[letter1],6[letter2]},"
+                               "positive_tagged_start_transitions={5[0],6[2]},"
                                "positive_tagged_end_transitions={},"
                                "negative_tagged_transition={}\n";
     expected_serialized_nfa += "4:accepting_tag=0,byte_transitions={},"
@@ -86,27 +86,27 @@ TEST_CASE("Test NFA", "[NFA]") {
     expected_serialized_nfa += "7:byte_transitions={},"
                                "epsilon_transitions={},"
                                "positive_tagged_start_transitions={},"
-                               "positive_tagged_end_transitions={9[letter1]},"
+                               "positive_tagged_end_transitions={9[1]},"
                                "negative_tagged_transition={}\n";
     expected_serialized_nfa += "8:byte_transitions={},"
                                "epsilon_transitions={},"
                                "positive_tagged_start_transitions={},"
-                               "positive_tagged_end_transitions={10[letter2]},"
+                               "positive_tagged_end_transitions={10[3]},"
                                "negative_tagged_transition={}\n";
     expected_serialized_nfa += "9:byte_transitions={},"
                                "epsilon_transitions={},"
                                "positive_tagged_start_transitions={},"
                                "positive_tagged_end_transitions={},"
-                               "negative_tagged_transition={11[letter2]}\n";
+                               "negative_tagged_transition={11[2,3]}\n";
     expected_serialized_nfa += "10:byte_transitions={},"
                                "epsilon_transitions={},"
                                "positive_tagged_start_transitions={},"
                                "positive_tagged_end_transitions={},"
-                               "negative_tagged_transition={11[letter1]}\n";
+                               "negative_tagged_transition={11[0,1]}\n";
     expected_serialized_nfa += "11:byte_transitions={},"
                                "epsilon_transitions={},"
                                "positive_tagged_start_transitions={},"
-                               "positive_tagged_end_transitions={12[letter]},"
+                               "positive_tagged_end_transitions={12[5]},"
                                "negative_tagged_transition={}\n";
     expected_serialized_nfa += "12:byte_transitions={B-->13},"
                                "epsilon_transitions={},"
@@ -115,7 +115,7 @@ TEST_CASE("Test NFA", "[NFA]") {
                                "negative_tagged_transition={}\n";
     expected_serialized_nfa += "13:byte_transitions={},"
                                "epsilon_transitions={},"
-                               "positive_tagged_start_transitions={14[containerID]},"
+                               "positive_tagged_start_transitions={14[6]},"
                                "positive_tagged_end_transitions={},"
                                "negative_tagged_transition={}\n";
     expected_serialized_nfa += "14:byte_transitions={0-->15,1-->15,2-->15,3-->15,4-->15,5-->15,6-->"
@@ -128,7 +128,7 @@ TEST_CASE("Test NFA", "[NFA]") {
                                "15,7-->15,8-->15,9-->15},"
                                "epsilon_transitions={},"
                                "positive_tagged_start_transitions={},"
-                               "positive_tagged_end_transitions={16[containerID]},"
+                               "positive_tagged_end_transitions={16[7]},"
                                "negative_tagged_transition={}\n";
     expected_serialized_nfa += "16:byte_transitions={C-->4},"
                                "epsilon_transitions={},"
diff --git a/tests/test-tag.cpp b/tests/test-tag.cpp
deleted file mode 100644
index 41f8a2ef..00000000
--- a/tests/test-tag.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <catch2/catch_test_macros.hpp>
-
-#include <log_surgeon/finite_automata/Tag.hpp>
-
-using log_surgeon::finite_automata::Tag;
-
-TEST_CASE("Tag operations", "[Tag]") {
-    SECTION("Basic name retrieval works correctly") {
-        Tag const tag{"uID"};
-        REQUIRE("uID" == tag.get_name());
-    }
-
-    SECTION("Empty tag name is handled correctly") {
-        Tag const empty_tag{""};
-        REQUIRE(empty_tag.get_name().empty());
-    }
-
-    SECTION("Special characters in tag names are preserved") {
-        Tag const special_tag{"user.id-123_@"};
-        REQUIRE("user.id-123_@" == special_tag.get_name());
-    }
-
-    SECTION("Copy constructor works correctly") {
-        Tag assign_tag{"target"};
-        assign_tag = Tag{"new_source"};
-        REQUIRE("new_source" == assign_tag.get_name());
-    }
-
-    SECTION("Move constructor works correctly") {
-        Tag original_tag{"source"};
-        Tag moved_tag{std::move(original_tag)};
-        REQUIRE("source" == moved_tag.get_name());
-    }
-}