-
Couldn't load subscription status.
- Fork 15k
[NFC][ADT] Add RadixTree #164524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
vitalybuka
merged 29 commits into
main
from
users/vitalybuka/spr/radixtree-add-radixtree
Oct 25, 2025
+740
−0
Merged
[NFC][ADT] Add RadixTree #164524
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit
Hold shift + click to select a range
4f162ab
[𝘀𝗽𝗿] initial version
vitalybuka 1e6377a
[𝘀𝗽𝗿] changes to main this commit is based on
vitalybuka 40ba10a
spelling
vitalybuka b50bc42
[𝘀𝗽𝗿] changes introduced through rebase
vitalybuka 5698542
rebase
vitalybuka dd1e301
[𝘀𝗽𝗿] changes introduced through rebase
vitalybuka b821a9b
rebase
vitalybuka 5d58018
rebase
vitalybuka 1840fe0
Update llvm/docs/ProgrammersManual.rst
vitalybuka b24b6dd
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka f97c94b
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka 2a13cea
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka 8e62249
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka c1dafd2
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka 058bd72
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka f80e076
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka 5602078
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka 885ae93
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka 07dfaf1
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka 61cb32e
review comments
vitalybuka 52c6a62
Update llvm/unittests/ADT/RadixTreeTest.cpp
vitalybuka 4d1c890
comment
vitalybuka a365761
{} and comment
vitalybuka 1ee0909
Apply suggestion from @kazutakahirata
vitalybuka ecd9566
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka 067b34f
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka a2a8d9d
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka a88a204
update
vitalybuka 7bb9a6a
copilot
vitalybuka File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,349 @@ | ||
| //===-- llvm/ADT/RadixTree.h - Radix Tree implementation --------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This file implements a Radix Tree. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_ADT_RADIXTREE_H | ||
| #define LLVM_ADT_RADIXTREE_H | ||
|
|
||
| #include "llvm/ADT/ADL.h" | ||
| #include "llvm/ADT/STLExtras.h" | ||
| #include "llvm/ADT/iterator.h" | ||
| #include "llvm/ADT/iterator_range.h" | ||
| #include <cassert> | ||
| #include <cstddef> | ||
| #include <iterator> | ||
| #include <limits> | ||
| #include <list> | ||
| #include <utility> | ||
|
|
||
| namespace llvm { | ||
|
|
||
| /// \brief A Radix Tree implementation. | ||
| /// | ||
| /// A Radix Tree (also known as a compact prefix tree or radix trie) is a | ||
| /// data structure that stores a dynamic set or associative array where keys | ||
| /// are strings and values are associated with these keys. Unlike a regular | ||
| /// trie, the edges of a radix tree can be labeled with sequences of characters | ||
| /// as well as single characters. This makes radix trees more efficient for | ||
| /// storing sparse data sets, where many nodes in a regular trie would have | ||
| /// only one child. | ||
| /// | ||
| /// This implementation supports arbitrary key types that can be iterated over | ||
| /// (e.g., `std::string`, `std::vector<char>`, `ArrayRef<char>`). The key type | ||
| /// must provide `begin()` and `end()` for iteration. | ||
| /// | ||
| /// The tree stores `std::pair<const KeyType, T>` as its value type. | ||
| /// | ||
| /// Example usage: | ||
| /// \code | ||
| /// llvm::RadixTree<StringRef, int> Tree; | ||
| /// Tree.emplace("apple", 1); | ||
| /// Tree.emplace("grapefruit", 2); | ||
| /// Tree.emplace("grape", 3); | ||
| /// | ||
| /// // Find prefixes | ||
| /// for (const auto &[Key, Value] : Tree.find_prefixes("grapefruit juice")) { | ||
| /// // pair will be {"grape", 3} | ||
| /// // pair will be {"grapefruit", 2} | ||
| /// llvm::outs() << Key << ": " << Value << "\n"; | ||
| /// } | ||
| /// | ||
| /// // Iterate over all elements | ||
| /// for (const auto &[Key, Value] : Tree) | ||
| /// llvm::outs() << Key << ": " << Value << "\n"; | ||
| /// \endcode | ||
| /// | ||
| /// \note | ||
| /// The `RadixTree` takes ownership of the `KeyType` and `T` objects | ||
| /// inserted into it. When an element is removed or the tree is destroyed, | ||
| /// these objects will be destructed. | ||
| /// However, if `KeyType` is a reference-like type, e.g., StringRef or range, | ||
| /// the user must guarantee that destination has lifetime longer than the tree. | ||
| template <typename KeyType, typename T> class RadixTree { | ||
| public: | ||
| using key_type = KeyType; | ||
| using mapped_type = T; | ||
| using value_type = std::pair<const KeyType, mapped_type>; | ||
|
|
||
| private: | ||
| using KeyConstIteratorType = | ||
| decltype(adl_begin(std::declval<const key_type &>())); | ||
| using KeyConstIteratorRangeType = iterator_range<KeyConstIteratorType>; | ||
| using KeyValueType = | ||
| remove_cvref_t<decltype(*adl_begin(std::declval<key_type &>()))>; | ||
| using ContainerType = std::list<value_type>; | ||
dwblaikie marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /// Represents an internal node in the Radix Tree. | ||
| struct Node { | ||
| KeyConstIteratorRangeType Key{KeyConstIteratorType{}, | ||
| KeyConstIteratorType{}}; | ||
| std::vector<Node> Children; | ||
vitalybuka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /// An iterator to the value associated with this node. | ||
| /// | ||
| /// If this node does not have a value (i.e., it's an internal node that | ||
| /// only serves as a path to other values), this iterator will be equal | ||
| /// to default constructed `ContainerType::iterator()`. | ||
| typename ContainerType::iterator Value; | ||
|
|
||
| /// The first character of the Key. Used for fast child lookup. | ||
| KeyValueType KeyFront; | ||
|
|
||
| Node() = default; | ||
| Node(const KeyConstIteratorRangeType &Key) | ||
| : Key(Key), KeyFront(*Key.begin()) { | ||
| assert(!Key.empty()); | ||
| } | ||
|
|
||
| Node(Node &&) = default; | ||
| Node &operator=(Node &&) = default; | ||
|
|
||
| Node(const Node &) = delete; | ||
| Node &operator=(const Node &) = delete; | ||
|
|
||
| const Node *findChild(const KeyConstIteratorRangeType &Key) const { | ||
| if (Key.empty()) | ||
| return nullptr; | ||
| for (const Node &Child : Children) { | ||
| assert(!Child.Key.empty()); // Only root can be empty. | ||
| if (Child.KeyFront == *Key.begin()) | ||
| return &Child; | ||
| } | ||
vitalybuka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return nullptr; | ||
| } | ||
|
|
||
| Node *findChild(const KeyConstIteratorRangeType &Query) { | ||
| const Node *This = this; | ||
| return const_cast<Node *>(This->findChild(Query)); | ||
| } | ||
|
|
||
| size_t countNodes() const { | ||
| size_t R = 1; | ||
| for (const Node &C : Children) | ||
| R += C.countNodes(); | ||
| return R; | ||
| } | ||
|
|
||
| /// | ||
| /// Splits the current node into two. | ||
| /// | ||
| /// This function is used when a new key needs to be inserted that shares | ||
| /// a common prefix with the current node's key, but then diverges. | ||
| /// The current `Key` is truncated to the common prefix, and a new child | ||
| /// node is created for the remainder of the original node's `Key`. | ||
| /// | ||
| /// \param SplitPoint An iterator pointing to the character in the current | ||
| /// `Key` where the split should occur. | ||
| void split(KeyConstIteratorType SplitPoint) { | ||
| Node Child(make_range(SplitPoint, Key.end())); | ||
| Key = make_range(Key.begin(), SplitPoint); | ||
|
|
||
| Children.swap(Child.Children); | ||
| std::swap(Value, Child.Value); | ||
|
|
||
| Children.emplace_back(std::move(Child)); | ||
| } | ||
| }; | ||
|
|
||
| /// Root always corresponds to the empty key, which is the shortest possible | ||
| /// prefix for everything. | ||
| Node Root; | ||
| ContainerType KeyValuePairs; | ||
|
|
||
| /// Finds or creates a new tail or leaf node corresponding to the `Key`. | ||
| Node &findOrCreate(KeyConstIteratorRangeType Key) { | ||
| Node *Curr = &Root; | ||
| if (Key.empty()) | ||
| return *Curr; | ||
|
|
||
| for (;;) { | ||
| auto [I1, I2] = llvm::mismatch(Key, Curr->Key); | ||
| Key = make_range(I1, Key.end()); | ||
|
|
||
| if (I2 != Curr->Key.end()) { | ||
| // Match is partial. Either query is too short, or there is mismatching | ||
| // character. Split either way, and put new node in between of the | ||
| // current and its children. | ||
| Curr->split(I2); | ||
|
|
||
| // Split was caused by mismatch, so `findChild` would fail. | ||
| break; | ||
| } | ||
|
|
||
| Node *Child = Curr->findChild(Key); | ||
| if (!Child) | ||
| break; | ||
|
|
||
| // Move to child with the same first character. | ||
| Curr = Child; | ||
| } | ||
|
|
||
| if (Key.empty()) { | ||
| // The current node completely matches the key, return it. | ||
| return *Curr; | ||
| } | ||
|
|
||
| // `Key` is a suffix of original `Key` unmatched by path from the `Root` to | ||
| // the `Curr`, and we have no candidate in the children to match more. | ||
| // Create a new one. | ||
| return Curr->Children.emplace_back(Key); | ||
| } | ||
|
|
||
| /// | ||
| /// An iterator for traversing prefixes search results. | ||
| /// | ||
| /// This iterator is used by `find_prefixes` to traverse the tree and find | ||
| /// elements that are prefixes to the given key. It's a forward iterator. | ||
| /// | ||
| /// \tparam MappedType The type of the value pointed to by the iterator. | ||
| /// This will be `value_type` for non-const iterators | ||
| /// and `const value_type` for const iterators. | ||
| template <typename MappedType> | ||
| class IteratorImpl | ||
| : public iterator_facade_base<IteratorImpl<MappedType>, | ||
| std::forward_iterator_tag, MappedType> { | ||
| const Node *Curr = nullptr; | ||
| KeyConstIteratorRangeType Query{KeyConstIteratorType{}, | ||
| KeyConstIteratorType{}}; | ||
|
|
||
| void findNextValid() { | ||
| while (Curr && Curr->Value == typename ContainerType::iterator()) | ||
vitalybuka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| advance(); | ||
| } | ||
|
|
||
| void advance() { | ||
| assert(Curr); | ||
| if (Query.empty()) { | ||
| Curr = nullptr; | ||
| return; | ||
| } | ||
|
|
||
| Curr = Curr->findChild(Query); | ||
| if (!Curr) { | ||
| Curr = nullptr; | ||
| return; | ||
| } | ||
|
|
||
vitalybuka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| auto [I1, I2] = llvm::mismatch(Query, Curr->Key); | ||
| if (I2 != Curr->Key.end()) { | ||
| Curr = nullptr; | ||
| return; | ||
| } | ||
| Query = make_range(I1, Query.end()); | ||
| } | ||
|
|
||
| friend class RadixTree; | ||
| IteratorImpl(const Node *C, const KeyConstIteratorRangeType &Q) | ||
| : Curr(C), Query(Q) { | ||
| findNextValid(); | ||
| } | ||
|
|
||
| public: | ||
| IteratorImpl() = default; | ||
|
|
||
| MappedType &operator*() const { return *Curr->Value; } | ||
|
|
||
| IteratorImpl &operator++() { | ||
| advance(); | ||
| findNextValid(); | ||
| return *this; | ||
| } | ||
|
|
||
| bool operator==(const IteratorImpl &Other) const { | ||
| return Curr == Other.Curr; | ||
| } | ||
| }; | ||
|
|
||
| public: | ||
| RadixTree() = default; | ||
| RadixTree(RadixTree &&) = default; | ||
| RadixTree &operator=(RadixTree &&) = default; | ||
|
|
||
| using prefix_iterator = IteratorImpl<value_type>; | ||
| using const_prefix_iterator = IteratorImpl<const value_type>; | ||
|
|
||
| using iterator = typename ContainerType::iterator; | ||
| using const_iterator = typename ContainerType::const_iterator; | ||
|
|
||
| /// Returns true if the tree is empty. | ||
| bool empty() const { return KeyValuePairs.empty(); } | ||
|
|
||
| /// Returns the number of elements in the tree. | ||
| size_t size() const { return KeyValuePairs.size(); } | ||
|
|
||
| /// Returns the number of nodes in the tree. | ||
| /// | ||
| /// This function counts all internal nodes in the tree. It can be useful for | ||
| /// understanding the memory footprint or complexity of the tree structure. | ||
| size_t countNodes() const { return Root.countNodes(); } | ||
|
|
||
| /// Returns an iterator to the first element. | ||
| iterator begin() { return KeyValuePairs.begin(); } | ||
| const_iterator begin() const { return KeyValuePairs.begin(); } | ||
|
|
||
| /// Returns an iterator to the end of the tree. | ||
| iterator end() { return KeyValuePairs.end(); } | ||
| const_iterator end() const { return KeyValuePairs.end(); } | ||
|
|
||
| /// Constructs and inserts a new element into the tree. | ||
| /// | ||
| /// This function constructs an element in place within the tree. If an | ||
| /// element with the same key already exists, the insertion fails and the | ||
| /// function returns an iterator to the existing element along with `false`. | ||
| /// Otherwise, the new element is inserted and the function returns an | ||
| /// iterator to the new element along with `true`. | ||
| /// | ||
| /// \param Key The key of the element to construct. | ||
| /// \param Args Arguments to forward to the constructor of the mapped_type. | ||
| /// \return A pair consisting of an iterator to the inserted element (or to | ||
| /// the element that prevented insertion) and a boolean value | ||
| /// indicating whether the insertion took place. | ||
| template <typename... Ts> | ||
| std::pair<iterator, bool> emplace(key_type &&Key, Ts &&...Args) { | ||
| // We want to make new `Node` to refer key in the container, not the one | ||
| // from the argument. | ||
| // FIXME: Determine that we need a new node, before expanding | ||
| // `KeyValuePairs`. | ||
| const value_type &NewValue = | ||
| KeyValuePairs.emplace_front(std::move(Key), T(std::move(Args)...)); | ||
vitalybuka marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| Node &Node = findOrCreate(NewValue.first); | ||
| bool HasValue = Node.Value != typename ContainerType::iterator(); | ||
vitalybuka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if (!HasValue) | ||
| Node.Value = KeyValuePairs.begin(); | ||
| else | ||
| KeyValuePairs.pop_front(); | ||
| return {Node.Value, !HasValue}; | ||
| } | ||
|
|
||
| /// | ||
| /// Finds all elements whose keys are prefixes of the given `Key`. | ||
| /// | ||
| /// This function returns an iterator range over all elements in the tree | ||
| /// whose keys are prefixes of the provided `Key`. For example, if the tree | ||
| /// contains "abcde", "abc", "abcdefgh", and `Key` is "abcde", this function | ||
| /// would return iterators to "abcde" and "abc". | ||
| /// | ||
| /// \param Key The key to search for prefixes of. | ||
| /// \return An `iterator_range` of `const_prefix_iterator`s, allowing | ||
| /// iteration over the found prefix elements. | ||
| /// \note The returned iterators reference the `Key` provided by the caller. | ||
| /// The caller must ensure that `Key` remains valid for the lifetime | ||
| /// of the iterators. | ||
| iterator_range<const_prefix_iterator> | ||
| find_prefixes(const key_type &Key) const { | ||
| return iterator_range<const_prefix_iterator>{ | ||
| const_prefix_iterator(&Root, KeyConstIteratorRangeType(Key)), | ||
| const_prefix_iterator{}}; | ||
| } | ||
| }; | ||
|
|
||
| } // namespace llvm | ||
|
|
||
| #endif // LLVM_ADT_RADIXTREE_H | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.