Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4f162ab
[𝘀𝗽𝗿] initial version
vitalybuka Oct 22, 2025
1e6377a
[𝘀𝗽𝗿] changes to main this commit is based on
vitalybuka Oct 22, 2025
40ba10a
spelling
vitalybuka Oct 22, 2025
b50bc42
[𝘀𝗽𝗿] changes introduced through rebase
vitalybuka Oct 22, 2025
5698542
rebase
vitalybuka Oct 22, 2025
dd1e301
[𝘀𝗽𝗿] changes introduced through rebase
vitalybuka Oct 23, 2025
b821a9b
rebase
vitalybuka Oct 23, 2025
5d58018
rebase
vitalybuka Oct 23, 2025
1840fe0
Update llvm/docs/ProgrammersManual.rst
vitalybuka Oct 24, 2025
b24b6dd
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
f97c94b
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
2a13cea
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
8e62249
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
c1dafd2
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
058bd72
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
f80e076
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
5602078
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
885ae93
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
07dfaf1
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
61cb32e
review comments
vitalybuka Oct 24, 2025
52c6a62
Update llvm/unittests/ADT/RadixTreeTest.cpp
vitalybuka Oct 24, 2025
4d1c890
comment
vitalybuka Oct 24, 2025
a365761
{} and comment
vitalybuka Oct 24, 2025
1ee0909
Apply suggestion from @kazutakahirata
vitalybuka Oct 24, 2025
ecd9566
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
067b34f
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
a2a8d9d
Update llvm/include/llvm/ADT/RadixTree.h
vitalybuka Oct 24, 2025
a88a204
update
vitalybuka Oct 24, 2025
7bb9a6a
copilot
vitalybuka Oct 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions llvm/include/llvm/Support/GlobPattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,30 @@ class GlobPattern {
// Returns true for glob pattern "*". Can be used to avoid expensive
// preparation/acquisition of the input for match().
bool isTrivialMatchAll() const {
if (!Prefix.empty())
if (PrefixSize)
return false;
if (!Suffix.empty())
if (SuffixSize)
return false;
if (SubGlobs.size() != 1)
return false;
return SubGlobs[0].getPat() == "*";
}

StringRef prefix() const { return Prefix; }
StringRef suffix() const { return Suffix; }
// The following functions are just shortcuts for faster matching. They are
// conservative to simplify implementations.

private:
StringRef Prefix;
StringRef Suffix;
// Returns plain prefix of the pattern.
StringRef prefix() const { return Pattern.take_front(PrefixSize); }
// Returns plain suffix of the pattern.
StringRef suffix() const { return Pattern.take_back(SuffixSize); }
// Returns the longest plain substring of the pattern between prefix and
// suffix.
StringRef longest_substr() const;

private:
StringRef Pattern;
size_t PrefixSize = 0;
size_t SuffixSize = 0;
struct SubGlobPattern {
/// \param Pat the pattern to match against
LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
Expand Down
345 changes: 345 additions & 0 deletions llvm/include/llvm/Support/RadixTree.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
//===-- RadixTree.h - Radix Tree implementation -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// This file implements a Radix Tree.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_RADIXTREE_H
#define LLVM_SUPPORT_RADIXTREE_H

#include "llvm/ADT/ADL.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include <cassert>
#include <cstddef>
#include <iterator>
#include <limits>
#include <list>
#include <utility>
#include <vector>

namespace llvm {

/// \brief A Radix Tree implementation.
///
/// A Radix Tree (also known as a compact prefix tree or radix trie) is a
/// data structure that stores a dynamic set or associative array where keys
/// are strings and values are associated with these keys. Unlike a regular
/// trie, the edges of a radix tree can be labeled with sequences of characters
/// as well as single characters. This makes radix trees more efficient for
/// storing sparse data sets, where many nodes in a regular trie would have
/// only one child.
///
/// This implementation supports arbitrary key types that can be iterated over
/// (e.g., `std::string`, `std::vector<char>`, `ArrayRef<char>`). The key type
/// must provide `begin()` and `end()` for iteration.
///
/// The tree stores `std::pair<const KeyType, T>` as its value type.
///
/// Example usage:
/// \code
/// llvm::RadixTree<StringRef, int> Tree;
/// Tree.emplace("apple", 1);
/// Tree.emplace("grapefruit", 2);
/// Tree.emplace("grape", 3);
///
/// // Find prefixes
/// for (const auto &pair : Tree.find_prefixes("grapefruit juice")) {
/// // pair will be {"grape", 3}
/// // pair will be {"grapefruit", 2}
/// llvm::outs() << pair.first << ": " << pair.second << "\n";
/// }
///
/// // Iterate over all elements
/// for (const auto &pair : Tree) {
/// llvm::outs() << pair.first << ": " << pair.second << "\n";
/// }
/// \endcode
///
/// \note
/// The `RadixTree` takes ownership of the `KeyType` and `T` objects
/// inserted into it. When an element is removed or the tree is destroyed,
/// these objects will be destructed.
/// However, if `KeyType` is a reference-like type, e.g. `StringRef` or a
/// range, the user must guarantee that the referenced data outlives the tree.
template <typename KeyType, typename T> class RadixTree {
public:
using key_type = KeyType;
using mapped_type = T;
using value_type = std::pair<const KeyType, mapped_type>;

private:
using KeyConstIteratorType =
decltype(adl_begin(std::declval<const key_type &>()));
using KeyConstIteratorRangeType = iterator_range<KeyConstIteratorType>;
using KeyValueType =
remove_cvref_t<decltype(*adl_begin(std::declval<key_type &>()))>;
using ContainerType = std::list<value_type>;

/// Represents a node of the Radix Tree.
///
/// Each node is labelled with a slice (`Key`) of some key. The label of the
/// root is empty; the label of every other node is non-empty. Nodes are
/// move-only.
struct Node {
  /// The part of the key this node is labelled with.
  KeyConstIteratorRangeType Key = {KeyConstIteratorType{},
                                   KeyConstIteratorType{}};
  std::vector<Node> Children;

  /// An iterator to the value associated with this node.
  ///
  /// If this node does not have a value (i.e., it's an internal node that
  /// only serves as a path to other values), this iterator will be equal
  /// to a value-initialized `ContainerType::iterator()`.
  ///
  /// Value-initialized explicitly so that the "no value" state is exactly the
  /// sentinel it is compared against, independent of what default
  /// initialization does for the iterator type.
  typename ContainerType::iterator Value{};

  /// The first character of the Key. Used for fast child lookup.
  ///
  /// Value-initialized so that a default-constructed node (the root) never
  /// carries an indeterminate value that would be copied when the node is
  /// moved.
  KeyValueType KeyFront{};

  Node() = default;

  /// Creates a node labelled with the non-empty range \p Key.
  Node(const KeyConstIteratorRangeType &Key)
      : Key(Key), KeyFront(*Key.begin()) {
    assert(!Key.empty());
  }

  Node(Node &&) = default;
  Node &operator=(Node &&) = default;

  Node(const Node &) = delete;
  Node &operator=(const Node &) = delete;

  /// Returns the child whose label starts with the first character of
  /// \p Key, or nullptr if \p Key is empty or there is no such child.
  ///
  /// Only the first character is compared; the caller has to check the rest
  /// of the child's label itself.
  const Node *findChild(const KeyConstIteratorRangeType &Key) const {
    if (Key.empty())
      return nullptr;
    for (const auto &Child : Children) {
      assert(!Child.Key.empty()); // Only root can be empty.
      if (Child.KeyFront == *Key.begin())
        return &Child;
    }
    return nullptr;
  }

  /// Non-const overload; forwards to the const implementation.
  Node *findChild(const KeyConstIteratorRangeType &Query) {
    const Node *This = this;
    return const_cast<Node *>(This->findChild(Query));
  }

  /// Returns the number of nodes in the subtree rooted at this node,
  /// including this node itself.
  size_t countNodes() const {
    size_t R = 1;
    for (const auto &C : Children)
      R += C.countNodes();
    return R;
  }

  ///
  /// Splits the current node into two.
  ///
  /// This function is used when a new key needs to be inserted that shares
  /// a common prefix with the current node's key, but then diverges.
  /// The current `Key` is truncated to the common prefix, and a new child
  /// node is created for the remainder of the original node's `Key`.
  /// The new child takes over the previous children and the value of this
  /// node; afterwards this node has the new child as its only child and no
  /// value.
  ///
  /// \param SplitPoint An iterator pointing to the character in the current
  ///                   `Key` where the split should occur.
  void split(KeyConstIteratorType SplitPoint) {
    Node Child(make_range(SplitPoint, Key.end()));
    Key = make_range(Key.begin(), SplitPoint);

    // Hand the existing subtree and value over to the new child.
    Children.swap(Child.Children);
    std::swap(Value, Child.Value);

    Children.emplace_back(std::move(Child));
  }
};

Node Root; // Root is always for empty range.
ContainerType Values;

/// Returns the node that corresponds exactly to `Key`, creating it (and
/// splitting an existing node when necessary) if the tree has no such node
/// yet. An empty `Key` maps to the root.
Node &findOrCreate(KeyConstIteratorRangeType Key) {
  if (Key.empty())
    return Root;

  Node *Current = &Root;
  // Descend from the root. `Key` always holds the part of the original key
  // that has not been matched by the path to `Current` yet.
  for (Node *Next = Current; Next != nullptr;
       Next = Current->findChild(Key)) {
    // Move to the child that starts with the same first character (or to the
    // root on the first iteration).
    Current = Next;

    auto [KeyRest, LabelRest] = llvm::mismatch(Key, Current->Key);
    Key = make_range(KeyRest, Key.end());

    if (LabelRest != Current->Key.end()) {
      // Only a part of the node's label matched: either the key ran out or
      // a character differs. In both cases split the node, which puts a new
      // node between `Current` and its former children. There is no point in
      // looking for a child afterwards, the lookup could not succeed.
      Current->split(LabelRest);
      break;
    }
  }

  // If nothing of the key is left, `Current` is an exact match. Otherwise
  // `Key` is the suffix of the original key that is not covered by the path
  // from the root to `Current`, and no child can match more of it, so a new
  // child is created for that suffix.
  return Key.empty() ? *Current : Current->Children.emplace_back(Key);
}

///
/// Forward iterator over the results of a prefix search.
///
/// `find_prefixes` uses this iterator to walk down the tree along the query
/// and to stop at every node that carries a value, i.e. at every stored
/// element whose key is a prefix of the query.
///
/// \tparam MappedType The type of the value pointed to by the iterator.
///                    This will be `value_type` for non-const iterators
///                    and `const value_type` for const iterators.
template <typename MappedType>
class IteratorImpl
    : public iterator_facade_base<IteratorImpl<MappedType>,
                                  std::forward_iterator_tag, MappedType> {
  /// Node the iterator currently stands on; nullptr for the end iterator.
  const Node *Curr = nullptr;
  /// Part of the query that is not matched by the path to `Curr` yet.
  KeyConstIteratorRangeType Query;

  /// Moves one node down the tree along `Query`. `Curr` becomes nullptr when
  /// the query is exhausted, when no child starts with the next character,
  /// or when the label of that child is not fully matched by the query.
  void advance() {
    assert(Curr);
    const Node *Candidate =
        Query.empty() ? nullptr : Curr->findChild(Query);
    Curr = nullptr;
    if (Candidate == nullptr)
      return;

    auto [QueryRest, LabelRest] = llvm::mismatch(Query, Candidate->Key);
    if (LabelRest != Candidate->Key.end())
      return;

    Query = make_range(QueryRest, Query.end());
    Curr = Candidate;
  }

  /// Skips nodes without a value until a node with a value or the end is
  /// reached.
  void findNextValid() {
    const typename ContainerType::iterator NoValue{};
    while (Curr != nullptr && Curr->Value == NoValue)
      advance();
  }

  friend class RadixTree;
  IteratorImpl(const Node *C, const KeyConstIteratorRangeType &Q)
      : Curr(C), Query(Q) {
    findNextValid();
  }

public:
  /// Constructs the end iterator.
  IteratorImpl() : Query(KeyConstIteratorType{}, KeyConstIteratorType{}) {}

  MappedType &operator*() const { return *Curr->Value; }

  IteratorImpl &operator++() {
    advance();
    findNextValid();
    return *this;
  }

  /// Iterators are equal when they stand on the same node.
  bool operator==(const IteratorImpl &Other) const {
    return Other.Curr == Curr;
  }
};

public:
RadixTree() = default;
RadixTree(RadixTree &&) = default;
RadixTree &operator=(RadixTree &&) = default;

using prefix_iterator = IteratorImpl<value_type>;
using const_prefix_iterator = IteratorImpl<const value_type>;

using iterator = typename ContainerType::iterator;
using const_iterator = typename ContainerType::const_iterator;

/// Returns true if the tree holds no elements.
bool empty() const { return Values.empty(); }

/// Returns the number of elements (key/value pairs) in the tree.
size_t size() const { return Values.size(); }

/// Returns the number of nodes in the tree.
///
/// This function counts every node of the tree structure, including the root
/// and nodes that carry no value, so the result is 1 for an empty tree. It
/// can be useful for understanding the memory footprint or complexity of the
/// tree structure. The whole tree is traversed on every call.
size_t countNodes() const { return Root.countNodes(); }

/// Returns an iterator to the first element.
///
/// Iteration goes over the list of stored elements, not over the tree
/// structure. `emplace` adds elements at the front of that list.
iterator begin() { return Values.begin(); }
const_iterator begin() const { return Values.begin(); }

/// Returns an iterator to the end of the tree.
iterator end() { return Values.end(); }
const_iterator end() const { return Values.end(); }

/// Constructs and inserts a new element into the tree.
///
/// This function constructs an element in-place within the tree. If an
/// element with the same key already exists, the insertion fails and the
/// function returns an iterator to the existing element along with `false`.
/// Otherwise, the new element is inserted and the function returns an
/// iterator to the new element along with `true`.
///
/// \param Key The key of the element to construct.
/// \param Args Arguments to forward to the constructor of the mapped_type.
///             They are perfectly forwarded: lvalue arguments are copied
///             from, only rvalue arguments are moved from.
/// \return A pair consisting of an iterator to the inserted element (or to
///         the element that prevented insertion) and a boolean value
///         indicating whether the insertion took place.
/// \note The element is constructed before the lookup, so `Key` is moved
///       from (and the mapped value is constructed) even if the insertion
///       does not take place.
template <typename... Ts>
std::pair<iterator, bool> emplace(key_type &&Key, Ts &&...Args) {
  // Store the element first: the tree nodes have to refer to the key owned
  // by `Values`, not to the argument.
  const value_type &NewValue =
      Values.emplace_front(std::move(Key), T(std::forward<Ts>(Args)...));
  Node &Target = findOrCreate(NewValue.first);
  const bool Inserted = Target.Value == typename ContainerType::iterator();
  if (Inserted) {
    Target.Value = Values.begin();
  } else {
    // The key is already present; drop the element created above.
    Values.pop_front();
  }
  return {Target.Value, Inserted};
}

///
/// Finds all elements whose keys are prefixes of the given `Key`.
///
/// The returned range visits every element of the tree whose key is a prefix
/// of `Key` (a key equal to `Key` counts as a prefix). The search walks from
/// the root downwards, so shorter prefixes are visited before longer ones.
/// For example, if the tree contains "abcde", "abc", "abcdefgh", and `Key`
/// is "abcde", the range yields "abc" and then "abcde".
///
/// \param Key The key to search for prefixes of.
/// \return An `iterator_range` of `const_prefix_iterator`s, allowing
///         iteration over the found prefix elements.
/// \note The returned iterators keep iterators into the `Key` provided by
///       the caller. The caller must ensure that `Key` remains valid for the
///       lifetime of the iterators.
iterator_range<const_prefix_iterator>
find_prefixes(const key_type &Key) const {
  const KeyConstIteratorRangeType Query(adl_begin(Key), adl_end(Key));
  const_prefix_iterator First(&Root, Query);
  return make_range(First, const_prefix_iterator());
}
};

} // namespace llvm

#endif // LLVM_SUPPORT_RADIXTREE_H
Loading
Loading