Skip to content

Commit

Permalink
ART: Fix a few issues around lower bound / erase
Browse files Browse the repository at this point in the history
1. Leaf keys were written to the wrong location.
2. In `art_node_iterator_lower_bound`, we did not check if the key chunk for the
   lower bound child was greater than the key chunk we searched for.
3. In `art_iterator_erase` there were a few issues that would lead to an
   incorrect location. I've somewhat sidestepped the most tricky bit by doing a
   lower bound search from the root instead.

Added tests for 1 and 2.
  • Loading branch information
SLieve committed Jan 16, 2024
1 parent 4a7474e commit c7d1b5d
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 44 deletions.
106 changes: 70 additions & 36 deletions src/art/art.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,13 @@ typedef struct art_node256_s {
typedef struct art_indexed_child_s {
art_node_t *child;
uint8_t index;
art_key_chunk_t key_chunk;
} art_indexed_child_t;

static inline bool art_is_leaf(const art_node_t *node) { return IS_LEAF(node); }

static void art_leaf_populate(art_leaf_t *leaf, const art_key_chunk_t key[]) {
memcpy(&leaf->key, key, ART_KEY_BYTES);
memcpy(leaf->key, key, ART_KEY_BYTES);
}

static inline uint8_t art_get_type(const art_inner_node_t *node) {
Expand Down Expand Up @@ -204,8 +205,9 @@ static inline art_node_t *art_node4_erase(art_node4_t *node,
if (node->count == 2) {
// Only one child remains after erasing, so compress the path by
// removing this node.
art_node_t *remaining_child = node->children[idx ^ 1];
art_key_chunk_t remaining_child_key = node->keys[idx ^ 1];
uint8_t other_idx = idx ^ 1;
art_node_t *remaining_child = node->children[other_idx];
art_key_chunk_t remaining_child_key = node->keys[other_idx];
if (!art_is_leaf(remaining_child)) {
// Correct the prefix of the child node.
art_inner_node_t *inner_node = (art_inner_node_t *)remaining_child;
Expand Down Expand Up @@ -250,6 +252,7 @@ static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node,
}
indexed_child.index = index;
indexed_child.child = node->children[index];
indexed_child.key_chunk = node->keys[index];
return indexed_child;
}

Expand All @@ -266,6 +269,7 @@ static inline art_indexed_child_t art_node4_prev_child(const art_node4_t *node,
}
indexed_child.index = index;
indexed_child.child = node->children[index];
indexed_child.key_chunk = node->keys[index];
return indexed_child;
}

Expand All @@ -278,6 +282,7 @@ static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node,
}
indexed_child.index = index;
indexed_child.child = node->children[index];
indexed_child.key_chunk = node->keys[index];
return indexed_child;
}

Expand All @@ -288,6 +293,7 @@ static inline art_indexed_child_t art_node4_lower_bound(
if (node->keys[i] >= key_chunk) {
indexed_child.index = i;
indexed_child.child = node->children[i];
indexed_child.key_chunk = node->keys[i];
return indexed_child;
}
}
Expand Down Expand Up @@ -399,6 +405,7 @@ static inline art_indexed_child_t art_node16_next_child(
}
indexed_child.index = index;
indexed_child.child = node->children[index];
indexed_child.key_chunk = node->keys[index];
return indexed_child;
}

Expand All @@ -415,6 +422,7 @@ static inline art_indexed_child_t art_node16_prev_child(
}
indexed_child.index = index;
indexed_child.child = node->children[index];
indexed_child.key_chunk = node->keys[index];
return indexed_child;
}

Expand All @@ -427,6 +435,7 @@ static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node,
}
indexed_child.index = index;
indexed_child.child = node->children[index];
indexed_child.key_chunk = node->keys[index];
return indexed_child;
}

Expand All @@ -437,6 +446,7 @@ static inline art_indexed_child_t art_node16_lower_bound(
if (node->keys[i] >= key_chunk) {
indexed_child.index = i;
indexed_child.child = node->children[i];
indexed_child.key_chunk = node->keys[i];
return indexed_child;
}
}
Expand Down Expand Up @@ -534,8 +544,9 @@ static inline art_indexed_child_t art_node48_next_child(
index++;
for (size_t i = index; i < 256; ++i) {
if (node->keys[i] != ART_NODE48_EMPTY_VAL) {
indexed_child.child = node->children[node->keys[i]];
indexed_child.index = i;
indexed_child.child = node->children[node->keys[i]];
indexed_child.key_chunk = i;
return indexed_child;
}
}
Expand All @@ -552,8 +563,9 @@ static inline art_indexed_child_t art_node48_prev_child(
art_indexed_child_t indexed_child;
for (int i = index; i >= 0; --i) {
if (node->keys[i] != ART_NODE48_EMPTY_VAL) {
indexed_child.child = node->children[node->keys[i]];
indexed_child.index = i;
indexed_child.child = node->children[node->keys[i]];
indexed_child.key_chunk = i;
return indexed_child;
}
}
Expand All @@ -570,6 +582,7 @@ static inline art_indexed_child_t art_node48_child_at(const art_node48_t *node,
}
indexed_child.index = index;
indexed_child.child = node->children[node->keys[index]];
indexed_child.key_chunk = index;
return indexed_child;
}

Expand All @@ -580,6 +593,7 @@ static inline art_indexed_child_t art_node48_lower_bound(
if (node->keys[i] != ART_NODE48_EMPTY_VAL) {
indexed_child.index = i;
indexed_child.child = node->children[node->keys[i]];
indexed_child.key_chunk = i;
return indexed_child;
}
}
Expand Down Expand Up @@ -651,8 +665,9 @@ static inline art_indexed_child_t art_node256_next_child(
index++;
for (size_t i = index; i < 256; ++i) {
if (node->children[i] != NULL) {
indexed_child.child = node->children[i];
indexed_child.index = i;
indexed_child.child = node->children[i];
indexed_child.key_chunk = i;
return indexed_child;
}
}
Expand All @@ -669,8 +684,9 @@ static inline art_indexed_child_t art_node256_prev_child(
art_indexed_child_t indexed_child;
for (int i = index; i >= 0; --i) {
if (node->children[i] != NULL) {
indexed_child.child = node->children[i];
indexed_child.index = i;
indexed_child.child = node->children[i];
indexed_child.key_chunk = i;
return indexed_child;
}
}
Expand All @@ -687,6 +703,7 @@ static inline art_indexed_child_t art_node256_child_at(
}
indexed_child.index = index;
indexed_child.child = node->children[index];
indexed_child.key_chunk = index;
return indexed_child;
}

Expand All @@ -697,6 +714,7 @@ static inline art_indexed_child_t art_node256_lower_bound(
if (node->children[i] != NULL) {
indexed_child.index = i;
indexed_child.child = node->children[i];
indexed_child.key_chunk = i;
return indexed_child;
}
}
Expand Down Expand Up @@ -1422,6 +1440,10 @@ static bool art_node_iterator_lower_bound(const art_node_t *node,
// Prefix so far has been equal, but we've found a smaller key.
// Since we take the lower bound within each node, we can return the
// next leaf.
bool went_up = art_iterator_up(iterator);
if (!went_up) {
return art_iterator_invalid_loc(iterator);
}
return art_iterator_move(iterator, true);
} else if (prefix_comparison > 0) {
// No key equal to the key we're looking for, return the first leaf.
Expand All @@ -1440,7 +1462,11 @@ static bool art_node_iterator_lower_bound(const art_node_t *node,
}
return art_iterator_move(iterator, true);
}
// We found a child with a greater or equal prefix.
if (indexed_child.key_chunk > key_chunk) {
// Only larger children, return the first leaf from the node.
return art_node_init_iterator(node, iterator, true);
}
// We found a child with an equal prefix.
art_iterator_down(iterator, inner_node, indexed_child.index);
node = indexed_child.child;
}
Expand Down Expand Up @@ -1472,14 +1498,14 @@ bool art_iterator_prev(art_iterator_t *iterator) {

bool art_iterator_lower_bound(art_iterator_t *iterator,
const art_key_chunk_t *key) {
int compare_result = art_compare_keys(iterator->key, key);
int compare_result =
art_compare_prefix(iterator->key, 0, key, 0, iterator->depth);
// Move up until we have an equal or greater prefix, after which we can do a
// normal lower bound search.
while (compare_result < 0 && iterator->frame > 0) {
if (!art_iterator_up(iterator)) {
// Only smaller keys found.
return art_node_iterator_lower_bound(art_iterator_node(iterator),
iterator, key);
return art_iterator_invalid_loc(iterator);
}
// Since we're only moving up, we can keep comparing against the
// iterator key.
Expand Down Expand Up @@ -1528,42 +1554,50 @@ art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator) {
if (iterator->value == NULL) {
return NULL;
}
art_key_chunk_t initial_key[ART_KEY_BYTES];
memcpy(initial_key, iterator->key, ART_KEY_BYTES);

art_val_t *value_erased = iterator->value;
bool went_up = art_iterator_up(iterator);
if (!went_up) {
// We're erasing the root.
art->root = NULL;
art_iterator_invalid_loc(iterator);
return value_erased;
}

// Erase the leaf.
art_node_t *child_to_replace;
{
art_inner_node_t *node =
(art_inner_node_t *)art_iterator_node(iterator);
art_key_chunk_t key_chunk =
iterator->key[iterator->depth + node->prefix_size];
child_to_replace = art_node_erase(node, key_chunk);
art_inner_node_t *parent_node =
(art_inner_node_t *)art_iterator_node(iterator);
art_key_chunk_t key_chunk_in_parent =
iterator->key[iterator->depth + parent_node->prefix_size];
art_node_t *new_parent_node =
art_node_erase(parent_node, key_chunk_in_parent);

if (new_parent_node != ((art_node_t *)parent_node)) {
// Replace the pointer to the inner node we erased from in its
// parent (it may be a leaf now).
iterator->frames[iterator->frame].node = new_parent_node;
went_up = art_iterator_up(iterator);
if (went_up) {
art_inner_node_t *grandparent_node =
(art_inner_node_t *)art_iterator_node(iterator);
art_key_chunk_t key_chunk_in_grandparent =
iterator->key[iterator->depth + grandparent_node->prefix_size];
art_replace(grandparent_node, key_chunk_in_grandparent,
new_parent_node);
} else {
// We were already at the rootmost node.
art->root = new_parent_node;
}
}

// Replace the pointer to the inner node we erased from in its parent (it
// may be a leaf now).
went_up = art_iterator_up(iterator);
if (went_up) {
art_inner_node_t *node =
(art_inner_node_t *)art_iterator_node(iterator);
art_key_chunk_t key_chunk =
iterator->key[iterator->depth + node->prefix_size];
art_replace(node, key_chunk, child_to_replace);
} else {
// This node was the rootmost node.
art->root = child_to_replace;
iterator->frames[0].node = child_to_replace;
}
art_key_chunk_t initial_key[ART_KEY_BYTES];
memcpy(initial_key, iterator->key, ART_KEY_BYTES);
// Search for the first key after the one we erased.
art_iterator_lower_bound(iterator, initial_key);
iterator->frame = 0;
iterator->depth = 0;
// Do a lower bound search for the initial key, which will find the first
// greater key if it exists. This can likely be mildly faster if we instead
// start from the current position.
art_node_iterator_lower_bound(art->root, iterator, initial_key);
return value_erased;
}

Expand Down
23 changes: 15 additions & 8 deletions tests/art_unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,24 +347,31 @@ DEFINE_TEST(test_art_upper_bound) {
}

DEFINE_TEST(test_art_iterator_erase) {
std::vector<const char*> keys = {
"000001", "000002", "000003", "000004", "001005",
};
std::vector<Value> values = {{1}, {2}, {3}, {4}, {5}};
std::vector<std::array<uint8_t, 6>> keys;
std::vector<Value> values;
std::vector<size_t> sizes = {1, 4, 16, 48, 256};
for (size_t i = 0; i < sizes.size(); i++) {
uint8_t size = static_cast<uint8_t>(sizes[i]);
for (size_t j = 0; j < size; j++) {
keys.push_back(
{0, 0, 0, static_cast<uint8_t>(i), static_cast<uint8_t>(j)});
values.push_back({static_cast<uint64_t>(i) * j});
}
}
art_t art{NULL};
for (size_t i = 0; i < keys.size(); ++i) {
art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]);
art_insert(&art, (art_key_chunk_t*)keys[i].data(), &values[i]);
}
art_iterator_t iterator = art_init_iterator(&art, true);
size_t i = 0;
do {
assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]);
assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i].data());
assert_true(iterator.value == &values[i]);
assert_true(art_iterator_erase(&art, &iterator) == &values[i]);
assert_false(art_find(&art, (art_key_chunk_t*)keys[i]));
assert_false(art_find(&art, (art_key_chunk_t*)keys[i].data()));
++i;
} while (iterator.value != NULL);
assert_true(i == 5);
assert_true(i == values.size());
art_free(&art);
}

Expand Down
24 changes: 24 additions & 0 deletions tests/roaring64_unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,30 @@ DEFINE_TEST(test_and_inplace) {
roaring64_bitmap_free(r1);
roaring64_bitmap_free(r2);
}
{
// In-place should be the same as not-in-place.
uint64_t start = 0x0FFFF;
uint64_t end = 0x20001;

roaring64_bitmap_t* r1 = roaring64_bitmap_from_range(start, end, 1);
roaring64_bitmap_add(r1, 0xFFFF0000);

roaring64_bitmap_t* r2 = roaring64_bitmap_from_range(start, end, 1);

uint64_t and_cardinality = roaring64_bitmap_and_cardinality(r1, r2);
assert_int_equal(and_cardinality, end - start);

roaring64_bitmap_t* r3 = roaring64_bitmap_and(r1, r2);
assert_int_equal(roaring64_bitmap_get_cardinality(r3), and_cardinality);

roaring64_bitmap_and_inplace(r1, r2);
assert_int_equal(roaring64_bitmap_get_cardinality(r1), and_cardinality);
assert_true(roaring64_bitmap_equals(r1, r3));

roaring64_bitmap_free(r1);
roaring64_bitmap_free(r2);
roaring64_bitmap_free(r3);
}
}

DEFINE_TEST(test_intersect) {
Expand Down

0 comments on commit c7d1b5d

Please sign in to comment.