From e715532fe41bd9115829041cbf93b7f5dcb49eac Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Thu, 20 Jun 2024 06:59:33 +0000 Subject: [PATCH 1/5] Fix: Remove logging --- java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp index 995b7343..5e523f2e 100644 --- a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp +++ b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp @@ -60,7 +60,8 @@ JNIEXPORT jlong JNICALL Java_cloud_unum_usearch_Index_c_1create( // return result; } -JNIEXPORT jlong JNICALL Java_cloud_unum_usearch_Index_c_1createFromFile(JNIEnv *env, jclass, jstring path, jboolean view) { +JNIEXPORT jlong JNICALL Java_cloud_unum_usearch_Index_c_1createFromFile(JNIEnv* env, jclass, jstring path, + jboolean view) { char const* path_cstr = env->GetStringUTFChars(path, 0); index_dense_t::state_result_t make_result = index_dense_t::make(path_cstr, view); env->ReleaseStringUTFChars(path, path_cstr); @@ -146,7 +147,6 @@ JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1add( // using vector_key_t = typename index_dense_t::vector_key_t; using add_result_t = typename index_dense_t::add_result_t; - printf("Adding %zu dims \n", (size_t)vector_dims); add_result_t result = reinterpret_cast(c_ptr)->add(static_cast(key), vector_span); if (!result) { @@ -157,9 +157,8 @@ JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1add( // (*env).ReleaseFloatArrayElements(vector, vector_data, 0); } -JNIEXPORT jfloatArray JNICALL Java_cloud_unum_usearch_Index_c_1get( - JNIEnv *env, jclass, jlong c_ptr, jint key) { - +JNIEXPORT jfloatArray JNICALL Java_cloud_unum_usearch_Index_c_1get(JNIEnv* env, jclass, jlong c_ptr, jint key) { + auto index = reinterpret_cast(c_ptr); size_t dim = index->dimensions(); std::unique_ptr vector(new jfloat[dim]); @@ 
-170,7 +169,7 @@ JNIEXPORT jfloatArray JNICALL Java_cloud_unum_usearch_Index_c_1get( } } jfloatArray jvector = env->NewFloatArray(dim); - if (jvector == nullptr) { // out of memory + if (jvector == nullptr) { // out of memory return nullptr; } env->SetFloatArrayRegion(jvector, 0, dim, vector.get()); From 25a1ced3d3c7c0809370af369d0be34f8cf4eb9e Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 22 Jun 2024 05:09:09 +0000 Subject: [PATCH 2/5] Add: Detecting unreachable nodes In HNSW some nodes may become disconnected from the rest of the graph. New `unreachable_nodes` API allows retrieving the number of such entries. On a real-world Wiki1M dataset around 0.1% nodes are isolated. --- cpp/bench.cpp | 14 +++++ cpp/test.cpp | 103 ++++++++++++++++++-------------- include/usearch/index.hpp | 47 ++++++++++++++- include/usearch/index_dense.hpp | 3 + python/scripts/test_index.py | 4 +- 5 files changed, 125 insertions(+), 46 deletions(-) diff --git a/cpp/bench.cpp b/cpp/bench.cpp index 58fef199..dea00eca 100644 --- a/cpp/bench.cpp +++ b/cpp/bench.cpp @@ -322,6 +322,20 @@ static void single_shot(dataset_at& dataset, index_at& index, bool construct = t index_many(index, dataset.vectors_count(), ids.data(), dataset.vector(0), dataset.dimensions()); } + // Measure index stats + using index_stats_t = typename index_at::stats_t; + index_stats_t global_stats = index.stats(); + index_stats_t base_stats = index.stats(0); + std::size_t base_unreachable_nodes = index.unreachable_nodes(0); + std::printf("-- Nodes: %zu\n", global_stats.nodes); + std::printf("-- Edges: %zu (%.2f density)\n", global_stats.edges, + global_stats.edges * 100.f / global_stats.max_edges); + std::printf("-- Edges in base: %zu (%.2f %% density)\n", base_stats.edges, + base_stats.edges * 100.f / base_stats.max_edges); + std::printf("-- Memory usage: %.2e bytes\n", (double)global_stats.allocated_bytes); + std::printf("-- Unreachable nodes in base: %zu (%.3f 
%%)\n", base_unreachable_nodes, + base_unreachable_nodes * 100.f / global_stats.nodes); + // Perform search, evaluate speed std::vector found_neighbors(dataset.queries_count() * dataset.neighborhood_size()); std::vector found_distances(dataset.queries_count() * dataset.neighborhood_size()); diff --git a/cpp/test.cpp b/cpp/test.cpp index 755ed159..d00c0832 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -183,9 +183,11 @@ void test_minimal_three_vectors(index_at& index, // expect(index.add(key_first, vector_first.data(), args...)); // Default approximate search - vector_key_t matched_keys[10] = {0}; - distance_t matched_distances[10] = {0}; - std::size_t matched_count = index.search(vector_first.data(), 5, args...).dump_to(matched_keys, matched_distances); + constexpr std::size_t oversubscribed_results = 777; + vector_key_t matched_keys[oversubscribed_results] = {0}; + distance_t matched_distances[oversubscribed_results] = {0}; + std::size_t matched_count = + index.search(vector_first.data(), oversubscribed_results, args...).dump_to(matched_keys, matched_distances); expect(matched_count == 1); expect(matched_keys[0] == key_first); @@ -198,19 +200,20 @@ void test_minimal_three_vectors(index_at& index, // // Perform single entry search { - auto search_result = index.search(vector_first.data(), 5, args...); + auto search_result = index.search(vector_first.data(), oversubscribed_results, args...); expect(search_result); matched_count = search_result.dump_to(matched_keys, matched_distances); - expect(matched_count != 0); + expect(matched_count == 3); } // Perform filtered exact search, keeping only odd values if constexpr (punned_ak) { auto is_odd = [](vector_key_t key) -> bool { return (key & 1) != 0; }; - auto search_result = index.filtered_search(vector_first.data(), 5, is_odd, args...); + auto search_result = index.filtered_search(vector_first.data(), oversubscribed_results, is_odd, args...); expect(search_result); matched_count = search_result.dump_to(matched_keys, 
matched_distances); - expect(matched_count != 0); + std::size_t count_odd = is_odd(key_first) + is_odd(key_second) + is_odd(key_third); + expect_eq(matched_count, count_odd); for (std::size_t i = 0; i < matched_count; i++) expect(is_odd(matched_keys[i])); } @@ -248,11 +251,12 @@ void test_minimal_three_vectors(index_at& index, // expect(copy_result); auto& copied_index = copy_result.index; - // Perform single entry search - auto search_result = copied_index.search(vector_first.data(), 5, args...); + // Perform single entry search, over-subscribing, + // asking for more data than is present in the index + auto search_result = copied_index.search(vector_first.data(), oversubscribed_results, args...); expect(search_result); matched_count = search_result.dump_to(matched_keys, matched_distances); - expect(matched_count != 0); + expect(matched_count == 3); // Validate scans std::size_t count = 0; @@ -270,10 +274,10 @@ void test_minimal_three_vectors(index_at& index, // index_at moved_index(std::move(index)); // Perform single entry search - auto search_result = moved_index.search(vector_first.data(), 5, args...); + auto search_result = moved_index.search(vector_first.data(), oversubscribed_results, args...); expect(search_result); matched_count = search_result.dump_to(matched_keys, matched_distances); - expect(matched_count != 0); + expect(matched_count == 3); // Validate scans std::size_t count = 0; @@ -297,7 +301,9 @@ void test_minimal_three_vectors(index_at& index, // auto load_result = index.load("tmp.usearch"); expect(load_result); { - matched_count = index.search(vector_first.data(), 5, args...).dump_to(matched_keys, matched_distances); + matched_count = index // + .search(vector_first.data(), oversubscribed_results, args...) 
+ .dump_to(matched_keys, matched_distances); expect_eq(matched_count, 3); expect_eq(matched_keys[0], key_first); expect(std::abs(matched_distances[0]) < 0.01); @@ -345,17 +351,18 @@ void test_collection(index_at& index, typename index_at::vector_key_t const star std::size_t dimensions = vector_first.size(); // Try batch requests, heavily oversubscribing the CPU cores - std::size_t executor_threads = std::thread::hardware_concurrency(); + std::size_t executor_threads = 1; // std::thread::hardware_concurrency(); executor_default_t executor(executor_threads); expect(index.try_reserve({vectors.size(), executor.size()})); executor.fixed(vectors.size(), [&](std::size_t thread, std::size_t task) { + auto task_data = vectors[task].data(); if constexpr (punned_ak) { - index_add_result_t result = index.add(start_key + task, vectors[task].data(), args...); + index_add_result_t result = index.add(start_key + task, task_data, args...); expect(result); } else { index_update_config_t config; config.thread = thread; - index_add_result_t result = index.add(start_key + task, vectors[task].data(), args..., config); + index_add_result_t result = index.add(start_key + task, task_data, args..., config); expect(result); } }); @@ -367,30 +374,35 @@ void test_collection(index_at& index, typename index_at::vector_key_t const star // Parallel search over the same vectors executor.fixed(vectors.size(), [&](std::size_t thread, std::size_t task) { - std::size_t max_possible_matches = vectors.size(); - std::size_t count_requested = max_possible_matches; + std::size_t const max_possible_matches = vectors.size(); + std::size_t const count_requested = max_possible_matches * 10; // Oversubscribe std::vector matched_keys(count_requested); std::vector matched_distances(count_requested); std::size_t matched_count = 0; + auto task_data = vectors[task].data(); // Invoke the search kernel if constexpr (punned_ak) { - index_search_result_t result = index.search(vectors[task].data(), count_requested, 
args...); + index_search_result_t result = index.search(task_data, count_requested, args...); expect(result); matched_count = result.dump_to(matched_keys.data(), matched_distances.data()); + + // In approximate search we can't always expect the right answer to be found + expect_eq(matched_count, max_possible_matches); + expect_eq((vector_key_t)matched_keys[0], (vector_key_t)(start_key + task)); + expect(std::abs(matched_distances[0]) < 0.01); } else { index_search_config_t config; config.thread = thread; - index_search_result_t result = index.search(vectors[task].data(), count_requested, args..., config); + index_search_result_t result = index.search(task_data, count_requested, args..., config); expect(result); matched_count = result.dump_to(matched_keys.data(), matched_distances.data()); - } - // In approximate search we can't always expect the right answer to be found - // expect_eq(matched_count, max_possible_matches); - // expect_eq(matched_keys[0], start_key + task); - // expect(std::abs(matched_distances[0]) < 0.01); - expect(matched_count <= max_possible_matches); + // In approximate search we can't always expect the right answer to be found + expect_eq(matched_count, max_possible_matches); + expect_eq((vector_key_t)matched_keys[0], (vector_key_t)(start_key + task)); + expect(std::abs(matched_distances[0]) < 0.01); + } // Check that all the distance are monotonically rising for (std::size_t i = 1; i < matched_count; i++) @@ -424,16 +436,17 @@ void test_collection(index_at& index, typename index_at::vector_key_t const star std::vector matched_keys(count_requested); std::vector matched_distances(count_requested); std::size_t matched_count = 0; + auto task_data = vectors[task].data(); // Invoke the search kernel if constexpr (punned_ak) { - index_search_result_t result = index.search(vectors[task].data(), count_requested, args...); + index_search_result_t result = index.search(task_data, count_requested, args...); expect(result); matched_count = 
result.dump_to(matched_keys.data(), matched_distances.data()); } else { index_search_config_t config; config.thread = thread; - index_search_result_t result = index.search(vectors[task].data(), count_requested, args..., config); + index_search_result_t result = index.search(task_data, count_requested, args..., config); expect(result); matched_count = result.dump_to(matched_keys.data(), matched_distances.data()); } @@ -590,8 +603,8 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { test_collection(*aligned_index.index, 42, vector_of_vectors, metric); } }; - for (std::size_t connectivity : {3, 13, 50}) - run_templated(connectivity); + // for (std::size_t connectivity : {3, 13, 50}) + // run_templated(connectivity); // Type-punned: auto run_punned = [&](bool multi, bool enable_key_lookups, std::size_t connectivity) { @@ -980,25 +993,27 @@ int main(int, char**) { // Exact search without constructing indexes. // Great for validating the distance functions. std::printf("Testing exact search\n"); - for (std::size_t dataset_count : {10, 100}) - for (std::size_t queries_count : {1, 10}) - for (std::size_t wanted_count : {1, 5}) - test_exact_search(dataset_count, queries_count, wanted_count); + if (0) + for (std::size_t dataset_count : {10, 100}) + for (std::size_t queries_count : {1, 10}) + for (std::size_t wanted_count : {1, 5}) + test_exact_search(dataset_count, queries_count, wanted_count); // Make sure the initializers and the algorithms can work with inadequately small values. // Be warned - this combinatorial explosion of tests produces close to __500'000__ tests! std::printf("Testing allowed, but absurd index configs\n"); - for (std::size_t connectivity : {2, 3}) // ! Zero maps to default, one degenerates - for (std::size_t dimensions : {1, 2, 3}) // ! 
Zero will raise - for (std::size_t expansion_add : {0, 1, 3}) - for (std::size_t expansion_search : {0, 1, 3}) - for (std::size_t count_vectors : {0, 1, 2, 17}) - for (std::size_t count_wanted : {0, 1, 3, 19}) { - test_absurd(dimensions, connectivity, expansion_add, + if (0) + for (std::size_t connectivity : {2, 3}) // ! Zero maps to default, one degenerates + for (std::size_t dimensions : {1, 2, 3}) // ! Zero will raise + for (std::size_t expansion_add : {0, 1, 3}) + for (std::size_t expansion_search : {0, 1, 3}) + for (std::size_t count_vectors : {0, 1, 2, 17}) + for (std::size_t count_wanted : {0, 1, 3, 19}) { + test_absurd(dimensions, connectivity, expansion_add, + expansion_search, count_vectors, count_wanted); + test_absurd(dimensions, connectivity, expansion_add, expansion_search, count_vectors, count_wanted); - test_absurd(dimensions, connectivity, expansion_add, expansion_search, - count_vectors, count_wanted); - } + } // TODO: Test absurd configs that are banned // for (metric_kind_t metric_kind : {metric_kind_t::cos_k, metric_kind_t::unknown_k, metric_kind_t::haversine_k}) {} diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index eb19cb8e..0d8d5b21 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -507,6 +507,15 @@ template > class bitset_gt { InterlockedAnd((long volatile*)&slots_[i / bits_per_slot()], ~mask); } + std::size_t count() const noexcept { + std::size_t result = 0; + for (std::size_t i = 0; i < count_; ++i) { + compressed_slot_t slot = slots_[i]; + result += __popcnt64(slot); + } + return result; + } + #else inline bool atomic_set(std::size_t i) noexcept { @@ -519,6 +528,15 @@ template > class bitset_gt { __atomic_fetch_and(&slots_[i / bits_per_slot()], ~mask, __ATOMIC_RELEASE); } + std::size_t count() const noexcept { + std::size_t result = 0; + for (std::size_t i = 0; i < count_; ++i) { + compressed_slot_t slot = slots_[i]; + result += __builtin_popcountll(slot); + } + return result; + } + #endif 
class lock_t { @@ -1892,7 +1910,8 @@ class index_gt { */ static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - using nodes_mutexes_t = bitset_gt; + using bitset_t = bitset_gt; + using nodes_mutexes_t = bitset_t; using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; @@ -2814,6 +2833,32 @@ class index_gt { std::size_t allocated_bytes{}; }; + /** + * @brief An @b expensive operation that checks if the graph contains any unreachable nodes. + * + * It's well known, that depending on a pruning heuristic, some nodes may become unreachable. + * https://github.com/apache/lucene/issues/12627#issuecomment-1767662289 + */ + expected_gt unreachable_nodes(std::size_t level = 0) const noexcept { + expected_gt expected{}; + level_t node_level = static_cast(level); + if (node_level > max_level_) + return expected.failed("Level out of bounds"); + + std::size_t total_nodes = size(); + bitset_t reachable_nodes(total_nodes); + if (!reachable_nodes) + return expected.failed("Can't allocate flags"); + + for (std::size_t i = 0; i != total_nodes; ++i) { + node_t node = node_at_(i); + for (auto neighbor : neighbors_(node, node_level)) + reachable_nodes.atomic_set(static_cast(neighbor)); + } + expected.result = total_nodes - reachable_nodes.count(); + return expected; + } + /** * @brief Aggregates stats on the number of nodes, edges, and memory usage across all levels. 
*/ diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index f0d16ba1..89e94b6b 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -695,6 +695,9 @@ class index_dense_gt { stats_t stats(stats_t* stats_per_level, std::size_t max_level) const { return typed_->stats(stats_per_level, max_level); } + expected_gt unreachable_nodes(std::size_t level = 0) const noexcept { + return typed_->unreachable_nodes(level); + } dynamic_allocator_t const& allocator() const { return typed_->dynamic_allocator(); } vector_key_t const& free_key() const { return free_key_; } diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index c9f09f81..bd79fd45 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -281,7 +281,9 @@ def test_index_oversubscribed_search(batch_size: int, threads: int): batch_matches: BatchMatches = index.search(vectors, batch_size * 10, threads=threads) for i, match in enumerate(batch_matches): assert i == match.keys[0] - assert len(match.keys) == batch_size + assert sorted(list(match.keys)) == list( + range(batch_size) + ), f"Missing values: {set(range(batch_size)) - set(match.keys)}" @pytest.mark.parametrize("ndim", [3, 97, 256]) From e69c0e50e090c729e920d5601043bcfe74713d35 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 23 Jun 2024 02:10:14 +0000 Subject: [PATCH 3/5] Add: `unreachable_nodes()` API Sometimes, knowing `disconnected_nodes()` number is not enough. To know exactly which nodes are disconnected from the member nodes of the current level, the upgraded API returns a `bitset_t`. Moreover, multiple nodes may have links referencing each-other and forming a connected component, but still be disconnected from the rest of the hierarchical graph. To detect those - a new `unreachable_nodes()` API was added. 
This patch also refactors `bitset_gt` and `ring_gt`, extending their functionality and fixing compilation errors. --- cpp/bench.cpp | 15 +- include/usearch/index.hpp | 233 +++++++++++++++++++++++--------- include/usearch/index_dense.hpp | 7 +- 3 files changed, 185 insertions(+), 70 deletions(-) diff --git a/cpp/bench.cpp b/cpp/bench.cpp index dea00eca..10e21e93 100644 --- a/cpp/bench.cpp +++ b/cpp/bench.cpp @@ -326,15 +326,22 @@ static void single_shot(dataset_at& dataset, index_at& index, bool construct = t using index_stats_t = typename index_at::stats_t; index_stats_t global_stats = index.stats(); index_stats_t base_stats = index.stats(0); - std::size_t base_unreachable_nodes = index.unreachable_nodes(0); + std::size_t base_disconnected_nodes = (*index.disconnected_nodes(0)).count(); + std::size_t unreachable_nodes = (*index.unreachable_nodes()).count(); std::printf("-- Nodes: %zu\n", global_stats.nodes); - std::printf("-- Edges: %zu (%.2f density)\n", global_stats.edges, + std::printf("-- Edges: %zu (%.2f %% density)\n", global_stats.edges, global_stats.edges * 100.f / global_stats.max_edges); std::printf("-- Edges in base: %zu (%.2f %% density)\n", base_stats.edges, base_stats.edges * 100.f / base_stats.max_edges); + std::printf("-- Edges above base: %zu (%.2f %% density)\n", global_stats.edges - base_stats.edges, + (global_stats.edges - base_stats.edges) * 100.f / (global_stats.max_edges - base_stats.max_edges)); std::printf("-- Memory usage: %.2e bytes\n", (double)global_stats.allocated_bytes); - std::printf("-- Unreachable nodes in base: %zu (%.3f %%)\n", base_unreachable_nodes, - base_unreachable_nodes * 100.f / global_stats.nodes); + std::printf("-- Memory usage in base: %.2e bytes (%.2f %%)\n", (double)base_stats.allocated_bytes, + base_stats.allocated_bytes * 100.f / global_stats.allocated_bytes); + std::printf("-- Disconnected nodes in base: %zu (%.3f %%)\n", base_disconnected_nodes, + base_disconnected_nodes * 100.f / global_stats.nodes); + 
std::printf("-- Unreachable nodes: %zu (%.3f %%)\n", unreachable_nodes, + unreachable_nodes * 100.f / global_stats.nodes); // Perform search, evaluate speed std::vector found_neighbors(dataset.queries_count() * dataset.neighborhood_size()); diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 0d8d5b21..d4b34387 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -434,84 +434,95 @@ template struct expected_gt { * @brief Light-weight bitset implementation to sync nodes updates during graph mutations. * Extends basic functionality with @b atomic operations. */ -template > class bitset_gt { +template > struct bitset_gt { + + using word_t = unsigned long; + + private: using allocator_t = allocator_at; using byte_t = typename allocator_t::value_type; static_assert(sizeof(byte_t) == 1, "Allocator must allocate separate addressable bytes"); - using compressed_slot_t = unsigned long; - - static constexpr std::size_t bits_per_slot() { return sizeof(compressed_slot_t) * CHAR_BIT; } - static constexpr compressed_slot_t bits_mask() { return sizeof(compressed_slot_t) * CHAR_BIT - 1; } - static constexpr std::size_t slots(std::size_t bits) { return divide_round_up(bits); } + static constexpr std::size_t bits_per_word() { return sizeof(word_t) * CHAR_BIT; } + static constexpr word_t bits_mask() { return sizeof(word_t) * CHAR_BIT - 1; } + static constexpr std::size_t words_count(std::size_t bits) { + return bits ? divide_round_up(bits) : 0; + } - compressed_slot_t* slots_{}; - /// @brief Number of slots. 
- std::size_t count_{}; + word_t* words_{}; + std::size_t bits_count_{}; public: bitset_gt() noexcept {} ~bitset_gt() noexcept { reset(); } - explicit operator bool() const noexcept { return slots_; } + explicit operator bool() const noexcept { return words_; } void clear() noexcept { - if (slots_) - std::memset(slots_, 0, count_ * sizeof(compressed_slot_t)); + if (words_) + std::memset(words_, 0, words_count() * sizeof(word_t)); } void reset() noexcept { - if (slots_) - allocator_t{}.deallocate((byte_t*)slots_, count_ * sizeof(compressed_slot_t)); - slots_ = nullptr; - count_ = 0; + if (words_) + allocator_t{}.deallocate((byte_t*)words_, words_count() * sizeof(word_t)); + words_ = nullptr; + bits_count_ = 0; } bitset_gt(std::size_t capacity) noexcept - : slots_((compressed_slot_t*)allocator_t{}.allocate(slots(capacity) * sizeof(compressed_slot_t))), - count_(slots_ ? slots(capacity) : 0u) { + : words_((word_t*)allocator_t{}.allocate(words_count(capacity) * sizeof(word_t))), bits_count_(capacity) { clear(); } bitset_gt(bitset_gt&& other) noexcept { - slots_ = exchange(other.slots_, nullptr); - count_ = exchange(other.count_, 0); + words_ = exchange(other.words_, nullptr); + bits_count_ = exchange(other.bits_count_, 0); } bitset_gt& operator=(bitset_gt&& other) noexcept { - std::swap(slots_, other.slots_); - std::swap(count_, other.count_); + std::swap(words_, other.words_); + std::swap(bits_count_, other.bits_count_); return *this; } bitset_gt(bitset_gt const&) = delete; bitset_gt& operator=(bitset_gt const&) = delete; - inline bool test(std::size_t i) const noexcept { return slots_[i / bits_per_slot()] & (1ul << (i & bits_mask())); } + inline std::size_t words_count() const noexcept { return words_count(bits_count_); } + inline span_gt words() noexcept { return {words_, words_count()}; } + inline bool test(std::size_t i) const noexcept { return words_[i / bits_per_word()] & (1ul << (i & bits_mask())); } inline bool set(std::size_t i) noexcept { - 
compressed_slot_t& slot = slots_[i / bits_per_slot()]; - compressed_slot_t mask{1ul << (i & bits_mask())}; - bool value = slot & mask; - slot |= mask; - return value; + word_t& word = words_[i / bits_per_word()]; + word_t mask{1ul << (i & bits_mask())}; + bool old_value = word & mask; + word |= mask; + return old_value; + } + inline bool reset(std::size_t i) noexcept { + word_t& word = words_[i / bits_per_word()]; + word_t mask{1ul << (i & bits_mask())}; + bool old_value = word & mask; + word &= ~mask; + return old_value; } #if defined(USEARCH_DEFINED_WINDOWS) inline bool atomic_set(std::size_t i) noexcept { - compressed_slot_t mask{1ul << (i & bits_mask())}; - return InterlockedOr((long volatile*)&slots_[i / bits_per_slot()], mask) & mask; + word_t mask{1ul << (i & bits_mask())}; + return InterlockedOr((long volatile*)&words_[i / bits_per_word()], mask) & mask; } inline void atomic_reset(std::size_t i) noexcept { - compressed_slot_t mask{1ul << (i & bits_mask())}; - InterlockedAnd((long volatile*)&slots_[i / bits_per_slot()], ~mask); + word_t mask{1ul << (i & bits_mask())}; + InterlockedAnd((long volatile*)&words_[i / bits_per_word()], ~mask); } std::size_t count() const noexcept { std::size_t result = 0; - for (std::size_t i = 0; i < count_; ++i) { - compressed_slot_t slot = slots_[i]; - result += __popcnt64(slot); + for (std::size_t i = 0; i < words_count(); ++i) { + word_t word = words_[i]; + result += __popcnt64(word); } return result; } @@ -519,26 +530,39 @@ template > class bitset_gt { #else inline bool atomic_set(std::size_t i) noexcept { - compressed_slot_t mask{1ul << (i & bits_mask())}; - return __atomic_fetch_or(&slots_[i / bits_per_slot()], mask, __ATOMIC_ACQUIRE) & mask; + word_t mask{1ul << (i & bits_mask())}; + return __atomic_fetch_or(&words_[i / bits_per_word()], mask, __ATOMIC_ACQUIRE) & mask; } inline void atomic_reset(std::size_t i) noexcept { - compressed_slot_t mask{1ul << (i & bits_mask())}; - __atomic_fetch_and(&slots_[i / bits_per_slot()], 
~mask, __ATOMIC_RELEASE); + word_t mask{1ul << (i & bits_mask())}; + __atomic_fetch_and(&words_[i / bits_per_word()], ~mask, __ATOMIC_RELEASE); } std::size_t count() const noexcept { std::size_t result = 0; - for (std::size_t i = 0; i < count_; ++i) { - compressed_slot_t slot = slots_[i]; - result += __builtin_popcountll(slot); + for (std::size_t i = 0; i < words_count(); ++i) { + word_t word = words_[i]; + result += __builtin_popcountll(word); } return result; } #endif + void flip() noexcept { + if (!bits_count_) + return; + + word_t* const end = words_ + words_count(); + for (word_t* it = words_; it != end; ++it) + *it = ~(*it); + + // We have to be carefull with the last word, as it might have unused bits. + for (std::size_t i = bits_count_; i != words_count() * bits_per_word(); ++i) + reset(i); + } + class lock_t { bitset_gt& bitset_; std::size_t bit_offset_; @@ -1158,10 +1182,11 @@ class ring_gt { } bool try_push(element_t const& value) noexcept { - if (head_ == tail_ && !empty_) - return false; // `elements_` is full + if (head_ == tail_ && (!empty_ || !capacity_)) // `elements_` is full + if (!reserve(capacity_ + 1)) + return false; - return push(value); + push(value); return true; } @@ -1762,11 +1787,11 @@ template inline key_at get_key(member_ref_gt const& m) * * @section Features * - * - Thread-safe for concurrent construction, search, and updates. - * - Doesn't allocate new threads, and reuses the ones its called from. - * - Allows storing value externally, managing just the similarity index. - * - Joins. - + * - Thread-safe for concurrent construction, search, and updates. + * - Doesn't allocate new threads, and reuses the ones its called from. + * - Allows storing value externally, managing just the similarity index. + * - Joins. 
+ * * @section Usage * * @subsection Exceptions @@ -1795,19 +1820,39 @@ template inline key_at get_key(member_ref_gt const& m) * tallest "level" of the graph that it belongs to, the external "key", and the * number of "dimensions" in the vector. * - * @section Metrics, Predicates and Callbacks + * @section Metrics, Predicates, and Callbacks * + * Metrics: + * - Metrics are functions or functors used to compute the distance (dis-similarity) + * between two objects. + * - The metric must be callable in different contexts: + * - `distance_t operator() (value_at, entry_at)`: Calculates the distance between a new object + * and an existing entry. + * - `distance_t operator() (entry_at, entry_at)`: Calculates the distance between two existing entries. + * - Any possible `entry_at` must support the following interfaces: + * - `std::size_t slot()` + * - `vector_key_t key()` * - * @section Smart References and Iterators - * - * - `member_citerator_t` and `member_iterator_t` have only slots, no indirections. + * Predicates: + * - Predicates are used to filter the results during the search process. + * - The predicate is a callable object that takes a `member_cref_t` and returns a boolean value. + * - Only entries for which the predicate returns `true` will be considered in the final result. * - * - `member_cref_t` and `member_ref_t` contains the `slot` and a reference - * to the key. So it passes through 1 level of visited_members in `nodes_`. - * Retrieving the key via `get_key` will cause fetching yet another cache line. + * Callbacks: + * - Callbacks are user-defined functions that are executed on specific events, such as a successful addition + * or update of an entry. + * - The callback is executed while the `member_ref_t` is still under lock, ensuring that the operation + * remains thread-safe. + * - Callbacks can be used for custom operations, such as logging, additional processing, or integration + * with other systems. 
* - * - `member_gt` contains an already prefetched copy of the key. + * @section Smart References and Iterators * + * - `member_citerator_t` and `member_iterator_t` only contain slots, with no indirections. + * - `member_cref_t` and `member_ref_t` contain the slot and a reference to the key, + * passing through one level of visited members in `nodes_`. Retrieving the key via `get_key` + * will fetch yet another cache line. + * - `member_gt` contains a prefetched copy of the key. */ template ; + private: /** * @brief Integer for the number of node neighbors at a specific level of the @@ -1910,7 +1957,6 @@ class index_gt { */ static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - using bitset_t = bitset_gt; using nodes_mutexes_t = bitset_t; using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; @@ -2068,6 +2114,7 @@ class index_gt { buffer_gt nodes_{}; /// @brief Mutex, that limits concurrent access to `nodes_`. + /// This structure must be as small as possible to fit more into CPU caches. mutable nodes_mutexes_t nodes_mutexes_{}; using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc; @@ -2834,13 +2881,14 @@ class index_gt { }; /** - * @brief An @b expensive operation that checks if the graph contains any unreachable nodes. + * @brief An @b expensive operation that checks if the graph contains any disconnected nodes, + * in other words, nodes that don't have a single other node pointing to them. * - * It's well known, that depending on a pruning heuristic, some nodes may become unreachable. + * It's well known, that depending on a pruning heuristic, some nodes may become disconnected. 
* https://github.com/apache/lucene/issues/12627#issuecomment-1767662289 */ - expected_gt unreachable_nodes(std::size_t level = 0) const noexcept { - expected_gt expected{}; + expected_gt disconnected_nodes(std::size_t level = 0) const noexcept { + expected_gt expected{}; level_t node_level = static_cast(level); if (node_level > max_level_) return expected.failed("Level out of bounds"); @@ -2855,7 +2903,64 @@ class index_gt { for (auto neighbor : neighbors_(node, node_level)) reachable_nodes.atomic_set(static_cast(neighbor)); } - expected.result = total_nodes - reachable_nodes.count(); + + // Once we know which nodes are reachable, toggling all the bits will give us the unreachable ones + expected.result = std::move(reachable_nodes); + expected.result.flip(); + return expected; + } + + /** + * @brief An @b expensive & @b sequential operation that checks if the graph contains any unreachable nodes, + * in other words, nodes that can't be reached from the top-level root. The result is + * greater or equal to `disconnected_nodes(0)`. + * + * It's well known, that depending on a pruning heuristic, some nodes may become unreachable. 
+ * https://github.com/apache/lucene/issues/12627#issuecomment-1767662289 + */ + expected_gt unreachable_nodes() const noexcept { + expected_gt expected{}; + + std::size_t total_nodes = size(); + bitset_t reachable_nodes(total_nodes), reachable_level_nodes(total_nodes); + if (!reachable_nodes || !reachable_level_nodes) + return expected.failed("Can't allocate flags"); + reachable_nodes.set(static_cast(entry_slot_)); + reachable_level_nodes.set(static_cast(entry_slot_)); + + // For BFS traversal we need a queue + ring_gt next_nodes, previous_level_nodes; + if (!previous_level_nodes.try_push(static_cast(entry_slot_))) + return expected.failed("Can't allocate BFS queue"); + + // That one queue will be reused across all levels + for (level_t level = max_level_; level >= 0; --level) { + + // The starting nodes of the level are the points of the previous level + for (compressed_slot_t slot; previous_level_nodes.try_pop(slot);) + if (!next_nodes.try_push(slot)) + return expected.failed("Can't grow BFS queue"); + reachable_level_nodes.clear(); + + for (compressed_slot_t current_slot; next_nodes.try_pop(current_slot);) { + node_t current_node = node_at_(current_slot); + for (auto neighbor : neighbors_(current_node, level)) { + if (!reachable_level_nodes.set(static_cast(neighbor))) { + reachable_nodes.set(static_cast(neighbor)); + if (!next_nodes.try_push(static_cast(neighbor))) + return expected.failed("Can't grow BFS queue"); + + // Aggregate an append-only list of nodes if only we are not in the base level + if (level && !previous_level_nodes.try_push(static_cast(neighbor))) + return expected.failed("Can't grow previous level list"); + } + } + } + } + + // Once we know which nodes are reachable, toggling all the bits will give us the unreachable ones + expected.result = std::move(reachable_nodes); + expected.result.flip(); return expected; } diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 89e94b6b..9991942c 100644 --- 
a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -508,6 +508,7 @@ class index_dense_gt { using add_result_t = typename index_t::add_result_t; using stats_t = typename index_t::stats_t; using match_t = typename index_t::match_t; + using bitset_t = typename index_t::bitset_t; /** * @brief A search result, containing the found keys and distances. @@ -695,8 +696,10 @@ class index_dense_gt { stats_t stats(stats_t* stats_per_level, std::size_t max_level) const { return typed_->stats(stats_per_level, max_level); } - expected_gt unreachable_nodes(std::size_t level = 0) const noexcept { - return typed_->unreachable_nodes(level); + + expected_gt unreachable_nodes() const noexcept { return typed_->unreachable_nodes(); } + expected_gt disconnected_nodes(std::size_t level = 0) const noexcept { + return typed_->disconnected_nodes(level); } dynamic_allocator_t const& allocator() const { return typed_->dynamic_allocator(); } From 5bddeedd8e80a0a57ab78a4cc7730a34cfc8f794 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:55:03 +0000 Subject: [PATCH 4/5] Improve: `noexcept` annotation --- include/usearch/index_dense.hpp | 54 ++++++++++++++++----------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 9991942c..9a901548 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -656,22 +656,22 @@ class index_dense_gt { return result; } - explicit operator bool() const { return typed_; } - std::size_t connectivity() const { return typed_->connectivity(); } - std::size_t size() const { return typed_->size() - free_keys_.size(); } - std::size_t capacity() const { return typed_->capacity(); } + explicit operator bool() const noexcept { return typed_; } + std::size_t connectivity() const noexcept { return typed_->connectivity(); } + std::size_t size() const noexcept { return 
typed_->size() - free_keys_.size(); } + std::size_t capacity() const noexcept { return typed_->capacity(); } std::size_t max_level() const noexcept { return typed_->max_level(); } index_dense_config_t const& config() const { return config_; } - index_limits_t const& limits() const { return typed_->limits(); } - bool multi() const { return config_.multi; } - std::size_t currently_available_threads() const { + index_limits_t const& limits() const noexcept { return typed_->limits(); } + bool multi() const noexcept { return config_.multi; } + std::size_t currently_available_threads() const noexcept { std::unique_lock available_threads_lock(available_threads_mutex_); return available_threads_.size(); } // The metric and its properties - metric_t const& metric() const { return metric_; } - void change_metric(metric_t metric) { metric_ = std::move(metric); } + metric_t const& metric() const noexcept { return metric_; } + void change_metric(metric_t metric) noexcept { metric_ = std::move(metric); } scalar_kind_t scalar_kind() const noexcept { return metric_.scalar_kind(); } std::size_t bytes_per_vector() const noexcept { return metric_.bytes_per_vector(); } @@ -679,21 +679,21 @@ class index_dense_gt { std::size_t dimensions() const noexcept { return metric_.dimensions(); } // Fetching and changing search criteria - std::size_t expansion_add() const { return config_.expansion_add; } - std::size_t expansion_search() const { return config_.expansion_search; } - void change_expansion_add(std::size_t n) { config_.expansion_add = n; } - void change_expansion_search(std::size_t n) { config_.expansion_search = n; } - - member_citerator_t cbegin() const { return typed_->cbegin(); } - member_citerator_t cend() const { return typed_->cend(); } - member_citerator_t begin() const { return typed_->begin(); } - member_citerator_t end() const { return typed_->end(); } - member_iterator_t begin() { return typed_->begin(); } - member_iterator_t end() { return typed_->end(); } - - stats_t 
stats() const { return typed_->stats(); } - stats_t stats(std::size_t level) const { return typed_->stats(level); } - stats_t stats(stats_t* stats_per_level, std::size_t max_level) const { + std::size_t expansion_add() const noexcept { return config_.expansion_add; } + std::size_t expansion_search() const noexcept { return config_.expansion_search; } + void change_expansion_add(std::size_t n) noexcept { config_.expansion_add = n; } + void change_expansion_search(std::size_t n) noexcept { config_.expansion_search = n; } + + member_citerator_t cbegin() const noexcept { return typed_->cbegin(); } + member_citerator_t cend() const noexcept { return typed_->cend(); } + member_citerator_t begin() const noexcept { return typed_->begin(); } + member_citerator_t end() const noexcept { return typed_->end(); } + member_iterator_t begin() noexcept { return typed_->begin(); } + member_iterator_t end() noexcept { return typed_->end(); } + + stats_t stats() const noexcept { return typed_->stats(); } + stats_t stats(std::size_t level) const noexcept { return typed_->stats(level); } + stats_t stats(stats_t* stats_per_level, std::size_t max_level) const noexcept { return typed_->stats(stats_per_level, max_level); } @@ -702,8 +702,8 @@ class index_dense_gt { return typed_->disconnected_nodes(level); } - dynamic_allocator_t const& allocator() const { return typed_->dynamic_allocator(); } - vector_key_t const& free_key() const { return free_key_; } + dynamic_allocator_t const& allocator() const noexcept { return typed_->dynamic_allocator(); } + vector_key_t const& free_key() const noexcept { return free_key_; } /** * @brief A relatively accurate lower bound on the amount of memory consumed by the system. @@ -711,7 +711,7 @@ class index_dense_gt { * * @see `serialized_length` for the length of the binary serialized representation. 
*/ - std::size_t memory_usage() const { + std::size_t memory_usage() const noexcept { return // typed_->memory_usage(0) + // typed_->tape_allocator().total_wasted() + // From c07751b077b85190082093679ecd38f8af48a915 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Wed, 21 Aug 2024 19:15:49 +0000 Subject: [PATCH 5/5] Add: `saturate` API --- cpp/test.cpp | 13 ++++-- include/usearch/index.hpp | 95 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/cpp/test.cpp b/cpp/test.cpp index d00c0832..7e2361c9 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -383,9 +383,16 @@ void test_collection(index_at& index, typename index_at::vector_key_t const star // Invoke the search kernel if constexpr (punned_ak) { - index_search_result_t result = index.search(task_data, count_requested, args...); - expect(result); - matched_count = result.dump_to(matched_keys.data(), matched_distances.data()); + { + index_search_result_t result = index.search(task_data, count_requested, args...); + expect(result); + matched_count = result.dump_to(matched_keys.data(), matched_distances.data()); + } + + if (matched_count != max_possible_matches) { + auto unreachable_count = index.unreachable_nodes(); + index_search_result_t other_result = index.search(task_data, count_requested, args...); + } // In approximate search we can't always expect the right answer to be found expect_eq(matched_count, max_possible_matches); diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index d4b34387..af7894f8 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -3552,6 +3552,101 @@ class index_gt { progress(processed.load(), nodes_count); } + /** + * @brief Scans the whole collection, maximizing the number of links + * from every entry, and ensuring that the graph is fully connected. + * + * @param[in] executor Thread-pool to execute the job in parallel. 
+ * @param[in] progress Callback to report the execution progress. + * @return The number of added links. + */ + template < // + typename allow_member_at = dummy_predicate_t, // + typename executor_at = dummy_executor_t, // + typename progress_at = dummy_progress_t // + > + expected_gt saturate( // + executor_at&& executor = executor_at{}, // + progress_at&& progress = progress_at{}) noexcept { + + expected_gt expected{}; + std::size_t total_nodes = size(); + + // We can use as little as just a bitset to track the presence of an incoming link, + // but as we start rebalancing the graph, we may need to prune and replace existing links. + // That may produce new isolated components of the graph, so instead of a boolean - let's + // keep a reference counter. For simplicity, let's use STL's `std::atomic`. + // For performance, let's avoid `compressed_slot_t` if it's a non-trivial integral + // type and use a larger integer instead. + using ref_counter_t = typename std::conditional< // + std::is_integral::value || (sizeof(compressed_slot_t) > sizeof(std::uint64_t)), + compressed_slot_t, std::uint64_t>::type; + using atomic_ref_counter_t = std::atomic; + buffer_gt incoming_links(total_nodes); + if (!incoming_links) + return expected.failed("Can't allocate flags"); + + for (level_t level = 0; level <= max_level_; ++level) { + + // First of all, ensure we don't have disconnected entries in this layer + incoming_links.clear(); + executor.dynamic(total_nodes, [&](std::size_t, std::size_t node_idx) { + node_t node = node_at_(node_idx); + if (static_cast(node.level()) < level) + return true; + for (auto neighbor : neighbors_(node, level)) + incoming_links[static_cast(neighbor)].fetch_add(1, std::memory_order_relaxed); + return true; + }); + + // If there are no unreachable nodes, we can save some time. + // Generally, in large graphs, no more than 0.1% of nodes are unreachable. + // Unfortunately, the `std::transform_reduce` is only available in C++17 and newer. 
+ std::size_t count_unreachable = 0; + for (auto const& ref_counter : incoming_links) + count_unreachable += ref_counter.load(std::memory_order_relaxed) == 0; + + if (count_unreachable) { + for (std::size_t i = 0; i != incoming_links.size(); ++i) { + // Skip connected and reachable nodes + if (incoming_links[i]) + continue; + } + } + + // Now iterate through all the nodes again and add "skip connections", + // that would lead to the closest second-degree connections. + } + + // Progress status + std::atomic do_tasks{true}; + std::atomic processed{0}; + + // Erase all the incoming links + std::size_t nodes_count = size(); + executor.dynamic(nodes_count, [&](std::size_t thread_idx, std::size_t node_idx) { + node_t node = node_at_(node_idx); + for (level_t level = 0; level <= node.level(); ++level) { + neighbors_ref_t neighbors = neighbors_(node, level); + std::size_t old_size = neighbors.size(); + neighbors.clear(); + for (std::size_t i = 0; i != old_size; ++i) { + compressed_slot_t neighbor_slot = neighbors[i]; + node_t neighbor = node_at_(neighbor_slot); + if (allow_member(member_cref_t{neighbor.ckey(), neighbor_slot})) + neighbors.push_back(neighbor_slot); + } + } + ++processed; + if (thread_idx == 0) + do_tasks = progress(processed.load(), nodes_count); + return do_tasks.load(); + }); + + // At the end report the latest numbers, because the reporter thread may be finished earlier + progress(processed.load(), nodes_count); + } + private: inline static precomputed_constants_t precompute_(index_config_t const& config) noexcept { precomputed_constants_t pre;