diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index c2f85e539..eed18272c 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -37,6 +37,19 @@ enum class MemoryType { kHostMmap, kHostPinned, kDevice, + kManaged, +}; + +/** Request 2MB huge pages support for an allocation */ +enum class HugePages { + /** Don't use huge pages if possible. */ + kDisable = 0, + /** Enable huge pages if possible, ignore otherwise. */ + kAsk = 1, + /** Enable huge pages if possible, warn the user otherwise. */ + kRequire = 2, + /** Force enable huge pages, throw an exception if not possible. */ + kDemand = 3 }; enum class Metric { @@ -65,6 +78,8 @@ inline auto parse_memory_type(const std::string& memory_type) -> MemoryType return MemoryType::kHostPinned; } else if (memory_type == "device") { return MemoryType::kDevice; + } else if (memory_type == "managed") { + return MemoryType::kManaged; } else { throw std::runtime_error("invalid memory type: '" + memory_type + "'"); } @@ -130,7 +145,7 @@ class algo : public algo_base { virtual void build(const T* dataset, size_t nrow) = 0; - virtual void set_search_param(const search_param& param) = 0; + virtual void set_search_param(const search_param& param, const void* filter_bitset) = 0; // TODO(snanditale): this assumes that an algorithm can always return k results. // This is not always possible. virtual void search(const T* queries, diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 49be78673..8d9c30cb1 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -271,7 +271,8 @@ void bench_search(::benchmark::State& state, } } try { - a->set_search_param(*search_param); + a->set_search_param(*search_param, + dataset->filter_bitset(current_algo_props->dataset_memory_type)); } catch (const std::exception& ex) { state.SkipWithError("An error occurred setting search parameters: " + std::string(ex.what())); return; @@ -359,13 +360,19 @@ void bench_search(::benchmark::State& state, // Each thread calculates recall on their partition of queries. // evaluate recall if (dataset->max_k() >= k) { - const std::int32_t* gt = dataset->gt_set(); + const std::int32_t* gt = dataset->gt_set(); + const std::uint32_t* filter_bitset = dataset->filter_bitset(MemoryType::kHostMmap); + auto filter = [filter_bitset](std::int32_t i) -> bool { + if (filter_bitset == nullptr) { return true; } + auto word = filter_bitset[i >> 5]; + return word & (1 << (i & 31)); + }; const std::uint32_t max_k = dataset->max_k(); result_buf.transfer_data(MemoryType::kHost, current_algo_props->query_memory_type); auto* neighbors_host = reinterpret_cast(result_buf.data(MemoryType::kHost)); std::size_t rows = std::min(queries_processed, query_set_size); std::size_t match_count = 0; - std::size_t total_count = rows * static_cast(k); + std::size_t total_count = 0; // We go through the groundtruth with same stride as the benchmark loop. size_t out_offset = 0; @@ -375,22 +382,44 @@ void bench_search(::benchmark::State& state, size_t i_orig_idx = batch_offset + i; size_t i_out_idx = out_offset + i; if (i_out_idx < rows) { - for (std::uint32_t j = 0; j < k; j++) { - auto act_idx = static_cast(neighbors_host[i_out_idx * k + j]); - for (std::uint32_t l = 0; l < k; l++) { - auto exp_idx = gt[i_orig_idx * max_k + l]; + /* NOTE: recall correctness & filtering + + In the loop below, we filter the ground truth values on-the-fly. 
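     (Editor's aside, illustration only: the `filter` lambda above tests one bit per
     point in the packed bitset; a point `i` passes iff

         (filter_bitset[i >> 5] >> (i & 31)) & 1u

     is non-zero, and a null bitset means "no filtering, everything passes".)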
+ We need enough ground truth values to compute recall correctly though. + But the ground truth file only contains `max_k` values per row; if there are less valid + values than k among them, we overestimate the recall. Essentially, we compare the first + `filter_pass_count` values of the algorithm output, and this counter can be less than `k`. + In the extreme case of very high filtering rate, we may be bypassing entire rows of + results. However, this is still better than no recall estimate at all. + + TODO: consider generating the filtered ground truth on-the-fly + */ + uint32_t filter_pass_count = 0; + for (std::uint32_t l = 0; l < max_k && filter_pass_count < k; l++) { + auto exp_idx = gt[i_orig_idx * max_k + l]; + if (!filter(exp_idx)) { continue; } + filter_pass_count++; + for (std::uint32_t j = 0; j < k; j++) { + auto act_idx = static_cast(neighbors_host[i_out_idx * k + j]); if (act_idx == exp_idx) { match_count++; break; } } } + total_count += filter_pass_count; } } out_offset += n_queries; batch_offset = (batch_offset + queries_stride) % query_set_size; } double actual_recall = static_cast(match_count) / static_cast(total_count); + /* NOTE: recall in the throughput mode & filtering + + When filtering is enabled, `total_count` may vary between individual threads, but we still take + the simple average across in-thread recalls. Strictly speaking, this is incorrect, but it's good + enough under assumption that the filtering is more-or-less uniform. + */ state.counters.insert({"Recall", {actual_recall, benchmark::Counter::kAvgThreads}}); } } @@ -515,13 +544,15 @@ void dispatch_benchmark(std::string cmdline, auto query_file = combine_path(data_prefix, dataset_conf.query_file); auto gt_file = dataset_conf.groundtruth_neighbors_file; if (gt_file.has_value()) { gt_file.emplace(combine_path(data_prefix, gt_file.value())); } - auto dataset = std::make_shared>(dataset_conf.name, - base_file, - dataset_conf.subset_first_row, - dataset_conf.subset_size, - query_file, - dataset_conf.distance, - gt_file); + auto dataset = + std::make_shared>(dataset_conf.name, + base_file, + dataset_conf.subset_first_row, + dataset_conf.subset_size, + query_file, + dataset_conf.distance, + gt_file, + search_mode ? dataset_conf.filtering_rate : std::nullopt); ::benchmark::AddCustomContext("dataset", dataset_conf.name); ::benchmark::AddCustomContext("distance", dataset_conf.distance); std::vector indices = conf.get_indices(); diff --git a/cpp/bench/ann/src/common/blob.hpp b/cpp/bench/ann/src/common/blob.hpp new file mode 100644 index 000000000..81310ae0b --- /dev/null +++ b/cpp/bench/ann/src/common/blob.hpp @@ -0,0 +1,889 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "ann_types.hpp" +#include "util.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::bench { + +/** RAII wrapper for a file descriptor. 
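 * Usage sketch (editor's illustration; the file name is hypothetical):
 *
 *   file_descriptor fd{"/data/base.fbin"};  // std::fopen(..., "r"); throws with errno details
 *   FILE* f = fd.value();                   // borrowed handle, fclose'd by the destructor
 *   file_descriptor tmp{size_t{1} << 20};   // anonymous std::tmpfile, ftruncate'd to 1 MiB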
*/ +struct file_descriptor { + explicit file_descriptor(std::string file_name) : fd_{std::fopen(file_name.c_str(), "r")} + { + if (fd_ == nullptr) { + throw std::runtime_error( + "cuvs::bench::file_descriptor: failed to open a file (std::fopen): errno = " + + std::to_string(errno) + ", " + std::string(strerror(errno)) + ". File name: " + file_name); + } + } + + explicit file_descriptor(size_t file_size_bytes) : fd_{std::tmpfile()} + { + if (fd_ == nullptr) { + throw std::runtime_error( + "cuvs::bench::file_descriptor: failed to open a temporary file (std::tmpfile): errno = " + + std::to_string(errno) + ", " + std::string(strerror(errno))); + } + if (ftruncate(fileno(fd_), file_size_bytes) == -1) { + throw std::runtime_error( + "cuvs::bench::file_descriptor: failed to call `ftruncate` to allocate memory for a " + "temporary file."); + } + } + + // No copies for owning struct + file_descriptor(const file_descriptor& res) = delete; + auto operator=(const file_descriptor& other) -> file_descriptor& = delete; + // Moving is fine + file_descriptor(file_descriptor&& other) : fd_{std::exchange(other.fd_, nullptr)} {} + auto operator=(file_descriptor&& other) -> file_descriptor& + { + std::swap(this->fd_, other.fd_); + return *this; + } + + ~file_descriptor() noexcept + { + if (fd_ != nullptr) { std::fclose(fd_); } + } + + [[nodiscard]] auto value() const -> FILE* { return fd_; } + + private: + FILE* fd_ = nullptr; +}; + +class mmap_error : public std::runtime_error { + private: + int errno_; + + public: + mmap_error(std::string extra_msg) + : std::runtime_error("cuvs::bench::mmap_owner: `mmap` error: Value of errno " + + std::to_string(errno) + ", " + std::string(strerror(errno)) + ". " + + extra_msg), + errno_(errno) + { + } + + [[nodiscard]] auto code() const noexcept { return errno_; } +}; + +/** RAII wrapper for a mmap/munmap. */ +struct mmap_owner { + /** Map a file */ + mmap_owner( + const file_descriptor& descriptor, size_t offset, size_t size, int flags, bool writable = false) + : ptr_{mmap_verbose(size, + writable ? PROT_READ | PROT_WRITE : PROT_READ, + flags, + fileno(descriptor.value()), + offset)}, + size_{size} + { + } + + /** Allocate a new memory (not backed by a file). */ + mmap_owner(size_t size, int flags) + : ptr_{mmap_verbose(size, PROT_READ | PROT_WRITE, flags, -1, 0)}, size_{size} + { + } + + ~mmap_owner() noexcept + { + if (ptr_ != nullptr) { munmap(ptr_, size_); } + } + + // No copies for owning struct + mmap_owner(const mmap_owner& res) = delete; + auto operator=(const mmap_owner& other) -> mmap_owner& = delete; + // Moving is fine + mmap_owner(mmap_owner&& other) + : ptr_{std::exchange(other.ptr_, nullptr)}, size_{std::exchange(other.size_, 0)} + { + } + auto operator=(mmap_owner&& other) -> mmap_owner& + { + std::swap(this->ptr_, other.ptr_); + std::swap(this->size_, other.size_); + return *this; + } + + [[nodiscard]] auto data() const -> void* { return ptr_; } + [[nodiscard]] auto size() const -> size_t { return size_; } + + private: + void* ptr_; + size_t size_; + + static inline auto mmap_verbose(size_t length, int prot, int flags, int fd, off_t offset) -> void* + { + auto ptr = mmap(nullptr, length, prot, flags, fd, offset); + if (ptr == MAP_FAILED) { + std::array buf; + snprintf(buf.data(), + sizeof(buf), + "Failed call: cuvs::bench::mmap_owner:mmap(nullptr, %zu, 0x%08x, 0x%08x, %d, %zd)", + length, + prot, + flags, + fd, + offset); + throw mmap_error{std::string(buf.data())}; + } + return ptr; + } +}; + +/** RAII wrapper for managed memory. 
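 * Editor's note: cudaMallocManaged returns a single pointer that is valid in both host
 * and device code, with pages migrating on demand, e.g.
 *
 *   managed_mem_owner buf{n_rows * n_cols * sizeof(float)};  // sizes hypothetical
 *   auto* data = static_cast<float*>(buf.data());            // usable by CPU and GPU algos alike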
*/ +struct managed_mem_owner { + explicit managed_mem_owner(size_t size) : size_{size} + { +#ifndef BUILD_CPU_ONLY + auto err_code = cudaMallocManaged(&ptr_, size_); + if (err_code != cudaSuccess) { + ptr_ = nullptr; + throw std::runtime_error{ + "cuvs::bench::managed_mem_owner: call to cudaMallocManaged failed with code " + + std::to_string(err_code)}; + } +#else + throw std::runtime_error{ + "Device functions are not available when built with BUILD_CPU_ONLY flag."}; +#endif + } + + ~managed_mem_owner() noexcept + { + if (ptr_ != nullptr) { +#ifndef BUILD_CPU_ONLY + cudaFree(ptr_); +#endif + } + } + + // No copies for owning struct + managed_mem_owner(const managed_mem_owner& res) = delete; + auto operator=(const managed_mem_owner& other) -> managed_mem_owner& = delete; + // Moving is fine + managed_mem_owner(managed_mem_owner&& other) + : ptr_{std::exchange(other.ptr_, nullptr)}, size_{std::exchange(other.size_, 0)} + { + } + auto operator=(managed_mem_owner&& other) -> managed_mem_owner& + { + std::swap(this->ptr_, other.ptr_); + std::swap(this->size_, other.size_); + return *this; + } + + [[nodiscard]] auto data() const -> void* { return ptr_; } + [[nodiscard]] auto size() const -> size_t { return size_; } + + private: + void* ptr_ = nullptr; + size_t size_; +}; + +/** RAII wrapper for device memory. */ +struct device_mem_owner { + explicit device_mem_owner(size_t size) : size_{size} + { +#ifndef BUILD_CPU_ONLY + auto err_code = cudaMalloc(&ptr_, size_); + if (err_code != cudaSuccess) { + ptr_ = nullptr; + throw std::runtime_error{ + "cuvs::bench::device_mem_owner: call to cudaMalloc failed with code " + + std::to_string(err_code)}; + } +#else + throw std::runtime_error{ + "Device functions are not available when built with BUILD_CPU_ONLY flag."}; +#endif + } + ~device_mem_owner() noexcept + { + if (ptr_ != nullptr) { +#ifndef BUILD_CPU_ONLY + cudaFree(ptr_); +#endif + } + } + // No copies for owning struct + device_mem_owner(const device_mem_owner& res) = delete; + auto operator=(const device_mem_owner& other) -> device_mem_owner& = delete; + // Moving is fine + device_mem_owner(device_mem_owner&& other) + : ptr_{std::exchange(other.ptr_, nullptr)}, size_{std::exchange(other.size_, 0)} + { + } + auto operator=(device_mem_owner&& other) -> device_mem_owner& + { + std::swap(this->ptr_, other.ptr_); + std::swap(this->size_, other.size_); + return *this; + } + + [[nodiscard]] auto data() const -> void* { return ptr_; } + [[nodiscard]] auto size() const -> size_t { return size_; } + + private: + void* ptr_ = nullptr; + size_t size_; +}; + +/** Lazy-initialized file handle. */ +struct file { + explicit file(std::string file_name) : reqsize_or_name_{std::move(file_name)} {} + explicit file(size_t tmp_size_bytes) : reqsize_or_name_{tmp_size_bytes} {} + + // this shouldn't be necessary, but adds extra safety (make sure descriptors are not copied) + file(const file&) = delete; + auto operator=(const file&) -> file& = delete; + file(file&&); + auto operator=(file&&) -> file&; + + [[nodiscard]] auto descriptor() const -> const file_descriptor& + { + if (!descriptor_.has_value()) { + std::visit([&d = descriptor_](auto&& x) { d.emplace(x); }, reqsize_or_name_); + } + return descriptor_.value(); + } + + [[nodiscard]] auto path() const -> std::string + { + return std::holds_alternative(reqsize_or_name_) + ? 
std::get(reqsize_or_name_) + : ""; + } + + [[nodiscard]] auto size() const -> size_t + { + if (!size_.has_value()) { + size_.emplace(std::visit( + [&s = size_](auto&& x) { + if constexpr (std::is_same_v, size_t>) { + return x; + } else { + struct stat statbuf; + if (stat(x.c_str(), &statbuf) != 0) { + throw std::runtime_error{"cuvs::bench::file::size() error: `stat` failed: " + x}; + } + return static_cast(statbuf.st_size); + } + }, + reqsize_or_name_)); + } + return size_.value(); + } + + [[nodiscard]] auto is_temporary() const -> bool + { + return std::holds_alternative(reqsize_or_name_); + } + + void reset_lazy_state() const + { + descriptor_.reset(); + size_.reset(); + } + + private: + std::variant reqsize_or_name_; + mutable std::optional descriptor_ = std::nullopt; + mutable std::optional size_ = std::nullopt; +}; + +// declare the move constructors explicitly outside of the class declaration to make sure if +// anything is wrong, we catch that at compile time. +inline file::file(file&&) = default; +inline auto file::operator=(file&&) -> file& = default; + +/** + * Lazy-initialized file handle with the size information provided by our .bin format: + * The file always starts with two uint32_t values: [n_rows, n_cols]. + */ +template +struct blob_file : public file { + explicit blob_file(std::string file_name, uint32_t rows_offset = 0, uint32_t rows_limit = 0) + : file{std::move(file_name)}, rows_offset_{rows_offset}, rows_limit_{rows_limit} + { + } + + explicit blob_file(uint32_t n_rows, uint32_t n_cols) + : file{sizeof(T) * n_rows * n_cols + 2 * sizeof(uint32_t)}, rows_offset_{0}, rows_limit_{0} + { + // NB: this forces the file descriptor, thus breaking lazy-initialization. Not sure if it's + // worth refactoring in this case. + std::array h{n_rows, n_cols}; + if (std::fwrite(h.data(), sizeof(uint32_t), h.size(), descriptor().value()) != 2) { + throw std::runtime_error{ + "cuvs::bench::blob_file `fwrite` failed when initializing a tmp file."}; + } + if (std::fflush(descriptor().value()) != 0) { + throw std::runtime_error{ + "cuvs::bench::blob_file `fflush` failed with non-zero code when initializing a tmp file."}; + } + } + + blob_file(const blob_file&) = delete; + auto operator=(const blob_file&) -> blob_file& = delete; + blob_file(blob_file&&); + auto operator=(blob_file&&) -> blob_file&; + + [[nodiscard]] auto n_rows() const -> uint32_t { return header()[0]; } + [[nodiscard]] auto n_cols() const -> uint32_t { return header()[1]; } + + [[nodiscard]] auto rows_offset() const -> uint32_t { return rows_offset_; } + [[nodiscard]] auto rows_limit() const -> uint32_t + { + auto rows_max = n_rows() - std::min(rows_offset(), n_rows()); // available rows + return rows_limit_ == 0 ? rows_max : std::min(rows_limit_, rows_max); // limited rows + } + + void reset_lazy_state() const + { + file::reset_lazy_state(); + header_.reset(); + } + + private: + mutable std::optional> header_ = std::nullopt; + uint32_t rows_offset_; + uint32_t rows_limit_; + + [[nodiscard]] auto header() const -> const std::array& + { + if (!header_.has_value()) { + std::array h; + std::rewind(descriptor().value()); + if (std::fread(h.data(), sizeof(uint32_t), h.size(), descriptor().value()) != 2) { + throw std::runtime_error{"cuvs::bench::blob_file read header of bin file failed: " + + path()}; + } + header_.emplace(h); + } + return header_.value(); + } +}; + +// declare the move constructors explicitly outside of the class declaration to make sure if +// anything is wrong, we catch that at compile time. 
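Before the move-constructor definitions below, a brief aside on `blob_file`: the `.bin` header is just two `uint32_t` values followed by the row-major payload, so a minimal standalone reader looks roughly like this (editor's sketch; the helper name is hypothetical and error handling is minimal):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <utility>

// Reads the [n_rows, n_cols] header of a .bin file; sizeof(T) * n_rows * n_cols
// bytes of payload follow immediately after these 8 bytes.
inline auto read_bin_header(const char* path) -> std::pair<std::uint32_t, std::uint32_t>
{
  std::FILE* f = std::fopen(path, "r");
  if (f == nullptr) { throw std::runtime_error{"failed to open .bin file"}; }
  std::uint32_t header[2];
  std::size_t n_read = std::fread(header, sizeof(std::uint32_t), 2, f);
  std::fclose(f);
  if (n_read != 2) { throw std::runtime_error{"failed to read the 8-byte .bin header"}; }
  return {header[0], header[1]};
}
```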
+template +inline blob_file::blob_file(blob_file&&) = default; +template +inline auto blob_file::operator=(blob_file&&) -> blob_file& = default; + +/** Lazily map or copy the file content onto host memory. */ +template +struct blob_mmap { + explicit blob_mmap(blob_file&& file, + bool copy_in_memory = false, + HugePages hugepages_2mb = HugePages::kDisable) + : file_{std::move(file)}, + copy_in_memory_{copy_in_memory}, + hugepages_2mb_requested_{hugepages_2mb}, + hugepages_2mb_actual_{hugepages_2mb > HugePages::kDisable} + { + } + explicit blob_mmap(std::string file_name, + uint32_t rows_offset = 0, + uint32_t rows_limit = 0, + bool copy_in_memory = false, + HugePages hugepages_2mb = HugePages::kDisable) + : blob_mmap{ + blob_file{std::move(file_name), rows_offset, rows_limit}, copy_in_memory, hugepages_2mb} + { + } + + private: + blob_file file_; + bool copy_in_memory_; + HugePages hugepages_2mb_requested_; + mutable bool hugepages_2mb_actual_; + + mutable std::optional> handle_; + + [[nodiscard]] auto handle() const -> const std::tuple& + { + if (!handle_.has_value()) { + size_t page_size = hugepages_2mb_actual_ ? 1024ull * 1024ull * 2ull : sysconf(_SC_PAGE_SIZE); + int flags = 0; + if (hugepages_2mb_actual_) { flags |= MAP_HUGETLB | MAP_HUGE_2MB; } + size_t data_start = sizeof(T) * file_.rows_offset() * file_.n_cols() + sizeof(uint32_t) * 2; + size_t data_end = sizeof(T) * file_.rows_limit() * file_.n_cols() + data_start; + + try { + if (copy_in_memory_) { + // Copy the content in-memory + flags |= MAP_ANONYMOUS | MAP_PRIVATE; + size_t size = data_end - data_start; + mmap_owner owner{size, flags}; + std::fseek(file_.descriptor().value(), data_start, SEEK_SET); + size_t n_elems = file_.rows_limit() * file_.n_cols(); + if (std::fread(owner.data(), sizeof(T), n_elems, file_.descriptor().value()) != n_elems) { + throw std::runtime_error{"cuvs::bench::blob_mmap() fread " + file_.path() + " failed"}; + } + handle_.emplace(std::move(owner), 0); + } else { + // Map the file + // If this is a temporary file, we're supposed to write to it, hence MAP_SHARED. + flags |= file_.is_temporary() ? MAP_SHARED : MAP_PRIVATE; + size_t mmap_start = (data_start / page_size) * page_size; + size_t mmap_size = data_end - mmap_start; + handle_.emplace( + mmap_owner{file_.descriptor(), mmap_start, mmap_size, flags, file_.is_temporary()}, + data_start - mmap_start); + } + } catch (const mmap_error& e) { + bool hugepages_2mb_asked = hugepages_2mb_requested_ == HugePages::kAsk || + hugepages_2mb_requested_ == HugePages::kRequire; + if (e.code() == EPERM && hugepages_2mb_asked && hugepages_2mb_actual_) { + if (hugepages_2mb_requested_ == HugePages::kRequire) { + log_warn( + "cuvs::bench::blob_mmap: `mmap` failed to map due to EPERM, which is likely caused " + "by the permissions issue. You either need a CAP_IPC_LOCK capability or run the " + "program with sudo. We will try again without huge pages."); + } + hugepages_2mb_actual_ = false; + return handle(); + } + if (e.code() == EINVAL && hugepages_2mb_asked && hugepages_2mb_actual_ && + !copy_in_memory_) { + if (hugepages_2mb_requested_ == HugePages::kRequire) { + log_warn( + "cuvs::bench::blob_mmap: `mmap` failed to map due to EINVAL, which is likely caused " + "by the file system not supporting huge pages. 
We will try again without huge "
+              "pages.");
+          }
+          hugepages_2mb_actual_ = false;
+          return handle();
+        }
+        throw;  // The error is not due to huge pages, or it is otherwise unrecoverable.
+      }
+    }
+    return handle_.value();
+  }
+
+ public:
+  [[nodiscard]] auto data() const -> T*
+  {
+    auto& [owner, offset] = handle();
+    return reinterpret_cast(reinterpret_cast(owner.data()) + offset);
+  }
+
+  [[nodiscard]] auto is_in_memory() const -> bool { return copy_in_memory_; }
+  [[nodiscard]] auto is_hugepage() const -> bool { return hugepages_2mb_actual_; }
+  /**
+   * Enabling huge pages is not always possible. For convenience, we may silently disable it in
+   * some cases. This function helps to decide whether the current setting is compatible with the
+   * request.
+   */
+  [[nodiscard]] auto hugepage_compliant(HugePages request) const -> bool
+  {
+    if (request == hugepages_2mb_requested_) {
+      // Whatever the actual state is, the result would be the same if we recreated the mapping.
+      return true;
+    }
+    bool hp_enabled    = hugepages_2mb_actual_ && request > HugePages::kDisable;
+    bool hp_disabled   = !hugepages_2mb_actual_ && request == HugePages::kDisable;
+    bool hp_not_forced = !hugepages_2mb_actual_ && request == HugePages::kAsk;
+    return hp_enabled || hp_disabled || hp_not_forced;
+  }
+  [[nodiscard]] auto n_rows() const -> uint32_t { return file_.rows_limit(); }
+  [[nodiscard]] auto n_cols() const -> uint32_t { return file_.n_cols(); }
+  [[nodiscard]] auto size() const -> size_t { return sizeof(T) * n_rows() * n_cols(); }
+  [[nodiscard]] auto unmap() && noexcept -> blob_file
+  {
+    // If we used mmap on a temporary file, then it was writable, so there is a chance the user
+    // wrote something to the mmap; we must make sure the changes are visible in the file before
+    // migrating it.
+    bool flush_writes = file_.is_temporary() && handle_.has_value();
+    if (flush_writes) {
+      auto& [owner, _] = handle();
+      msync(owner.data(), owner.size(), MS_SYNC | MS_INVALIDATE);
+    }
+    handle_.reset();
+    if (flush_writes) { std::fflush(file_.descriptor().value()); }
+    return blob_file{std::move(file_)};
+  }
+  [[nodiscard]] auto release() && noexcept -> blob_file { return std::move(*this).unmap(); }
+
+  void reset_lazy_state() const
+  {
+    file_.reset_lazy_state();
+    hugepages_2mb_actual_ = hugepages_2mb_requested_ > HugePages::kDisable;
+  }
+};
+
+template
+struct blob_pinned {
+ private:
+  mutable blob_mmap blob_;
+  mutable void* ptr_ = nullptr;
+
+ public:
+  explicit blob_pinned(blob_mmap&& blob) : blob_{std::move(blob)} {}
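To summarize the retry logic in `handle()` above (an editor's sketch of the policy, not part of the patch): `mmap` may fail with EPERM (insufficient privileges / missing CAP_IPC_LOCK) or EINVAL (e.g. a filesystem without huge-page support), and what happens next depends on the requested level:

```cpp
// Whether blob_mmap::handle() retries the mapping with regular pages after a
// huge-page mmap failure (EPERM or EINVAL), per the HugePages request level.
constexpr auto may_retry_without_hugepages(HugePages requested) -> bool
{
  switch (requested) {
    case HugePages::kAsk: return true;      // retry silently
    case HugePages::kRequire: return true;  // retry after a log_warn(...)
    case HugePages::kDemand: return false;  // the mmap_error propagates to the caller
    default: return false;                  // kDisable: huge pages were never attempted
  }
}
```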
+  // First map the file and then register to CUDA.
+  // NB: as per docs, huge pages are not supported.
+  explicit blob_pinned(blob_file&& blob, bool copy_in_memory = true)
+    : blob_{std::move(blob), copy_in_memory, HugePages::kDisable}
+  {
+  }
+  explicit blob_pinned(std::string file_name,
+                       uint32_t rows_offset = 0,
+                       uint32_t rows_limit  = 0,
+                       bool copy_in_memory  = true)
+    : blob_pinned{blob_file{file_name, rows_offset, rows_limit}, copy_in_memory}
+  {
+  }
+
+  ~blob_pinned() noexcept { reset_lazy_state(); }
+
+  // No copies for owning struct
+  blob_pinned(const blob_pinned& res) = delete;
+  auto operator=(const blob_pinned& other) -> blob_pinned& = delete;
+  // Moving is fine
+  blob_pinned(blob_pinned&& other) : blob_{std::move(other.blob_)}, ptr_{other.ptr_}
+  {
+    other.ptr_ = nullptr;
+  }
+  auto operator=(blob_pinned&& other) -> blob_pinned&
+  {
+    std::swap(this->blob_, other.blob_);
+    std::swap(this->ptr_, other.ptr_);
+    return *this;
+  }
+
+  [[nodiscard]] auto data() const -> T*
+  {
+    if (ptr_ == nullptr) {
+      void* ptr = reinterpret_cast(blob_.data());
+#ifndef BUILD_CPU_ONLY
+      int flags       = cudaHostRegisterDefault;
+      auto error_code = cudaSuccess;
+      if (!blob_.is_in_memory()) {
+        flags      = cudaHostRegisterIoMemory | cudaHostRegisterReadOnly;
+        error_code = cudaHostRegister(ptr, blob_.size(), flags);
+        if (error_code == cudaErrorNotSupported) {
+          // Sometimes read-only registration is not supported; retry with the reduced flags.
+          flags      = cudaHostRegisterIoMemory;
+          error_code = cudaHostRegister(ptr, blob_.size(), flags);
+        }
+      } else {
+        error_code = cudaHostRegister(ptr, blob_.size(), cudaHostRegisterDefault);
+      }
+      if (error_code == cudaErrorInvalidValue && (!blob_.is_in_memory() || blob_.is_hugepage())) {
+        auto hugepage =
+          (blob_.is_hugepage() && blob_.is_in_memory()) ? HugePages::kAsk : HugePages::kDisable;
+        auto file = std::move(blob_).release();
+        blob_     = blob_mmap{std::move(file), true, hugepage};
+        return data();
+      }
+      if (error_code != cudaSuccess) {
+        log_error(
+          "cuvs::bench::blob_pinned: cudaHostRegister(%p, %zu, %d)", ptr, blob_.size(), flags);
+        throw std::runtime_error{
+          "cuvs::bench::blob_pinned: call to cudaHostRegister failed with code " +
+          std::to_string(error_code)};
+      }
+#endif
+      ptr_ = ptr;
+    }
+    return reinterpret_cast(ptr_);
+  }
+
+  [[nodiscard]] auto unpin() && noexcept -> blob_mmap
+  {
+    // unregister the memory before passing it to a third party
+    if (ptr_ != nullptr) {
+#ifndef BUILD_CPU_ONLY
+      cudaHostUnregister(ptr_);
+#endif
+      ptr_ = nullptr;
+    }
+    return blob_mmap{std::move(blob_)};
+  }
+  void reset_lazy_state() const
+  {
+    if (ptr_ != nullptr) {
+#ifndef BUILD_CPU_ONLY
+      cudaHostUnregister(ptr_);
+#endif
+      ptr_ = nullptr;
+    }
+    blob_.reset_lazy_state();
+  }
+
+  [[nodiscard]] auto is_in_memory() const noexcept -> bool { return true; }
+  [[nodiscard]] auto is_hugepage() const noexcept -> bool { return blob_.is_hugepage(); }
+  [[nodiscard]] auto hugepage_compliant(HugePages request) const -> bool
+  {
+    return blob_.hugepage_compliant(request);
+  }
+  [[nodiscard]] auto n_rows() const -> uint32_t { return blob_.n_rows(); }
+  [[nodiscard]] auto n_cols() const -> uint32_t { return blob_.n_cols(); }
+  [[nodiscard]] auto size() const -> size_t { return blob_.size(); }
+  [[nodiscard]] auto release() && noexcept -> blob_file
+  {
+    return std::move(*this).unpin().release();
+  }
+};
+
+template
+struct blob_copying {
+ private:
+  blob_mmap blob_;
+  mutable std::optional mem_ = std::nullopt;
+
+ public:
+  explicit blob_copying(blob_mmap&& blob) : blob_{std::move(blob)} {}
+  // First map the file and then copy it to device; use huge pages for faster copy.
+  explicit blob_copying(blob_file&& blob) :
blob_{std::move(blob), false, HugePages::kAsk} {} + explicit blob_copying(std::string file_name, uint32_t rows_offset = 0, uint32_t rows_limit = 0) + : blob_copying{blob_file{file_name, rows_offset, rows_limit}} + { + } + + [[nodiscard]] auto data() const -> T* + { + if (!mem_.has_value()) { + mem_.emplace(blob_.size()); +#ifndef BUILD_CPU_ONLY + auto error_code = cudaMemcpy(mem_->data(), blob_.data(), blob_.size(), cudaMemcpyDefault); + if (error_code != cudaSuccess) { + throw std::runtime_error{"cuvs::bench::blob_device: call to cudaMemcpy failed with code " + + std::to_string(error_code)}; + } +#endif + } + return reinterpret_cast(mem_->data()); + } + + [[nodiscard]] auto free() && noexcept -> blob_mmap + { + mem_.reset(); + return blob_mmap{std::move(blob_)}; + } + void reset_lazy_state() const + { + mem_.reset(); + blob_.reset_lazy_state(); + } + + [[nodiscard]] auto is_in_memory() const noexcept -> bool { return true; } + [[nodiscard]] auto is_hugepage() const noexcept -> bool { return blob_.is_hugepage(); } + // For copying CUDA allocation, the hugepage setting is not relevant at all. + [[nodiscard]] auto hugepage_compliant(HugePages request) const -> bool { return true; } + [[nodiscard]] auto n_rows() const -> uint32_t { return blob_.n_rows(); } + [[nodiscard]] auto n_cols() const -> uint32_t { return blob_.n_cols(); } + [[nodiscard]] auto size() const -> size_t { return blob_.size(); } + [[nodiscard]] auto release() && noexcept -> blob_file + { + return std::move(*this).free().release(); + } +}; + +template +using blob_device = blob_copying; +template +using blob_managed = blob_copying; + +/** + * @brief A blob is a single contiguous piece of data. + * + * It can reside in host, managed, or device memory, it can be pinned in physical memory or backed + * by a filesystem. You can also control whether to use huge pages (2MB) when it is in host memory. + * + * The blob data is a one- or two-dimensional typed array (it has n_rows and n_cols properties). + * + * The blob tries to be lazy accessing the data. It reads or copies the data only when it is + * requested. This allows the benchmarking executable be more resilient; for example, a blob can + * point to a non-existent query dataset when the benchmark is in the index build mode. + * + * @tparam T the data type. + */ +template +struct blob { + private: + using blob_type = std::variant, blob_pinned, blob_device, blob_managed>; + mutable blob_type value_; + + [[nodiscard]] auto data_mmap(bool in_memory, HugePages request_hugepages_2mb) const -> T* + { + if (auto* v = std::get_if>(&value_)) { + if (v->is_in_memory() == in_memory && v->hugepage_compliant(request_hugepages_2mb)) { + return v->data(); + } + } + blob_type tmp{std::move(value_)}; + value_ = std::visit( + [in_memory, request_hugepages_2mb](auto&& val) { + return blob_mmap{std::move(val).release(), in_memory, request_hugepages_2mb}; + }, + std::move(tmp)); + + return data(); + } + + [[nodiscard]] auto data_pinned(HugePages request_hugepages_2mb) const -> T* + { + // The requested type is there + if (auto* v = std::get_if>(&value_)) { + if (v->hugepage_compliant(request_hugepages_2mb)) { return v->data(); } + } + // If there's already an mmap allocation, we just need to pin it. 
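      // (Editor's note: every blob_* owner exposes `release() && -> blob_file`, so any state
      //  can reach any other by round-tripping through the file handle; the branch below is a
      //  cheaper path that moves the existing mmap straight into blob_pinned, skipping the
      //  unmap/re-read.)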
+    if (auto* v = std::get_if>(&value_)) {
+      if (v->hugepage_compliant(request_hugepages_2mb)) {
+        blob_mmap tmp{std::move(*v)};
+        return value_.template emplace>(std::move(tmp)).data();
+      }
+    }
+    // otherwise do full reset
+    blob_type tmp{std::move(value_)};
+    value_ = std::visit(
+      [request_hugepages_2mb](auto&& val) {
+        blob_mmap tmp{std::move(val).release(), true, request_hugepages_2mb};
+        return blob_pinned{std::move(tmp)};
+      },
+      std::move(tmp));
+
+    return data();
+  }
+
+  [[nodiscard]] auto data_device() const -> T*
+  {
+    // The requested type is there
+    if (auto* v = std::get_if>(&value_)) { return v->data(); }
+    // otherwise do full reset
+    blob_type tmp{std::move(value_)};
+    value_ = std::visit([](auto&& val) { return blob_device{std::move(val).release()}; },
+                        std::move(tmp));
+    return data();
+  }
+
+  [[nodiscard]] auto data_managed() const -> T*
+  {
+    // The requested type is there
+    if (auto* v = std::get_if>(&value_)) { return v->data(); }
+    // otherwise do full reset
+    blob_type tmp{std::move(value_)};
+    value_ = std::visit([](auto&& val) { return blob_managed{std::move(val).release()}; },
+                        std::move(tmp));
+    return data();
+  }
+
+ public:
+  explicit blob(std::string file_name,
+                uint32_t rows_offset    = 0,
+                uint32_t rows_limit     = 0,
+                bool copy_in_memory     = false,
+                HugePages hugepages_2mb = HugePages::kDisable)
+    : value_{std::in_place_type>,
+             std::move(file_name),
+             rows_offset,
+             rows_limit,
+             copy_in_memory,
+             hugepages_2mb}
+  {
+  }
+
+  template
+  explicit blob(VariantT&& blob_variant) : value_{std::move(blob_variant)}
+  {
+  }
+
+  [[nodiscard]] auto data() const -> T*
+  {
+    return std::visit([](auto&& val) { return val.data(); }, value_);
+  }
+
+  [[nodiscard]] auto data(MemoryType memory_type,
+                          HugePages request_hugepages_2mb = HugePages::kDisable) const -> T*
+  {
+    switch (memory_type) {
+      case MemoryType::kHost: return data_mmap(true, request_hugepages_2mb);
+      case MemoryType::kHostMmap: return data_mmap(false, request_hugepages_2mb);
+      case MemoryType::kHostPinned:
+        if (request_hugepages_2mb > HugePages::kDisable) {
+          log_error(
+            "cuvs::bench::blob::data(): huge pages are requested but not supported by "
+            "cudaHostRegister at the moment. We will try nevertheless...");
+        }
+        return data_pinned(request_hugepages_2mb);
+      case MemoryType::kDevice: return data_device();    // hugepages are not relevant here
+      case MemoryType::kManaged: return data_managed();  // hugepages are not relevant here
+      default:
+        throw std::runtime_error{"cuvs::bench::blob::data(): unexpected memory type " +
+                                 std::to_string(static_cast(memory_type))};
+    }
+  }
+
+  [[nodiscard]] auto is_in_memory() const noexcept -> bool
+  {
+    return std::visit([](auto&& val) { return val.is_in_memory(); }, value_);
+  }
+  [[nodiscard]] auto is_hugepage() const noexcept -> bool
+  {
+    return std::visit([](auto&& val) { return val.is_hugepage(); }, value_);
+  }
+  [[nodiscard]] auto n_rows() const -> uint32_t
+  {
+    return std::visit([](auto&& val) { return val.n_rows(); }, value_);
+  }
+
+  [[nodiscard]] auto n_cols() const -> uint32_t
+  {
+    return std::visit([](auto&& val) { return val.n_cols(); }, value_);
+  }
+  /** Size of the blob content in bytes (doesn't include the file header). */
+  [[nodiscard]] auto size() const -> size_t
+  {
+    return std::visit([](auto&& val) { return val.size(); }, value_);
+  }
+  /**
+   * Reset the hidden internal state, e.g. release the memory allocation or close a file descriptor.
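   * (Editor's illustration of the intended recovery pattern:
   *
   *    blob<float> b{"base.fbin"};      // lazy: nothing is opened or read yet
   *    b.data(MemoryType::kDevice);     // may throw, e.g. if the file is missing
   *    b.reset_lazy_state();            // drop the failed descriptor/allocation
   *    b.data(MemoryType::kHostMmap);   // ...so the access can be retried later.)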
+ * This is useful in case when accessing the blob resulted in an error/invalid state, which should + * be reproduced at a later point in time. + */ + void reset_lazy_state() const + { + std::visit([](auto&& val) { val.reset_lazy_state(); }, value_); + } +}; + +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp index 1fc7327cb..ac1361219 100644 --- a/cpp/bench/ann/src/common/conf.hpp +++ b/cpp/bench/ann/src/common/conf.hpp @@ -45,14 +45,16 @@ class configuration { // the range of rows is [subset_first_row, subset_first_row + subset_size) // however, subset_size = 0 means using all rows after subset_first_row // that is, the subset is [subset_first_row, #rows in base_file) - size_t subset_first_row{0}; - size_t subset_size{0}; + uint32_t subset_first_row{0}; + uint32_t subset_size{0}; std::string query_file; std::string distance; std::optional groundtruth_neighbors_file{std::nullopt}; // data type of input dataset, possible values ["float", "int8", "uint8"] std::string dtype; + + std::optional filtering_rate{std::nullopt}; }; explicit inline configuration(std::istream& conf_stream) @@ -74,6 +76,9 @@ class configuration { dataset_conf_.base_file = conf.at("base_file"); dataset_conf_.query_file = conf.at("query_file"); dataset_conf_.distance = conf.at("distance"); + if (conf.contains("filtering_rate")) { + dataset_conf_.filtering_rate.emplace(conf.at("filtering_rate")); + } if (conf.contains("groundtruth_neighbors_file")) { dataset_conf_.groundtruth_neighbors_file = conf.at("groundtruth_neighbors_file"); diff --git a/cpp/bench/ann/src/common/dataset.hpp b/cpp/bench/ann/src/common/dataset.hpp index 49020fe36..ba801d165 100644 --- a/cpp/bench/ann/src/common/dataset.hpp +++ b/cpp/bench/ann/src/common/dataset.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,529 +16,166 @@ #pragma once #include "ann_types.hpp" -#include "util.hpp" +#include "blob.hpp" -#include -#include -#include - -#include #include #include #include -#include +#include #include -#include -#include -#include namespace cuvs::bench { -// http://big-algo-benchmarks.com/index.html: -// binary format that starts with 8 bytes of data consisting of num_points(uint32_t) -// num_dimensions(uint32) followed by num_pts x num_dimensions x sizeof(type) bytes of -// data stored one vector after another. -// Data files will have suffixes .fbin, .u8bin, and .i8bin to represent float32, uint8 -// and int8 type data. -// As extensions for this benchmark, half and int data files will have suffixes .f16bin -// and .ibin, respectively. 
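For reference, the new `filtering_rate` option above is read from the dataset section of the benchmark configuration; the generated bitset keeps each point with probability `1 - filtering_rate`, so e.g. 0.3 filters out roughly 30% of the points. A hypothetical dataset entry (fields as parsed in conf.hpp):

```json
{
  "name": "my-dataset",
  "base_file": "my-dataset/base.fbin",
  "query_file": "my-dataset/query.fbin",
  "groundtruth_neighbors_file": "my-dataset/groundtruth.neighbors.ibin",
  "distance": "euclidean",
  "filtering_rate": 0.3
}
```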
-template -class bin_file { - public: - bin_file(std::string file, - const std::string& mode, - uint32_t subset_first_row = 0, - uint32_t subset_size = 0); - ~bin_file() - { - if (mapped_ptr_ != nullptr) { unmap(); } - if (fp_ != nullptr) { fclose(fp_); } - } - bin_file(const bin_file&) = delete; - auto operator=(const bin_file&) -> bin_file& = delete; - - void get_shape(size_t* nrows, int* ndims) const - { - assert(read_mode_); - if (!fp_) { open_file(); } - *nrows = nrows_; - *ndims = ndims_; - } - - void read(T* data) const - { - assert(read_mode_); - if (!fp_) { open_file(); } - size_t total = static_cast(nrows_) * ndims_; - if (fread(data, sizeof(T), total, fp_) != total) { - throw std::runtime_error{"fread() bin_file " + file_ + " failed"}; - } - } - - void write(const T* data, uint32_t nrows, uint32_t ndims) - { - assert(!read_mode_); - if (!fp_) { open_file(); } - if (fwrite(&nrows, sizeof(uint32_t), 1, fp_) != 1) { - throw std::runtime_error{"fwrite() bin_file " + file_ + " failed"}; - } - if (fwrite(&ndims, sizeof(uint32_t), 1, fp_) != 1) { - throw std::runtime_error{"fwrite() bin_file " + file_ + " failed"}; - } - - size_t total = static_cast(nrows) * ndims; - if (fwrite(data, sizeof(T), total, fp_) != total) { - throw std::runtime_error{"fwrite() bin_file " + file_ + " failed"}; - } - } - - auto map() const -> T* - { - assert(read_mode_); - if (!fp_) { open_file(); } - int fid = fileno(fp_); - mapped_ptr_ = mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, fid, 0); - if (mapped_ptr_ == MAP_FAILED) { - mapped_ptr_ = nullptr; - throw std::runtime_error{"mmap error: Value of errno " + std::to_string(errno) + ", " + - std::string(strerror(errno))}; - } - return reinterpret_cast(reinterpret_cast(mapped_ptr_) + 2 * sizeof(uint32_t) + - subset_first_row_ * ndims_ * sizeof(T)); - } - - void unmap() const - { - if (munmap(mapped_ptr_, file_size_) == -1) { - throw std::runtime_error{"munmap error: " + std::string(strerror(errno))}; - } - } - - private: - void check_suffix(); - void open_file() const; - - std::string file_; - bool read_mode_; - uint32_t subset_first_row_; - uint32_t subset_size_; - - mutable FILE* fp_{nullptr}; - mutable uint32_t nrows_; - mutable uint32_t ndims_; - mutable size_t file_size_; - mutable void* mapped_ptr_{nullptr}; -}; - -template -bin_file::bin_file(std::string file, - const std::string& mode, - uint32_t subset_first_row, - uint32_t subset_size) - : file_(std::move(file)), - read_mode_(mode == "r"), - subset_first_row_(subset_first_row), - subset_size_(subset_size) - +template +void generate_bernoulli(CarrierT* data, size_t words, double p) { - check_suffix(); - - if (!read_mode_) { - if (mode == "w") { - if (subset_first_row != 0) { - throw std::runtime_error{"subset_first_row should be zero for write mode"}; - } - if (subset_size != 0) { - throw std::runtime_error{"subset_size should be zero for write mode"}; - } - } else { - throw std::runtime_error{"bin_file's mode must be either 'r' or 'w': " + file_}; + constexpr size_t kBitsPerCarrierValue = sizeof(CarrierT) * 8; + std::random_device rd; + std::mt19937 gen(rd()); + std::bernoulli_distribution d(p); + for (size_t i = 0; i < words; i++) { + CarrierT word = 0; + for (size_t j = 0; j < kBitsPerCarrierValue; j++) { + word |= CarrierT{d(gen)} << j; } + data[i] = word; } -} - -template -void bin_file::open_file() const -{ - fp_ = fopen(file_.c_str(), read_mode_ ? 
"r" : "w"); - if (!fp_) { throw std::runtime_error{"open bin_file failed: " + file_}; } - - if (read_mode_) { - struct stat statbuf; - if (stat(file_.c_str(), &statbuf) != 0) { throw std::runtime_error{"stat() failed: " + file_}; } - file_size_ = statbuf.st_size; +}; - uint32_t header[2]; - if (fread(header, sizeof(uint32_t), 2, fp_) != 2) { - throw std::runtime_error{"read header of bin_file failed: " + file_}; - } - nrows_ = header[0]; - ndims_ = header[1]; +template +struct dataset { + public: + using bitset_carrier_type = uint32_t; + static inline constexpr size_t kBitsPerCarrierValue = sizeof(bitset_carrier_type) * 8; - size_t expected_file_size = - 2 * sizeof(uint32_t) + static_cast(nrows_) * ndims_ * sizeof(T); - if (file_size_ != expected_file_size) { - throw std::runtime_error{"expected file size of " + file_ + " is " + - std::to_string(expected_file_size) + ", however, actual size is " + - std::to_string(file_size_)}; - } + private: + std::string name_; + std::string distance_; + blob base_set_; + blob query_set_; + std::optional> ground_truth_set_; + std::optional> filter_bitset_; - if (subset_first_row_ >= nrows_) { - throw std::runtime_error{file_ + ": subset_first_row (" + std::to_string(subset_first_row_) + - ") >= nrows (" + std::to_string(nrows_) + ")"}; - } - if (subset_first_row_ + subset_size_ > nrows_) { - throw std::runtime_error{file_ + ": subset_first_row (" + std::to_string(subset_first_row_) + - ") + subset_size (" + std::to_string(subset_size_) + ") > nrows (" + - std::to_string(nrows_) + ")"}; - } + mutable bool base_set_accessed_ = false; + mutable bool query_set_accessed_ = false; - if (subset_first_row_) { - static_assert(sizeof(long) == 8, "fseek() don't support 64-bit offset"); - if (fseek(fp_, sizeof(T) * subset_first_row_ * ndims_, SEEK_CUR) == -1) { - throw std::runtime_error{file_ + ": fseek failed"}; + public: + dataset(std::string name, + std::string base_file, + uint32_t subset_first_row, + uint32_t subset_size, + std::string query_file, + std::string distance, + std::optional groundtruth_neighbors_file, + std::optional filtering_rate = std::nullopt) + : name_{std::move(name)}, + distance_{std::move(distance)}, + base_set_{base_file, subset_first_row, subset_size}, + query_set_{query_file}, + ground_truth_set_{groundtruth_neighbors_file.has_value() + ? 
std::make_optional>(groundtruth_neighbors_file.value()) + : std::nullopt} + { + if (filtering_rate.has_value()) { + // Generate a random bitset for filtering + auto n_rows = static_cast(subset_size) + static_cast(subset_first_row); + if (subset_size == 0) { + // Read the base set size as a last resort only - for better laziness + n_rows = base_set_size(); } - nrows_ -= subset_first_row_; + auto bitset_size = (n_rows - 1) / kBitsPerCarrierValue + 1; + blob_file bitset_blob_file{static_cast(bitset_size), 1}; + blob_mmap bitset_blob{ + std::move(bitset_blob_file), false, HugePages::kDisable}; + generate_bernoulli(const_cast(bitset_blob.data()), + bitset_size, + 1.0 - filtering_rate.value()); + filter_bitset_.emplace(std::move(bitset_blob)); } - if (subset_size_) { nrows_ = subset_size_; } } -} -template -void bin_file::check_suffix() -{ - auto pos = file_.rfind('.'); - if (pos == std::string::npos) { - throw std::runtime_error{"name of bin_file doesn't have a suffix: " + file_}; + [[nodiscard]] auto name() const -> std::string { return name_; } + [[nodiscard]] auto distance() const -> std::string { return distance_; } + [[nodiscard]] auto dim() const -> int + { + // If any of base/query set are already accessed, use those + if (base_set_accessed_) { return static_cast(base_set_.n_cols()); } + if (query_set_accessed_) { return static_cast(query_set_.n_cols()); } + // Otherwise, try reading both (one of the two sets may be missing) + try { + query_set_accessed_ = true; + return static_cast(query_set_.n_cols()); + } catch (const std::runtime_error& e) { + // Any exception raised above will re-raise next time we try to access the query set. + query_set_accessed_ = false; + query_set_.reset_lazy_state(); + } + base_set_accessed_ = true; + return static_cast(base_set_.n_cols()); + } + [[nodiscard]] auto max_k() const -> uint32_t + { + if (ground_truth_set_.has_value()) { return ground_truth_set_->n_cols(); } + return 0; } - std::string suffix = file_.substr(pos + 1); - - if constexpr (std::is_same_v) { - if (suffix != "fbin") { - throw std::runtime_error{"bin_file should has .fbin suffix: " + file_}; - } - } else if constexpr (std::is_same_v) { - if (suffix != "f16bin" && suffix != "fbin") { - throw std::runtime_error{"bin_file should has .f16bin suffix: " + file_}; - } - } else if constexpr (std::is_same_v) { - if (suffix != "ibin") { - throw std::runtime_error{"bin_file should has .ibin suffix: " + file_}; - } - } else if constexpr (std::is_same_v) { - if (suffix != "u8bin") { - throw std::runtime_error{"bin_file should has .u8bin suffix: " + file_}; - } - } else if constexpr (std::is_same_v) { - if (suffix != "i8bin") { - throw std::runtime_error{"bin_file should has .i8bin suffix: " + file_}; - } - } else { - throw std::runtime_error( - "T of bin_file should be one of float, half, int, uint8_t, or int8_t"); + [[nodiscard]] auto base_set_size() const -> size_t + { + base_set_accessed_ = true; + return base_set_.n_rows(); } -} - -template -class dataset { - public: - explicit dataset(std::string name) : name_(std::move(name)) {} - dataset(std::string name, std::string distance) - : name_(std::move(name)), distance_(std::move(distance)) + [[nodiscard]] auto query_set_size() const -> size_t { + query_set_accessed_ = true; + return query_set_.n_rows(); } - dataset(const dataset&) = delete; - auto operator=(const dataset&) -> dataset& = delete; - virtual ~dataset(); - - auto name() const -> std::string { return name_; } - auto distance() const -> std::string { return distance_; } - virtual auto dim() 
const -> int = 0; - virtual auto max_k() const -> uint32_t = 0; - virtual auto base_set_size() const -> size_t = 0; - virtual auto query_set_size() const -> size_t = 0; - // load data lazily, so don't pay the overhead of reading unneeded set - // e.g. don't load base set when searching - auto base_set() const -> const T* + [[nodiscard]] auto gt_set() const -> const IdxT* { - if (!base_set_) { load_base_set(); } - return base_set_; + if (ground_truth_set_.has_value()) { return ground_truth_set_->data(); } + return nullptr; } - auto query_set() const -> const T* + [[nodiscard]] auto query_set() const -> const DataT* { - if (!query_set_) { load_query_set(); } - return query_set_; + query_set_accessed_ = true; + return query_set_.data(); } - - auto gt_set() const -> const int32_t* + [[nodiscard]] auto query_set(MemoryType memory_type, + HugePages request_hugepages_2mb = HugePages::kDisable) const + -> const DataT* { - if (!gt_set_) { load_gt_set(); } - return gt_set_; + query_set_accessed_ = true; + return query_set_.data(memory_type, request_hugepages_2mb); } - auto base_set_on_gpu() const -> const T*; - auto query_set_on_gpu() const -> const T*; - auto mapped_base_set() const -> const T*; - - auto query_set(MemoryType memory_type) const -> const T* + [[nodiscard]] auto base_set() const -> const DataT* { - switch (memory_type) { - case MemoryType::kDevice: return query_set_on_gpu(); - case MemoryType::kHost: { - auto r = query_set(); -#ifndef BUILD_CPU_ONLY - if (query_set_pinned_) { - cudaHostUnregister(const_cast(r)); - query_set_pinned_ = false; - } -#endif - return r; - } - case MemoryType::kHostPinned: { - auto r = query_set(); -#ifndef BUILD_CPU_ONLY - if (!query_set_pinned_) { - cudaHostRegister( - const_cast(r), query_set_size() * dim() * sizeof(T), cudaHostRegisterDefault); - query_set_pinned_ = true; - } -#endif - return r; - } - default: return nullptr; - } + base_set_accessed_ = true; + return base_set_.data(); } - - auto base_set(MemoryType memory_type) const -> const T* + [[nodiscard]] auto base_set(MemoryType memory_type, + HugePages request_hugepages_2mb = HugePages::kDisable) const + -> const DataT* { - switch (memory_type) { - case MemoryType::kDevice: return base_set_on_gpu(); - case MemoryType::kHost: { - auto r = base_set(); -#ifndef BUILD_CPU_ONLY - if (base_set_pinned_) { - cudaHostUnregister(const_cast(r)); - base_set_pinned_ = false; - } -#endif - return r; - } - case MemoryType::kHostPinned: { - auto r = base_set(); -#ifndef BUILD_CPU_ONLY - if (!base_set_pinned_) { - cudaHostRegister( - const_cast(r), base_set_size() * dim() * sizeof(T), cudaHostRegisterDefault); - base_set_pinned_ = true; - } -#endif - return r; - } - case MemoryType::kHostMmap: return mapped_base_set(); - default: return nullptr; - } + base_set_accessed_ = true; + return base_set_.data(memory_type, request_hugepages_2mb); } - protected: - virtual void load_base_set() const = 0; - virtual void load_gt_set() const = 0; - virtual void load_query_set() const = 0; - virtual void map_base_set() const = 0; - - std::string name_; - std::string distance_; - - mutable T* base_set_ = nullptr; - mutable T* query_set_ = nullptr; - mutable T* d_base_set_ = nullptr; - mutable T* d_query_set_ = nullptr; - mutable T* mapped_base_set_ = nullptr; - mutable int32_t* gt_set_ = nullptr; - - mutable bool base_set_pinned_ = false; - mutable bool query_set_pinned_ = false; -}; - -template -dataset::~dataset() -{ -#ifndef BUILD_CPU_ONLY - if (d_base_set_) { cudaFree(d_base_set_); } - if (d_query_set_) { 
cudaFree(d_query_set_); } - if (base_set_pinned_) { cudaHostUnregister(base_set_); } - if (query_set_pinned_) { cudaHostUnregister(query_set_); } -#endif - delete[] base_set_; - delete[] query_set_; - delete[] gt_set_; -} - -template -auto dataset::base_set_on_gpu() const -> const T* -{ -#ifndef BUILD_CPU_ONLY - if (!d_base_set_) { - base_set(); - cudaMalloc(reinterpret_cast(&d_base_set_), base_set_size() * dim() * sizeof(T)); - cudaMemcpy(d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); + [[nodiscard]] auto filter_bitset() const -> const bitset_carrier_type* + { + if (filter_bitset_.has_value()) { return filter_bitset_->data(); } + return nullptr; } -#endif - return d_base_set_; -} -template -auto dataset::query_set_on_gpu() const -> const T* -{ -#ifndef BUILD_CPU_ONLY - if (!d_query_set_) { - query_set(); - cudaMalloc(reinterpret_cast(&d_query_set_), query_set_size() * dim() * sizeof(T)); - cudaMemcpy( - d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); + [[nodiscard]] auto filter_bitset(MemoryType memory_type, + HugePages request_hugepages_2mb = HugePages::kDisable) const + -> const bitset_carrier_type* + { + if (filter_bitset_.has_value()) { + return filter_bitset_->data(memory_type, request_hugepages_2mb); + } + return nullptr; } -#endif - return d_query_set_; -} - -template -auto dataset::mapped_base_set() const -> const T* -{ - if (!mapped_base_set_) { map_base_set(); } - return mapped_base_set_; -} - -template -class bin_dataset : public dataset { - public: - bin_dataset(const std::string& name, - const std::string& base_file, - size_t subset_first_row, - size_t subset_size, - const std::string& query_file, - const std::string& distance, - const std::optional& groundtruth_neighbors_file); - - auto dim() const -> int override; - auto max_k() const -> uint32_t override; - auto base_set_size() const -> size_t override; - auto query_set_size() const -> size_t override; - - private: - void load_base_set() const; - void load_query_set() const; - void load_gt_set() const; - void map_base_set() const; - - mutable int dim_ = 0; - mutable uint32_t max_k_ = 0; - mutable size_t base_set_size_ = 0; - mutable size_t query_set_size_ = 0; - - bin_file base_file_; - bin_file query_file_; - std::optional> gt_file_{std::nullopt}; }; -template -bin_dataset::bin_dataset(const std::string& name, - const std::string& base_file, - size_t subset_first_row, - size_t subset_size, - const std::string& query_file, - const std::string& distance, - const std::optional& groundtruth_neighbors_file) - : dataset(name, distance), - base_file_(base_file, "r", subset_first_row, subset_size), - query_file_(query_file, "r") -{ - if (groundtruth_neighbors_file.has_value()) { - gt_file_.emplace(groundtruth_neighbors_file.value(), "r"); - } -} - -template -auto bin_dataset::dim() const -> int -{ - if (dim_ > 0) { return dim_; } - if (base_set_size() > 0) { return dim_; } - if (query_set_size() > 0) { return dim_; } - return dim_; -} - -template -auto bin_dataset::max_k() const -> uint32_t -{ - if (!this->gt_set_) { load_gt_set(); } - return max_k_; -} - -template -auto bin_dataset::query_set_size() const -> size_t -{ - if (query_set_size_ > 0) { return query_set_size_; } - int dim; - query_file_.get_shape(&query_set_size_, &dim); - if (query_set_size_ == 0) { throw std::runtime_error{"Zero query set size"}; } - if (dim == 0) { throw std::runtime_error{"Zero query set dim"}; } - if (dim_ == 0) { - dim_ = dim; - } else if (dim_ != dim) { - throw 
std::runtime_error{"base set dim (" + std::to_string(dim_) + ") != query set dim (" + - std::to_string(dim)}; - } - return query_set_size_; -} - -template -auto bin_dataset::base_set_size() const -> size_t -{ - if (base_set_size_ > 0) { return base_set_size_; } - int dim; - base_file_.get_shape(&base_set_size_, &dim); - if (base_set_size_ == 0) { throw std::runtime_error{"Zero base set size"}; } - if (dim == 0) { throw std::runtime_error{"Zero base set dim"}; } - if (dim_ == 0) { - dim_ = dim; - } else if (dim_ != dim) { - throw std::runtime_error{"base set dim (" + std::to_string(dim) + ") != query set dim (" + - std::to_string(dim_)}; - } - return base_set_size_; -} - -template -void bin_dataset::load_base_set() const -{ - this->base_set_ = new T[base_set_size() * dim()]; - base_file_.read(this->base_set_); -} - -template -void bin_dataset::load_query_set() const -{ - this->query_set_ = new T[query_set_size() * dim()]; - query_file_.read(this->query_set_); -} - -template -void bin_dataset::load_gt_set() const -{ - if (gt_file_.has_value()) { - size_t queries; - int k; - gt_file_->get_shape(&queries, &k); - this->gt_set_ = new std::int32_t[queries * k]; - gt_file_->read(this->gt_set_); - max_k_ = k; - } -} - -template -void bin_dataset::map_base_set() const -{ - this->mapped_base_set_ = base_file_.map(); -} - } // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h index 11e0e4ad3..441a1eafa 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h @@ -275,4 +275,23 @@ void refine_helper(const raft::resources& res, } } +/** + * Construct a cuVS-compatible bitset filter object from a raw pointer to the bitset. + * + * @param[in] filter_bitset a pointer to a pre-generated bitset + * @param[in] n_rows the number of elements in the bitset / dataset. + * @return a shared pointer to the filter object (doesn't own the bitset data!) 
+ */ +inline auto make_cuvs_filter(const void* filter_bitset, int64_t n_rows) + -> std::shared_ptr +{ + if (filter_bitset != nullptr) { + return std::make_shared>( + raft::core::bitset_view( + const_cast(reinterpret_cast(filter_bitset)), n_rows)); + } else { + return std::make_shared(); + } +} + } // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h index 6670ed892..af72dd02b 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h @@ -47,7 +47,7 @@ class cuvs_cagra_hnswlib : public algo, public algo_gpu { void build(const T* dataset, size_t nrow) final; - void set_search_param(const search_param_base& param) override; + void set_search_param(const search_param_base& param, const void* filter_bitset) override; void search(const T* queries, int batch_size, @@ -102,8 +102,10 @@ void cuvs_cagra_hnswlib::build(const T* dataset, size_t nrow) } template -void cuvs_cagra_hnswlib::set_search_param(const search_param_base& param_) +void cuvs_cagra_hnswlib::set_search_param(const search_param_base& param_, + const void* filter_bitset) { + if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); } search_param_ = dynamic_cast(param_); } diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index 8c9cb2d4f..f5f609710 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -119,7 +119,7 @@ class cuvs_cagra : public algo, public algo_gpu { void build(const T* dataset, size_t nrow) final; - void set_search_param(const search_param_base& param) override; + void set_search_param(const search_param_base& param, const void* filter_bitset) override; void set_search_dataset(const T* dataset, size_t nrow) override; @@ -187,6 +187,8 @@ class cuvs_cagra : public algo, public algo_gpu { size_t dynamic_batching_n_queues_; bool dynamic_batching_conservative_dispatch_; + std::shared_ptr filter_; + inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type) { switch (mem_type) { @@ -228,8 +230,10 @@ inline auto allocator_to_string(AllocatorType mem_type) -> std::string } template -void cuvs_cagra::set_search_param(const search_param_base& param) +void cuvs_cagra::set_search_param(const search_param_base& param, + const void* filter_bitset) { + filter_ = make_cuvs_filter(filter_bitset, index_->size()); auto sp = dynamic_cast(param); bool needs_dynamic_batcher_update = (dynamic_batching_max_batch_size_ != sp.dynamic_batching_max_batch_size) || @@ -292,7 +296,8 @@ void cuvs_cagra::set_search_param(const search_param_base& param) sp.dynamic_batching_n_queues, sp.dynamic_batching_conservative_dispatch}, *index_, - search_params_); + search_params_, + filter_.get()); } dynamic_batcher_sp_.dispatch_timeout_ms = sp.dynamic_batching_dispatch_timeout_ms; } else { @@ -379,7 +384,7 @@ void cuvs_cagra::search_base(const T* queries, distances_view); } else { cuvs::neighbors::cagra::search( - handle_, search_params_, *index_, queries_view, neighbors_view, distances_view); + handle_, search_params_, *index_, queries_view, neighbors_view, distances_view, *filter_); } if constexpr (sizeof(IdxT) != sizeof(algo_base::index_type)) { diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h index b7e335622..a9dec026e 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h +++ 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
index 6670ed892..af72dd02b 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
@@ -47,7 +47,7 @@ class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* queries,
               int batch_size,
@@ -102,8 +102,10 @@ void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T, typename IdxT>
-void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_)
+void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_,
+                                                   const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   search_param_ = dynamic_cast<const search_param&>(param_);
 }
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
index 8c9cb2d4f..f5f609710 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
@@ -119,7 +119,7 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void set_search_dataset(const T* dataset, size_t nrow) override;
 
@@ -187,6 +187,8 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
   size_t dynamic_batching_n_queues_;
   bool dynamic_batching_conservative_dispatch_;
 
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
+
   inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type)
   {
     switch (mem_type) {
@@ -228,8 +230,10 @@ inline auto allocator_to_string(AllocatorType mem_type) -> std::string
 }
 
 template <typename T, typename IdxT>
-void cuvs_cagra<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_cagra<T, IdxT>::set_search_param(const search_param_base& param,
+                                           const void* filter_bitset)
 {
+  filter_ = make_cuvs_filter(filter_bitset, index_->size());
   auto sp = dynamic_cast<const search_param&>(param);
   bool needs_dynamic_batcher_update =
     (dynamic_batching_max_batch_size_ != sp.dynamic_batching_max_batch_size) ||
@@ -292,7 +296,8 @@ void cuvs_cagra<T, IdxT>::set_search_param(const search_param_base& param)
                           sp.dynamic_batching_n_queues,
                           sp.dynamic_batching_conservative_dispatch},
         *index_,
-        search_params_);
+        search_params_,
+        filter_.get());
     }
     dynamic_batcher_sp_.dispatch_timeout_ms = sp.dynamic_batching_dispatch_timeout_ms;
   } else {
@@ -379,7 +384,7 @@ void cuvs_cagra<T, IdxT>::search_base(const T* queries,
                                         distances_view);
   } else {
     cuvs::neighbors::cagra::search(
-      handle_, search_params_, *index_, queries_view, neighbors_view, distances_view);
+      handle_, search_params_, *index_, queries_view, neighbors_view, distances_view, *filter_);
   }
 
   if constexpr (sizeof(IdxT) != sizeof(algo_base::index_type)) {
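A note on ownership in the CAGRA wrapper above: the dynamic batcher receives only the raw filter_.get() pointer, and search_base dereferences *filter_ on every batch, so the wrapper keeps filter_ as an owning shared_ptr member. The standalone sketch below illustrates the pattern; the types are simplified stand-ins, not the cuVS classes.

// Simplified stand-ins illustrating the ownership pattern: downstream
// consumers hold non-owning references, so the wrapper must own the filter.
#include <cassert>
#include <memory>

struct base_filter {
  virtual ~base_filter() = default;
};

struct batcher {
  const base_filter* filter = nullptr;  // non-owning, like filter_.get()
};

class wrapper {
  std::shared_ptr<base_filter> filter_;  // owning: keeps the filter alive
  batcher batcher_;

 public:
  void set_search_param()
  {
    filter_ = std::make_shared<base_filter>();  // stands in for make_cuvs_filter
    batcher_.filter = filter_.get();  // must be refreshed: the old pointer dangles
  }

  void search() const
  {
    assert(filter_ != nullptr);  // the search path dereferences *filter_
  }
};

int main()
{
  wrapper w;
  w.set_search_param();
  w.search();
  return 0;
}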
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h
index b7e335622..a9dec026e 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h
@@ -59,7 +59,7 @@ class cuvs_ivf_flat : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* queries,
               int batch_size,
@@ -92,6 +92,8 @@ class cuvs_ivf_flat : public algo<T>, public algo_gpu {
   std::shared_ptr<cuvs::neighbors::ivf_flat::index<T, IdxT>> index_;
   int device_;
   int dimension_;
+
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
 };
 
 template <typename T, typename IdxT>
@@ -111,8 +113,10 @@ void cuvs_ivf_flat<T, IdxT>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T, typename IdxT>
-void cuvs_ivf_flat<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_ivf_flat<T, IdxT>::set_search_param(const search_param_base& param,
+                                              const void* filter_bitset)
 {
+  filter_ = make_cuvs_filter(filter_bitset, index_->size());
   auto sp = dynamic_cast<const search_param&>(param);
   search_params_ = sp.ivf_flat_params;
   assert(search_params_.n_probes <= index_params_.n_lists);
@@ -162,7 +166,8 @@ void cuvs_ivf_flat<T, IdxT>::search(
     *index_,
     raft::make_device_matrix_view<const T, int64_t>(queries, batch_size, index_->dim()),
     raft::make_device_matrix_view<IdxT, int64_t>(neighbors_idx_t, batch_size, k),
-    raft::make_device_matrix_view<float, int64_t>(distances, batch_size, k));
+    raft::make_device_matrix_view<float, int64_t>(distances, batch_size, k),
+    *filter_);
 
   if constexpr (sizeof(IdxT) != sizeof(algo_base::index_type)) {
     raft::linalg::unaryOp(neighbors,
                           neighbors_idx_t,
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_pq_wrapper.h
index dac766669..5bef85b7e 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_pq_wrapper.h
@@ -67,7 +67,7 @@ class cuvs_ivf_pq : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
   void set_search_dataset(const T* dataset, size_t nrow) override;
 
   void search(const T* queries,
@@ -110,6 +110,8 @@ class cuvs_ivf_pq : public algo<T>, public algo_gpu {
 
   std::shared_ptr<cuvs::neighbors::dynamic_batching::index<T, IdxT>> dynamic_batcher_;
   cuvs::neighbors::dynamic_batching::search_params dynamic_batcher_sp_{};
+
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
 };
 
 template <typename T, typename IdxT>
@@ -144,8 +146,10 @@ std::unique_ptr<algo<T>> cuvs_ivf_pq<T, IdxT>::copy()
 }
 
 template <typename T, typename IdxT>
-void cuvs_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param,
+                                            const void* filter_bitset)
 {
+  filter_ = make_cuvs_filter(filter_bitset, index_->size());
   auto sp = dynamic_cast<const search_param&>(param);
   search_params_ = sp.pq_param;
   refine_ratio_ = sp.refine_ratio;
@@ -160,7 +164,8 @@ void cuvs_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param)
                         sp.dynamic_batching_n_queues,
                         sp.dynamic_batching_conservative_dispatch},
       *index_,
-      search_params_);
+      search_params_,
+      filter_.get());
     dynamic_batcher_sp_.dispatch_timeout_ms = sp.dynamic_batching_dispatch_timeout_ms;
   } else {
     dynamic_batcher_.reset();
@@ -204,7 +209,7 @@ void cuvs_ivf_pq<T, IdxT>::search_base(
                                          distances_view);
   } else {
     cuvs::neighbors::ivf_pq::search(
-      handle_, search_params_, *index_, queries_view, neighbors_view, distances_view);
+      handle_, search_params_, *index_, queries_view, neighbors_view, distances_view, *filter_);
   }
 
   if constexpr (sizeof(IdxT) != sizeof(algo_base::index_type)) {
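Both IVF wrappers above search into a scratch buffer of the index's own id type and widen the result afterwards: when sizeof(IdxT) differs from the benchmark's algo_base::index_type, the ids are converted elementwise (on device, via raft::linalg::unaryOp). Below is a host-side sketch of the same conversion, assuming index_type is size_t as in the benchmark harness.

// Host-side sketch of the id widening the IVF wrappers perform when
// IdxT != algo_base::index_type (the real code does this on device).
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

int main()
{
  using index_type = std::size_t;                      // assumed algo_base::index_type
  std::vector<std::int64_t> neighbors_idx_t{3, 1, 4};  // IdxT-typed search output
  std::vector<index_type> neighbors(neighbors_idx_t.size());
  std::transform(neighbors_idx_t.begin(),
                 neighbors_idx_t.end(),
                 neighbors.begin(),
                 [](std::int64_t i) { return static_cast<index_type>(i); });
  return 0;
}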
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h
index 50c1ff4db..cb2bbd9b5 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h
@@ -52,7 +52,7 @@ class cuvs_mg_cagra : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void set_search_dataset(const T* dataset, size_t nrow) override;
 
@@ -114,8 +114,10 @@ void cuvs_mg_cagra<T, IdxT>::build(const T* dataset, size_t nrow)
 inline auto allocator_to_string(AllocatorType mem_type) -> std::string;
 
 template <typename T, typename IdxT>
-void cuvs_mg_cagra<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_mg_cagra<T, IdxT>::set_search_param(const search_param_base& param,
+                                              const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto sp = dynamic_cast<const search_param&>(param);
   // search_params_ = static_cast<mg::search_params<cagra::search_params>>(sp.p);
   cagra::search_params* search_params_ptr_ = static_cast<cagra::search_params*>(&search_params_);
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
index 0540edc8f..b9f8b4b32 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
@@ -45,7 +45,7 @@ class cuvs_mg_ivf_flat : public algo<T>, public algo_gpu {
   }
 
   void build(const T* dataset, size_t nrow) final;
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
   void search(const T* queries,
               int batch_size,
               int k,
@@ -91,8 +91,10 @@ void cuvs_mg_ivf_flat<T, IdxT>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T, typename IdxT>
-void cuvs_mg_ivf_flat<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_mg_ivf_flat<T, IdxT>::set_search_param(const search_param_base& param,
+                                                 const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto sp = dynamic_cast<const search_param&>(param);
   // search_params_ = sp.ivf_flat_params;
   ivf_flat::search_params* search_params_ptr_ =
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
index 65ca1bb11..26781c522 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
@@ -45,7 +45,7 @@ class cuvs_mg_ivf_pq : public algo<T>, public algo_gpu {
   }
 
   void build(const T* dataset, size_t nrow) final;
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
   void search(const T* queries,
               int batch_size,
               int k,
@@ -91,8 +91,10 @@ void cuvs_mg_ivf_pq<T, IdxT>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T, typename IdxT>
-void cuvs_mg_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_mg_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param,
+                                               const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto sp = dynamic_cast<const search_param&>(param);
   // search_params_ = static_cast<mg::search_params<ivf_pq::search_params>>(sp.pq_param);
   ivf_pq::search_params* search_params_ptr_ = static_cast<ivf_pq::search_params*>(&search_params_);
diff --git a/cpp/bench/ann/src/cuvs/cuvs_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_wrapper.h
index bf0fa5934..69bdaeb12 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_wrapper.h
@@ -58,7 +58,7 @@ class cuvs_gpu : public algo<T>, public algo_gpu {
 
   void build(const T*, size_t) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* queries,
               int batch_size,
@@ -91,6 +91,8 @@ class cuvs_gpu : public algo<T>, public algo_gpu {
   int device_;
   const T* dataset_;
   size_t nrow_;
+
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
 };
 
 template <typename T>
@@ -111,9 +113,9 @@ void cuvs_gpu<T>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T>
-void cuvs_gpu<T>::set_search_param(const search_param_base&)
+void cuvs_gpu<T>::set_search_param(const search_param_base&, const void* filter_bitset)
 {
-  // Nothing to set here as it is brute force implementation
+  filter_ = make_cuvs_filter(filter_bitset, index_->size());
 }
 
 template <typename T>
@@ -155,12 +157,8 @@ void cuvs_gpu<T>::search(
     raft::make_device_matrix_view<algo_base::index_type, int64_t>(neighbors, batch_size, k);
   auto distances_view =
     raft::make_device_matrix_view<float, int64_t>(distances, batch_size, k);
-  cuvs::neighbors::brute_force::search(handle_,
-                                       *index_,
-                                       queries_view,
-                                       neighbors_view,
-                                       distances_view,
-                                       cuvs::neighbors::filtering::none_sample_filter{});
+  cuvs::neighbors::brute_force::search(
+    handle_, *index_, queries_view, neighbors_view, distances_view, *filter_);
 }
 
 template <typename T>
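The brute-force change above removes the only call site that spelled out none_sample_filter explicitly: since make_cuvs_filter already returns a none_sample_filter when no bitset is supplied, filtered and unfiltered searches now go through one uniform call. A standalone sketch of that dispatch follows; the types are simplified stand-ins, not the cuVS classes.

// Sketch of the dispatch behind the single call site: the search entry point
// takes the filter by base reference, so a none_sample_filter and a
// bitset_filter are interchangeable from the caller's point of view.
#include <iostream>
#include <memory>

struct base_filter {
  virtual ~base_filter() = default;
};
struct none_sample_filter : base_filter {};
struct bitset_filter : base_filter {};

void search(const base_filter& filter)  // stands in for brute_force::search
{
  // The real implementation inspects the concrete filter type internally.
  std::cout << "searching with "
            << (dynamic_cast<const none_sample_filter*>(&filter) ? "no" : "a") << " filter\n";
}

int main()
{
  std::shared_ptr<base_filter> f = std::make_shared<none_sample_filter>();
  search(*f);  // unfiltered: prints "searching with no filter"
  f = std::make_shared<bitset_filter>();
  search(*f);  // filtered: prints "searching with a filter"
  return 0;
}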
diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
index 0cc40de37..4cb630ed2 100644
--- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
+++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
@@ -75,7 +75,7 @@ class faiss_cpu : public algo<T> {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void init_quantizer(int dim)
   {
@@ -153,8 +153,9 @@ void faiss_cpu<T>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T>
-void faiss_cpu<T>::set_search_param(const search_param_base& param)
+void faiss_cpu<T>::set_search_param(const search_param_base& param, const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto sp = dynamic_cast<const search_param&>(param);
   int nprobe = sp.nprobe;
   assert(nprobe <= nlist_);
@@ -303,8 +304,10 @@ class faiss_cpu_flat : public faiss_cpu<T> {
   }
 
   // class faiss_cpu is more like a IVF class, so need special treating here
-  void set_search_param(const typename algo<T>::search_param& param) override
+  void set_search_param(const typename algo<T>::search_param& param,
+                        const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
    auto search_param = dynamic_cast<const typename faiss_cpu<T>::search_param&>(param);
     if (!this->thread_pool_ || this->num_threads_ != search_param.num_threads) {
       this->num_threads_ = search_param.num_threads;
diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
index 6cf1fe4b0..ed80917e8 100644
--- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
+++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
@@ -111,7 +111,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  virtual void set_search_param(const search_param_base& param) {}
+  virtual void set_search_param(const search_param_base& param, const void* filter_bitset) {}
 
   void set_search_dataset(const T* dataset, size_t nrow) override { dataset_ = dataset; }
 
@@ -329,8 +329,9 @@ class faiss_gpu_ivf_flat : public faiss_gpu<T> {
       this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config);
   }
 
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = dynamic_cast<const typename faiss_gpu<T>::search_param&>(param);
     int nprobe = sp.nprobe;
     assert(nprobe <= this->nlist_);
@@ -386,8 +387,9 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
       config);
   }
 
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = dynamic_cast<const typename faiss_gpu<T>::search_param&>(param);
     int nprobe = sp.nprobe;
     assert(nprobe <= this->nlist_);
@@ -449,8 +451,9 @@ class faiss_gpu_ivfsq : public faiss_gpu<T> {
       this->gpu_resource_.get(), dim, param.nlist, qtype, this->metric_type_, true, config);
   }
 
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = dynamic_cast<const typename faiss_gpu<T>::search_param&>(param);
     int nprobe = sp.nprobe;
     assert(nprobe <= this->nlist_);
@@ -493,8 +496,9 @@ class faiss_gpu_flat : public faiss_gpu<T> {
     this->index_ = std::make_shared<faiss::gpu::GpuIndexFlat>(
       this->gpu_resource_.get(), dim, this->metric_type_, config);
   }
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = dynamic_cast<const typename faiss_gpu<T>::search_param&>(param);
     int nprobe = sp.nprobe;
     assert(nprobe <= this->nlist_);
@@ -548,8 +552,9 @@ class faiss_gpu_cagra : public faiss_gpu<T> {
      this->gpu_resource_.get(), dim, parse_metric_faiss(this->metric_), config);
   }
 
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = static_cast<const typename faiss_gpu_cagra<T>::search_param&>(param);
     this->search_params_ = std::make_shared<faiss::gpu::SearchParametersCagra>(sp.p);
   }
diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
index e2ca18e22..b1c6c7b26 100644
--- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
+++ b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
@@ -57,7 +57,10 @@ class ggnn : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) override { impl_->build(dataset, nrow); }
 
-  void set_search_param(const search_param_base& param) override { impl_->set_search_param(param); }
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
+  {
+    impl_->set_search_param(param, filter_bitset);
+  }
   void search(const T* queries,
               int batch_size,
               int k,
@@ -128,7 +131,7 @@ class ggnn_impl : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) override;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* queries,
               int batch_size,
               int k,
@@ -243,8 +246,10 @@ void ggnn_impl<T, measure, D, KBuild, KQuery, S>::set_search_dataset(const T* da
 }
 
 template <typename T, DistanceMeasure measure, int D, int KBuild, int KQuery, int S>
-void ggnn_impl<T, measure, D, KBuild, KQuery, S>::set_search_param(const search_param_base& param)
+void ggnn_impl<T, measure, D, KBuild, KQuery, S>::set_search_param(const search_param_base& param,
+                                                                   const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   search_param_ = dynamic_cast<const typename ggnn<T>::search_param&>(param);
 }
diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
index 9d643f12a..d6870ae1c 100644
--- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
+++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
@@ -79,7 +79,7 @@ class hnsw_lib : public algo<T> {
 
   void build(const T* dataset, size_t nrow) override;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* query,
               int batch_size,
               int k,
@@ -169,8 +169,9 @@ void hnsw_lib<T>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T>
-void hnsw_lib<T>::set_search_param(const search_param_base& param_)
+void hnsw_lib<T>::set_search_param(const search_param_base& param_, const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto param = dynamic_cast<const search_param&>(param_);
   appr_alg_->ef_ = param.ef;
   num_threads_ = param.num_threads;