diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index c2f85e539..eed18272c 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -37,6 +37,19 @@ enum class MemoryType { kHostMmap, kHostPinned, kDevice, + kManaged, +}; + +/** Request 2MB huge pages support for an allocation */ +enum class HugePages { + /** Don't use huge pages if possible. */ + kDisable = 0, + /** Enable huge pages if possible, ignore otherwise. */ + kAsk = 1, + /** Enable huge pages if possible, warn the user otherwise. */ + kRequire = 2, + /** Force enable huge pages, throw an exception if not possible. */ + kDemand = 3 }; enum class Metric { @@ -65,6 +78,8 @@ inline auto parse_memory_type(const std::string& memory_type) -> MemoryType return MemoryType::kHostPinned; } else if (memory_type == "device") { return MemoryType::kDevice; + } else if (memory_type == "managed") { + return MemoryType::kManaged; } else { throw std::runtime_error("invalid memory type: '" + memory_type + "'"); } @@ -130,7 +145,7 @@ class algo : public algo_base { virtual void build(const T* dataset, size_t nrow) = 0; - virtual void set_search_param(const search_param& param) = 0; + virtual void set_search_param(const search_param& param, const void* filter_bitset) = 0; // TODO(snanditale): this assumes that an algorithm can always return k results. // This is not always possible. virtual void search(const T* queries, diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 49be78673..8d9c30cb1 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -271,7 +271,8 @@ void bench_search(::benchmark::State& state, } } try { - a->set_search_param(*search_param); + a->set_search_param(*search_param, + dataset->filter_bitset(current_algo_props->dataset_memory_type)); } catch (const std::exception& ex) { state.SkipWithError("An error occurred setting search parameters: " + std::string(ex.what())); return; @@ -359,13 +360,19 @@ void bench_search(::benchmark::State& state, // Each thread calculates recall on their partition of queries. // evaluate recall if (dataset->max_k() >= k) { - const std::int32_t* gt = dataset->gt_set(); + const std::int32_t* gt = dataset->gt_set(); + const std::uint32_t* filter_bitset = dataset->filter_bitset(MemoryType::kHostMmap); + auto filter = [filter_bitset](std::int32_t i) -> bool { + if (filter_bitset == nullptr) { return true; } + auto word = filter_bitset[i >> 5]; + return word & (1 << (i & 31)); + }; const std::uint32_t max_k = dataset->max_k(); result_buf.transfer_data(MemoryType::kHost, current_algo_props->query_memory_type); auto* neighbors_host = reinterpret_cast(result_buf.data(MemoryType::kHost)); std::size_t rows = std::min(queries_processed, query_set_size); std::size_t match_count = 0; - std::size_t total_count = rows * static_cast(k); + std::size_t total_count = 0; // We go through the groundtruth with same stride as the benchmark loop. size_t out_offset = 0; @@ -375,22 +382,44 @@ void bench_search(::benchmark::State& state, size_t i_orig_idx = batch_offset + i; size_t i_out_idx = out_offset + i; if (i_out_idx < rows) { - for (std::uint32_t j = 0; j < k; j++) { - auto act_idx = static_cast(neighbors_host[i_out_idx * k + j]); - for (std::uint32_t l = 0; l < k; l++) { - auto exp_idx = gt[i_orig_idx * max_k + l]; + /* NOTE: recall correctness & filtering + + In the loop below, we filter the ground truth values on-the-fly. 
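     (Editor's aside, illustration only: the `filter` lambda above tests one bit per
     point in the packed bitset; a point `i` passes iff

         (filter_bitset[i >> 5] >> (i & 31)) & 1u

     is non-zero, and a null bitset means "no filtering, everything passes".)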
+ We need enough ground truth values to compute recall correctly though. + But the ground truth file only contains `max_k` values per row; if there are less valid + values than k among them, we overestimate the recall. Essentially, we compare the first + `filter_pass_count` values of the algorithm output, and this counter can be less than `k`. + In the extreme case of very high filtering rate, we may be bypassing entire rows of + results. However, this is still better than no recall estimate at all. + + TODO: consider generating the filtered ground truth on-the-fly + */ + uint32_t filter_pass_count = 0; + for (std::uint32_t l = 0; l < max_k && filter_pass_count < k; l++) { + auto exp_idx = gt[i_orig_idx * max_k + l]; + if (!filter(exp_idx)) { continue; } + filter_pass_count++; + for (std::uint32_t j = 0; j < k; j++) { + auto act_idx = static_cast(neighbors_host[i_out_idx * k + j]); if (act_idx == exp_idx) { match_count++; break; } } } + total_count += filter_pass_count; } } out_offset += n_queries; batch_offset = (batch_offset + queries_stride) % query_set_size; } double actual_recall = static_cast(match_count) / static_cast(total_count); + /* NOTE: recall in the throughput mode & filtering + + When filtering is enabled, `total_count` may vary between individual threads, but we still take + the simple average across in-thread recalls. Strictly speaking, this is incorrect, but it's good + enough under assumption that the filtering is more-or-less uniform. + */ state.counters.insert({"Recall", {actual_recall, benchmark::Counter::kAvgThreads}}); } } @@ -515,13 +544,15 @@ void dispatch_benchmark(std::string cmdline, auto query_file = combine_path(data_prefix, dataset_conf.query_file); auto gt_file = dataset_conf.groundtruth_neighbors_file; if (gt_file.has_value()) { gt_file.emplace(combine_path(data_prefix, gt_file.value())); } - auto dataset = std::make_shared>(dataset_conf.name, - base_file, - dataset_conf.subset_first_row, - dataset_conf.subset_size, - query_file, - dataset_conf.distance, - gt_file); + auto dataset = + std::make_shared>(dataset_conf.name, + base_file, + dataset_conf.subset_first_row, + dataset_conf.subset_size, + query_file, + dataset_conf.distance, + gt_file, + search_mode ? dataset_conf.filtering_rate : std::nullopt); ::benchmark::AddCustomContext("dataset", dataset_conf.name); ::benchmark::AddCustomContext("distance", dataset_conf.distance); std::vector indices = conf.get_indices(); diff --git a/cpp/bench/ann/src/common/blob.hpp b/cpp/bench/ann/src/common/blob.hpp new file mode 100644 index 000000000..81310ae0b --- /dev/null +++ b/cpp/bench/ann/src/common/blob.hpp @@ -0,0 +1,889 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "ann_types.hpp" +#include "util.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::bench { + +/** RAII wrapper for a file descriptor. 
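 * Usage sketch (editor's illustration; the file name is hypothetical):
 *
 *   file_descriptor fd{"/data/base.fbin"};  // std::fopen(..., "r"); throws with errno details
 *   FILE* f = fd.value();                   // borrowed handle, fclose'd by the destructor
 *   file_descriptor tmp{size_t{1} << 20};   // anonymous std::tmpfile, ftruncate'd to 1 MiB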
*/ +struct file_descriptor { + explicit file_descriptor(std::string file_name) : fd_{std::fopen(file_name.c_str(), "r")} + { + if (fd_ == nullptr) { + throw std::runtime_error( + "cuvs::bench::file_descriptor: failed to open a file (std::fopen): errno = " + + std::to_string(errno) + ", " + std::string(strerror(errno)) + ". File name: " + file_name); + } + } + + explicit file_descriptor(size_t file_size_bytes) : fd_{std::tmpfile()} + { + if (fd_ == nullptr) { + throw std::runtime_error( + "cuvs::bench::file_descriptor: failed to open a temporary file (std::tmpfile): errno = " + + std::to_string(errno) + ", " + std::string(strerror(errno))); + } + if (ftruncate(fileno(fd_), file_size_bytes) == -1) { + throw std::runtime_error( + "cuvs::bench::file_descriptor: failed to call `ftruncate` to allocate memory for a " + "temporary file."); + } + } + + // No copies for owning struct + file_descriptor(const file_descriptor& res) = delete; + auto operator=(const file_descriptor& other) -> file_descriptor& = delete; + // Moving is fine + file_descriptor(file_descriptor&& other) : fd_{std::exchange(other.fd_, nullptr)} {} + auto operator=(file_descriptor&& other) -> file_descriptor& + { + std::swap(this->fd_, other.fd_); + return *this; + } + + ~file_descriptor() noexcept + { + if (fd_ != nullptr) { std::fclose(fd_); } + } + + [[nodiscard]] auto value() const -> FILE* { return fd_; } + + private: + FILE* fd_ = nullptr; +}; + +class mmap_error : public std::runtime_error { + private: + int errno_; + + public: + mmap_error(std::string extra_msg) + : std::runtime_error("cuvs::bench::mmap_owner: `mmap` error: Value of errno " + + std::to_string(errno) + ", " + std::string(strerror(errno)) + ". " + + extra_msg), + errno_(errno) + { + } + + [[nodiscard]] auto code() const noexcept { return errno_; } +}; + +/** RAII wrapper for a mmap/munmap. */ +struct mmap_owner { + /** Map a file */ + mmap_owner( + const file_descriptor& descriptor, size_t offset, size_t size, int flags, bool writable = false) + : ptr_{mmap_verbose(size, + writable ? PROT_READ | PROT_WRITE : PROT_READ, + flags, + fileno(descriptor.value()), + offset)}, + size_{size} + { + } + + /** Allocate a new memory (not backed by a file). */ + mmap_owner(size_t size, int flags) + : ptr_{mmap_verbose(size, PROT_READ | PROT_WRITE, flags, -1, 0)}, size_{size} + { + } + + ~mmap_owner() noexcept + { + if (ptr_ != nullptr) { munmap(ptr_, size_); } + } + + // No copies for owning struct + mmap_owner(const mmap_owner& res) = delete; + auto operator=(const mmap_owner& other) -> mmap_owner& = delete; + // Moving is fine + mmap_owner(mmap_owner&& other) + : ptr_{std::exchange(other.ptr_, nullptr)}, size_{std::exchange(other.size_, 0)} + { + } + auto operator=(mmap_owner&& other) -> mmap_owner& + { + std::swap(this->ptr_, other.ptr_); + std::swap(this->size_, other.size_); + return *this; + } + + [[nodiscard]] auto data() const -> void* { return ptr_; } + [[nodiscard]] auto size() const -> size_t { return size_; } + + private: + void* ptr_; + size_t size_; + + static inline auto mmap_verbose(size_t length, int prot, int flags, int fd, off_t offset) -> void* + { + auto ptr = mmap(nullptr, length, prot, flags, fd, offset); + if (ptr == MAP_FAILED) { + std::array buf; + snprintf(buf.data(), + sizeof(buf), + "Failed call: cuvs::bench::mmap_owner:mmap(nullptr, %zu, 0x%08x, 0x%08x, %d, %zd)", + length, + prot, + flags, + fd, + offset); + throw mmap_error{std::string(buf.data())}; + } + return ptr; + } +}; + +/** RAII wrapper for managed memory. 
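 * Editor's note: cudaMallocManaged returns a single pointer that is valid in both host
 * and device code, with pages migrating on demand, e.g.
 *
 *   managed_mem_owner buf{n_rows * n_cols * sizeof(float)};  // sizes hypothetical
 *   auto* data = static_cast<float*>(buf.data());            // usable by CPU and GPU algos alike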
*/ +struct managed_mem_owner { + explicit managed_mem_owner(size_t size) : size_{size} + { +#ifndef BUILD_CPU_ONLY + auto err_code = cudaMallocManaged(&ptr_, size_); + if (err_code != cudaSuccess) { + ptr_ = nullptr; + throw std::runtime_error{ + "cuvs::bench::managed_mem_owner: call to cudaMallocManaged failed with code " + + std::to_string(err_code)}; + } +#else + throw std::runtime_error{ + "Device functions are not available when built with BUILD_CPU_ONLY flag."}; +#endif + } + + ~managed_mem_owner() noexcept + { + if (ptr_ != nullptr) { +#ifndef BUILD_CPU_ONLY + cudaFree(ptr_); +#endif + } + } + + // No copies for owning struct + managed_mem_owner(const managed_mem_owner& res) = delete; + auto operator=(const managed_mem_owner& other) -> managed_mem_owner& = delete; + // Moving is fine + managed_mem_owner(managed_mem_owner&& other) + : ptr_{std::exchange(other.ptr_, nullptr)}, size_{std::exchange(other.size_, 0)} + { + } + auto operator=(managed_mem_owner&& other) -> managed_mem_owner& + { + std::swap(this->ptr_, other.ptr_); + std::swap(this->size_, other.size_); + return *this; + } + + [[nodiscard]] auto data() const -> void* { return ptr_; } + [[nodiscard]] auto size() const -> size_t { return size_; } + + private: + void* ptr_ = nullptr; + size_t size_; +}; + +/** RAII wrapper for device memory. */ +struct device_mem_owner { + explicit device_mem_owner(size_t size) : size_{size} + { +#ifndef BUILD_CPU_ONLY + auto err_code = cudaMalloc(&ptr_, size_); + if (err_code != cudaSuccess) { + ptr_ = nullptr; + throw std::runtime_error{ + "cuvs::bench::device_mem_owner: call to cudaMalloc failed with code " + + std::to_string(err_code)}; + } +#else + throw std::runtime_error{ + "Device functions are not available when built with BUILD_CPU_ONLY flag."}; +#endif + } + ~device_mem_owner() noexcept + { + if (ptr_ != nullptr) { +#ifndef BUILD_CPU_ONLY + cudaFree(ptr_); +#endif + } + } + // No copies for owning struct + device_mem_owner(const device_mem_owner& res) = delete; + auto operator=(const device_mem_owner& other) -> device_mem_owner& = delete; + // Moving is fine + device_mem_owner(device_mem_owner&& other) + : ptr_{std::exchange(other.ptr_, nullptr)}, size_{std::exchange(other.size_, 0)} + { + } + auto operator=(device_mem_owner&& other) -> device_mem_owner& + { + std::swap(this->ptr_, other.ptr_); + std::swap(this->size_, other.size_); + return *this; + } + + [[nodiscard]] auto data() const -> void* { return ptr_; } + [[nodiscard]] auto size() const -> size_t { return size_; } + + private: + void* ptr_ = nullptr; + size_t size_; +}; + +/** Lazy-initialized file handle. */ +struct file { + explicit file(std::string file_name) : reqsize_or_name_{std::move(file_name)} {} + explicit file(size_t tmp_size_bytes) : reqsize_or_name_{tmp_size_bytes} {} + + // this shouldn't be necessary, but adds extra safety (make sure descriptors are not copied) + file(const file&) = delete; + auto operator=(const file&) -> file& = delete; + file(file&&); + auto operator=(file&&) -> file&; + + [[nodiscard]] auto descriptor() const -> const file_descriptor& + { + if (!descriptor_.has_value()) { + std::visit([&d = descriptor_](auto&& x) { d.emplace(x); }, reqsize_or_name_); + } + return descriptor_.value(); + } + + [[nodiscard]] auto path() const -> std::string + { + return std::holds_alternative(reqsize_or_name_) + ? 
std::get(reqsize_or_name_) + : ""; + } + + [[nodiscard]] auto size() const -> size_t + { + if (!size_.has_value()) { + size_.emplace(std::visit( + [&s = size_](auto&& x) { + if constexpr (std::is_same_v, size_t>) { + return x; + } else { + struct stat statbuf; + if (stat(x.c_str(), &statbuf) != 0) { + throw std::runtime_error{"cuvs::bench::file::size() error: `stat` failed: " + x}; + } + return static_cast(statbuf.st_size); + } + }, + reqsize_or_name_)); + } + return size_.value(); + } + + [[nodiscard]] auto is_temporary() const -> bool + { + return std::holds_alternative(reqsize_or_name_); + } + + void reset_lazy_state() const + { + descriptor_.reset(); + size_.reset(); + } + + private: + std::variant reqsize_or_name_; + mutable std::optional descriptor_ = std::nullopt; + mutable std::optional size_ = std::nullopt; +}; + +// declare the move constructors explicitly outside of the class declaration to make sure if +// anything is wrong, we catch that at compile time. +inline file::file(file&&) = default; +inline auto file::operator=(file&&) -> file& = default; + +/** + * Lazy-initialized file handle with the size information provided by our .bin format: + * The file always starts with two uint32_t values: [n_rows, n_cols]. + */ +template +struct blob_file : public file { + explicit blob_file(std::string file_name, uint32_t rows_offset = 0, uint32_t rows_limit = 0) + : file{std::move(file_name)}, rows_offset_{rows_offset}, rows_limit_{rows_limit} + { + } + + explicit blob_file(uint32_t n_rows, uint32_t n_cols) + : file{sizeof(T) * n_rows * n_cols + 2 * sizeof(uint32_t)}, rows_offset_{0}, rows_limit_{0} + { + // NB: this forces the file descriptor, thus breaking lazy-initialization. Not sure if it's + // worth refactoring in this case. + std::array h{n_rows, n_cols}; + if (std::fwrite(h.data(), sizeof(uint32_t), h.size(), descriptor().value()) != 2) { + throw std::runtime_error{ + "cuvs::bench::blob_file `fwrite` failed when initializing a tmp file."}; + } + if (std::fflush(descriptor().value()) != 0) { + throw std::runtime_error{ + "cuvs::bench::blob_file `fflush` failed with non-zero code when initializing a tmp file."}; + } + } + + blob_file(const blob_file&) = delete; + auto operator=(const blob_file&) -> blob_file& = delete; + blob_file(blob_file&&); + auto operator=(blob_file&&) -> blob_file&; + + [[nodiscard]] auto n_rows() const -> uint32_t { return header()[0]; } + [[nodiscard]] auto n_cols() const -> uint32_t { return header()[1]; } + + [[nodiscard]] auto rows_offset() const -> uint32_t { return rows_offset_; } + [[nodiscard]] auto rows_limit() const -> uint32_t + { + auto rows_max = n_rows() - std::min(rows_offset(), n_rows()); // available rows + return rows_limit_ == 0 ? rows_max : std::min(rows_limit_, rows_max); // limited rows + } + + void reset_lazy_state() const + { + file::reset_lazy_state(); + header_.reset(); + } + + private: + mutable std::optional> header_ = std::nullopt; + uint32_t rows_offset_; + uint32_t rows_limit_; + + [[nodiscard]] auto header() const -> const std::array& + { + if (!header_.has_value()) { + std::array h; + std::rewind(descriptor().value()); + if (std::fread(h.data(), sizeof(uint32_t), h.size(), descriptor().value()) != 2) { + throw std::runtime_error{"cuvs::bench::blob_file read header of bin file failed: " + + path()}; + } + header_.emplace(h); + } + return header_.value(); + } +}; + +// declare the move constructors explicitly outside of the class declaration to make sure if +// anything is wrong, we catch that at compile time. 
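Before the move-constructor definitions below, a brief aside on `blob_file`: the `.bin` header is just two `uint32_t` values followed by the row-major payload, so a minimal standalone reader looks roughly like this (editor's sketch; the helper name is hypothetical and error handling is minimal):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <utility>

// Reads the [n_rows, n_cols] header of a .bin file; sizeof(T) * n_rows * n_cols
// bytes of payload follow immediately after these 8 bytes.
inline auto read_bin_header(const char* path) -> std::pair<std::uint32_t, std::uint32_t>
{
  std::FILE* f = std::fopen(path, "r");
  if (f == nullptr) { throw std::runtime_error{"failed to open .bin file"}; }
  std::uint32_t header[2];
  std::size_t n_read = std::fread(header, sizeof(std::uint32_t), 2, f);
  std::fclose(f);
  if (n_read != 2) { throw std::runtime_error{"failed to read the 8-byte .bin header"}; }
  return {header[0], header[1]};
}
```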
+template +inline blob_file::blob_file(blob_file&&) = default; +template +inline auto blob_file::operator=(blob_file&&) -> blob_file& = default; + +/** Lazily map or copy the file content onto host memory. */ +template +struct blob_mmap { + explicit blob_mmap(blob_file&& file, + bool copy_in_memory = false, + HugePages hugepages_2mb = HugePages::kDisable) + : file_{std::move(file)}, + copy_in_memory_{copy_in_memory}, + hugepages_2mb_requested_{hugepages_2mb}, + hugepages_2mb_actual_{hugepages_2mb > HugePages::kDisable} + { + } + explicit blob_mmap(std::string file_name, + uint32_t rows_offset = 0, + uint32_t rows_limit = 0, + bool copy_in_memory = false, + HugePages hugepages_2mb = HugePages::kDisable) + : blob_mmap{ + blob_file{std::move(file_name), rows_offset, rows_limit}, copy_in_memory, hugepages_2mb} + { + } + + private: + blob_file file_; + bool copy_in_memory_; + HugePages hugepages_2mb_requested_; + mutable bool hugepages_2mb_actual_; + + mutable std::optional> handle_; + + [[nodiscard]] auto handle() const -> const std::tuple& + { + if (!handle_.has_value()) { + size_t page_size = hugepages_2mb_actual_ ? 1024ull * 1024ull * 2ull : sysconf(_SC_PAGE_SIZE); + int flags = 0; + if (hugepages_2mb_actual_) { flags |= MAP_HUGETLB | MAP_HUGE_2MB; } + size_t data_start = sizeof(T) * file_.rows_offset() * file_.n_cols() + sizeof(uint32_t) * 2; + size_t data_end = sizeof(T) * file_.rows_limit() * file_.n_cols() + data_start; + + try { + if (copy_in_memory_) { + // Copy the content in-memory + flags |= MAP_ANONYMOUS | MAP_PRIVATE; + size_t size = data_end - data_start; + mmap_owner owner{size, flags}; + std::fseek(file_.descriptor().value(), data_start, SEEK_SET); + size_t n_elems = file_.rows_limit() * file_.n_cols(); + if (std::fread(owner.data(), sizeof(T), n_elems, file_.descriptor().value()) != n_elems) { + throw std::runtime_error{"cuvs::bench::blob_mmap() fread " + file_.path() + " failed"}; + } + handle_.emplace(std::move(owner), 0); + } else { + // Map the file + // If this is a temporary file, we're supposed to write to it, hence MAP_SHARED. + flags |= file_.is_temporary() ? MAP_SHARED : MAP_PRIVATE; + size_t mmap_start = (data_start / page_size) * page_size; + size_t mmap_size = data_end - mmap_start; + handle_.emplace( + mmap_owner{file_.descriptor(), mmap_start, mmap_size, flags, file_.is_temporary()}, + data_start - mmap_start); + } + } catch (const mmap_error& e) { + bool hugepages_2mb_asked = hugepages_2mb_requested_ == HugePages::kAsk || + hugepages_2mb_requested_ == HugePages::kRequire; + if (e.code() == EPERM && hugepages_2mb_asked && hugepages_2mb_actual_) { + if (hugepages_2mb_requested_ == HugePages::kRequire) { + log_warn( + "cuvs::bench::blob_mmap: `mmap` failed to map due to EPERM, which is likely caused " + "by the permissions issue. You either need a CAP_IPC_LOCK capability or run the " + "program with sudo. We will try again without huge pages."); + } + hugepages_2mb_actual_ = false; + return handle(); + } + if (e.code() == EINVAL && hugepages_2mb_asked && hugepages_2mb_actual_ && + !copy_in_memory_) { + if (hugepages_2mb_requested_ == HugePages::kRequire) { + log_warn( + "cuvs::bench::blob_mmap: `mmap` failed to map due to EINVAL, which is likely caused " + "by the file system not supporting huge pages. 
We will try again without huge "
+              "pages.");
+          }
+          hugepages_2mb_actual_ = false;
+          return handle();
+        }
+        throw;  // The error is not due to huge pages, or it is otherwise unrecoverable.
+      }
+    }
+    return handle_.value();
+  }
+
+ public:
+  [[nodiscard]] auto data() const -> T*
+  {
+    auto& [owner, offset] = handle();
+    return reinterpret_cast(reinterpret_cast(owner.data()) + offset);
+  }
+
+  [[nodiscard]] auto is_in_memory() const -> bool { return copy_in_memory_; }
+  [[nodiscard]] auto is_hugepage() const -> bool { return hugepages_2mb_actual_; }
+  /**
+   * Enabling huge pages is not always possible. For convenience, we may silently disable it in
+   * some cases. This function helps to decide whether the current setting is compatible with the
+   * request.
+   */
+  [[nodiscard]] auto hugepage_compliant(HugePages request) const -> bool
+  {
+    if (request == hugepages_2mb_requested_) {
+      // Whatever the actual state is, the result would be the same if we recreated the mapping.
+      return true;
+    }
+    bool hp_enabled    = hugepages_2mb_actual_ && request > HugePages::kDisable;
+    bool hp_disabled   = !hugepages_2mb_actual_ && request == HugePages::kDisable;
+    bool hp_not_forced = !hugepages_2mb_actual_ && request == HugePages::kAsk;
+    return hp_enabled || hp_disabled || hp_not_forced;
+  }
+  [[nodiscard]] auto n_rows() const -> uint32_t { return file_.rows_limit(); }
+  [[nodiscard]] auto n_cols() const -> uint32_t { return file_.n_cols(); }
+  [[nodiscard]] auto size() const -> size_t { return sizeof(T) * n_rows() * n_cols(); }
+  [[nodiscard]] auto unmap() && noexcept -> blob_file
+  {
+    // If we used mmap on a temporary file, then it was writable, so there is a chance the user
+    // wrote something to the mmap; we must make sure the changes are visible in the file before
+    // migrating it.
+    bool flush_writes = file_.is_temporary() && handle_.has_value();
+    if (flush_writes) {
+      auto& [owner, _] = handle();
+      msync(owner.data(), owner.size(), MS_SYNC | MS_INVALIDATE);
+    }
+    handle_.reset();
+    if (flush_writes) { std::fflush(file_.descriptor().value()); }
+    return blob_file{std::move(file_)};
+  }
+  [[nodiscard]] auto release() && noexcept -> blob_file { return std::move(*this).unmap(); }
+
+  void reset_lazy_state() const
+  {
+    file_.reset_lazy_state();
+    hugepages_2mb_actual_ = hugepages_2mb_requested_ > HugePages::kDisable;
+  }
+};
+
+template
+struct blob_pinned {
+ private:
+  mutable blob_mmap blob_;
+  mutable void* ptr_ = nullptr;
+
+ public:
+  explicit blob_pinned(blob_mmap&& blob) : blob_{std::move(blob)} {}
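To summarize the retry logic in `handle()` above (an editor's sketch of the policy, not part of the patch): `mmap` may fail with EPERM (insufficient privileges / missing CAP_IPC_LOCK) or EINVAL (e.g. a filesystem without huge-page support), and what happens next depends on the requested level:

```cpp
// Whether blob_mmap::handle() retries the mapping with regular pages after a
// huge-page mmap failure (EPERM or EINVAL), per the HugePages request level.
constexpr auto may_retry_without_hugepages(HugePages requested) -> bool
{
  switch (requested) {
    case HugePages::kAsk: return true;      // retry silently
    case HugePages::kRequire: return true;  // retry after a log_warn(...)
    case HugePages::kDemand: return false;  // the mmap_error propagates to the caller
    default: return false;                  // kDisable: huge pages were never attempted
  }
}
```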
+  // First map the file and then register to CUDA.
+  // NB: as per docs, huge pages are not supported.
+  explicit blob_pinned(blob_file&& blob, bool copy_in_memory = true)
+    : blob_{std::move(blob), copy_in_memory, HugePages::kDisable}
+  {
+  }
+  explicit blob_pinned(std::string file_name,
+                       uint32_t rows_offset = 0,
+                       uint32_t rows_limit  = 0,
+                       bool copy_in_memory  = true)
+    : blob_pinned{blob_file{file_name, rows_offset, rows_limit}, copy_in_memory}
+  {
+  }
+
+  ~blob_pinned() noexcept { reset_lazy_state(); }
+
+  // No copies for owning struct
+  blob_pinned(const blob_pinned& res) = delete;
+  auto operator=(const blob_pinned& other) -> blob_pinned& = delete;
+  // Moving is fine
+  blob_pinned(blob_pinned&& other) : blob_{std::move(other.blob_)}, ptr_{other.ptr_}
+  {
+    other.ptr_ = nullptr;
+  }
+  auto operator=(blob_pinned&& other) -> blob_pinned&
+  {
+    std::swap(this->blob_, other.blob_);
+    std::swap(this->ptr_, other.ptr_);
+    return *this;
+  }
+
+  [[nodiscard]] auto data() const -> T*
+  {
+    if (ptr_ == nullptr) {
+      void* ptr = reinterpret_cast(blob_.data());
+#ifndef BUILD_CPU_ONLY
+      int flags       = cudaHostRegisterDefault;
+      auto error_code = cudaSuccess;
+      if (!blob_.is_in_memory()) {
+        flags      = cudaHostRegisterIoMemory | cudaHostRegisterReadOnly;
+        error_code = cudaHostRegister(ptr, blob_.size(), flags);
+        if (error_code == cudaErrorNotSupported) {
+          // Sometimes read-only registration is not supported; retry with the reduced flags.
+          flags      = cudaHostRegisterIoMemory;
+          error_code = cudaHostRegister(ptr, blob_.size(), flags);
+        }
+      } else {
+        error_code = cudaHostRegister(ptr, blob_.size(), cudaHostRegisterDefault);
+      }
+      if (error_code == cudaErrorInvalidValue && (!blob_.is_in_memory() || blob_.is_hugepage())) {
+        auto hugepage =
+          (blob_.is_hugepage() && blob_.is_in_memory()) ? HugePages::kAsk : HugePages::kDisable;
+        auto file = std::move(blob_).release();
+        blob_     = blob_mmap{std::move(file), true, hugepage};
+        return data();
+      }
+      if (error_code != cudaSuccess) {
+        log_error(
+          "cuvs::bench::blob_pinned: cudaHostRegister(%p, %zu, %d)", ptr, blob_.size(), flags);
+        throw std::runtime_error{
+          "cuvs::bench::blob_pinned: call to cudaHostRegister failed with code " +
+          std::to_string(error_code)};
+      }
+#endif
+      ptr_ = ptr;
+    }
+    return reinterpret_cast(ptr_);
+  }
+
+  [[nodiscard]] auto unpin() && noexcept -> blob_mmap
+  {
+    // unregister the memory before passing it to a third party
+    if (ptr_ != nullptr) {
+#ifndef BUILD_CPU_ONLY
+      cudaHostUnregister(ptr_);
+#endif
+      ptr_ = nullptr;
+    }
+    return blob_mmap{std::move(blob_)};
+  }
+  void reset_lazy_state() const
+  {
+    if (ptr_ != nullptr) {
+#ifndef BUILD_CPU_ONLY
+      cudaHostUnregister(ptr_);
+#endif
+      ptr_ = nullptr;
+    }
+    blob_.reset_lazy_state();
+  }
+
+  [[nodiscard]] auto is_in_memory() const noexcept -> bool { return true; }
+  [[nodiscard]] auto is_hugepage() const noexcept -> bool { return blob_.is_hugepage(); }
+  [[nodiscard]] auto hugepage_compliant(HugePages request) const -> bool
+  {
+    return blob_.hugepage_compliant(request);
+  }
+  [[nodiscard]] auto n_rows() const -> uint32_t { return blob_.n_rows(); }
+  [[nodiscard]] auto n_cols() const -> uint32_t { return blob_.n_cols(); }
+  [[nodiscard]] auto size() const -> size_t { return blob_.size(); }
+  [[nodiscard]] auto release() && noexcept -> blob_file
+  {
+    return std::move(*this).unpin().release();
+  }
+};
+
+template
+struct blob_copying {
+ private:
+  blob_mmap blob_;
+  mutable std::optional mem_ = std::nullopt;
+
+ public:
+  explicit blob_copying(blob_mmap&& blob) : blob_{std::move(blob)} {}
+  // First map the file and then copy it to device; use huge pages for faster copy.
+  explicit blob_copying(blob_file&& blob) :
blob_{std::move(blob), false, HugePages::kAsk} {} + explicit blob_copying(std::string file_name, uint32_t rows_offset = 0, uint32_t rows_limit = 0) + : blob_copying{blob_file{file_name, rows_offset, rows_limit}} + { + } + + [[nodiscard]] auto data() const -> T* + { + if (!mem_.has_value()) { + mem_.emplace(blob_.size()); +#ifndef BUILD_CPU_ONLY + auto error_code = cudaMemcpy(mem_->data(), blob_.data(), blob_.size(), cudaMemcpyDefault); + if (error_code != cudaSuccess) { + throw std::runtime_error{"cuvs::bench::blob_device: call to cudaMemcpy failed with code " + + std::to_string(error_code)}; + } +#endif + } + return reinterpret_cast(mem_->data()); + } + + [[nodiscard]] auto free() && noexcept -> blob_mmap + { + mem_.reset(); + return blob_mmap{std::move(blob_)}; + } + void reset_lazy_state() const + { + mem_.reset(); + blob_.reset_lazy_state(); + } + + [[nodiscard]] auto is_in_memory() const noexcept -> bool { return true; } + [[nodiscard]] auto is_hugepage() const noexcept -> bool { return blob_.is_hugepage(); } + // For copying CUDA allocation, the hugepage setting is not relevant at all. + [[nodiscard]] auto hugepage_compliant(HugePages request) const -> bool { return true; } + [[nodiscard]] auto n_rows() const -> uint32_t { return blob_.n_rows(); } + [[nodiscard]] auto n_cols() const -> uint32_t { return blob_.n_cols(); } + [[nodiscard]] auto size() const -> size_t { return blob_.size(); } + [[nodiscard]] auto release() && noexcept -> blob_file + { + return std::move(*this).free().release(); + } +}; + +template +using blob_device = blob_copying; +template +using blob_managed = blob_copying; + +/** + * @brief A blob is a single contiguous piece of data. + * + * It can reside in host, managed, or device memory, it can be pinned in physical memory or backed + * by a filesystem. You can also control whether to use huge pages (2MB) when it is in host memory. + * + * The blob data is a one- or two-dimensional typed array (it has n_rows and n_cols properties). + * + * The blob tries to be lazy accessing the data. It reads or copies the data only when it is + * requested. This allows the benchmarking executable be more resilient; for example, a blob can + * point to a non-existent query dataset when the benchmark is in the index build mode. + * + * @tparam T the data type. + */ +template +struct blob { + private: + using blob_type = std::variant, blob_pinned, blob_device, blob_managed>; + mutable blob_type value_; + + [[nodiscard]] auto data_mmap(bool in_memory, HugePages request_hugepages_2mb) const -> T* + { + if (auto* v = std::get_if>(&value_)) { + if (v->is_in_memory() == in_memory && v->hugepage_compliant(request_hugepages_2mb)) { + return v->data(); + } + } + blob_type tmp{std::move(value_)}; + value_ = std::visit( + [in_memory, request_hugepages_2mb](auto&& val) { + return blob_mmap{std::move(val).release(), in_memory, request_hugepages_2mb}; + }, + std::move(tmp)); + + return data(); + } + + [[nodiscard]] auto data_pinned(HugePages request_hugepages_2mb) const -> T* + { + // The requested type is there + if (auto* v = std::get_if>(&value_)) { + if (v->hugepage_compliant(request_hugepages_2mb)) { return v->data(); } + } + // If there's already an mmap allocation, we just need to pin it. 
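      // (Editor's note: every blob_* owner exposes `release() && -> blob_file`, so any state
      //  can reach any other by round-tripping through the file handle; the branch below is a
      //  cheaper path that moves the existing mmap straight into blob_pinned, skipping the
      //  unmap/re-read.)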
+    if (auto* v = std::get_if>(&value_)) {
+      if (v->hugepage_compliant(request_hugepages_2mb)) {
+        blob_mmap tmp{std::move(*v)};
+        return value_.template emplace>(std::move(tmp)).data();
+      }
+    }
+    // otherwise do full reset
+    blob_type tmp{std::move(value_)};
+    value_ = std::visit(
+      [request_hugepages_2mb](auto&& val) {
+        blob_mmap tmp{std::move(val).release(), true, request_hugepages_2mb};
+        return blob_pinned{std::move(tmp)};
+      },
+      std::move(tmp));
+
+    return data();
+  }
+
+  [[nodiscard]] auto data_device() const -> T*
+  {
+    // The requested type is there
+    if (auto* v = std::get_if>(&value_)) { return v->data(); }
+    // otherwise do full reset
+    blob_type tmp{std::move(value_)};
+    value_ = std::visit([](auto&& val) { return blob_device{std::move(val).release()}; },
+                        std::move(tmp));
+    return data();
+  }
+
+  [[nodiscard]] auto data_managed() const -> T*
+  {
+    // The requested type is there
+    if (auto* v = std::get_if>(&value_)) { return v->data(); }
+    // otherwise do full reset
+    blob_type tmp{std::move(value_)};
+    value_ = std::visit([](auto&& val) { return blob_managed{std::move(val).release()}; },
+                        std::move(tmp));
+    return data();
+  }
+
+ public:
+  explicit blob(std::string file_name,
+                uint32_t rows_offset    = 0,
+                uint32_t rows_limit     = 0,
+                bool copy_in_memory     = false,
+                HugePages hugepages_2mb = HugePages::kDisable)
+    : value_{std::in_place_type>,
+             std::move(file_name),
+             rows_offset,
+             rows_limit,
+             copy_in_memory,
+             hugepages_2mb}
+  {
+  }
+
+  template
+  explicit blob(VariantT&& blob_variant) : value_{std::move(blob_variant)}
+  {
+  }
+
+  [[nodiscard]] auto data() const -> T*
+  {
+    return std::visit([](auto&& val) { return val.data(); }, value_);
+  }
+
+  [[nodiscard]] auto data(MemoryType memory_type,
+                          HugePages request_hugepages_2mb = HugePages::kDisable) const -> T*
+  {
+    switch (memory_type) {
+      case MemoryType::kHost: return data_mmap(true, request_hugepages_2mb);
+      case MemoryType::kHostMmap: return data_mmap(false, request_hugepages_2mb);
+      case MemoryType::kHostPinned:
+        if (request_hugepages_2mb > HugePages::kDisable) {
+          log_error(
+            "cuvs::bench::blob::data(): huge pages are requested but not supported by "
+            "cudaHostRegister at the moment. We will try nevertheless...");
+        }
+        return data_pinned(request_hugepages_2mb);
+      case MemoryType::kDevice: return data_device();    // hugepages are not relevant here
+      case MemoryType::kManaged: return data_managed();  // hugepages are not relevant here
+      default:
+        throw std::runtime_error{"cuvs::bench::blob::data(): unexpected memory type " +
+                                 std::to_string(static_cast(memory_type))};
+    }
+  }
+
+  [[nodiscard]] auto is_in_memory() const noexcept -> bool
+  {
+    return std::visit([](auto&& val) { return val.is_in_memory(); }, value_);
+  }
+  [[nodiscard]] auto is_hugepage() const noexcept -> bool
+  {
+    return std::visit([](auto&& val) { return val.is_hugepage(); }, value_);
+  }
+  [[nodiscard]] auto n_rows() const -> uint32_t
+  {
+    return std::visit([](auto&& val) { return val.n_rows(); }, value_);
+  }
+
+  [[nodiscard]] auto n_cols() const -> uint32_t
+  {
+    return std::visit([](auto&& val) { return val.n_cols(); }, value_);
+  }
+  /** Size of the blob content in bytes (doesn't include the file header). */
+  [[nodiscard]] auto size() const -> size_t
+  {
+    return std::visit([](auto&& val) { return val.size(); }, value_);
+  }
+  /**
+   * Reset the hidden internal state, e.g. release the memory allocation or close a file descriptor.
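   * (Editor's illustration of the intended recovery pattern:
   *
   *    blob<float> b{"base.fbin"};      // lazy: nothing is opened or read yet
   *    b.data(MemoryType::kDevice);     // may throw, e.g. if the file is missing
   *    b.reset_lazy_state();            // drop the failed descriptor/allocation
   *    b.data(MemoryType::kHostMmap);   // ...so the access can be retried later.)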
+ * This is useful in case when accessing the blob resulted in an error/invalid state, which should + * be reproduced at a later point in time. + */ + void reset_lazy_state() const + { + std::visit([](auto&& val) { val.reset_lazy_state(); }, value_); + } +}; + +} // namespace cuvs::bench diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp index 1fc7327cb..ac1361219 100644 --- a/cpp/bench/ann/src/common/conf.hpp +++ b/cpp/bench/ann/src/common/conf.hpp @@ -45,14 +45,16 @@ class configuration { // the range of rows is [subset_first_row, subset_first_row + subset_size) // however, subset_size = 0 means using all rows after subset_first_row // that is, the subset is [subset_first_row, #rows in base_file) - size_t subset_first_row{0}; - size_t subset_size{0}; + uint32_t subset_first_row{0}; + uint32_t subset_size{0}; std::string query_file; std::string distance; std::optional groundtruth_neighbors_file{std::nullopt}; // data type of input dataset, possible values ["float", "int8", "uint8"] std::string dtype; + + std::optional filtering_rate{std::nullopt}; }; explicit inline configuration(std::istream& conf_stream) @@ -74,6 +76,9 @@ class configuration { dataset_conf_.base_file = conf.at("base_file"); dataset_conf_.query_file = conf.at("query_file"); dataset_conf_.distance = conf.at("distance"); + if (conf.contains("filtering_rate")) { + dataset_conf_.filtering_rate.emplace(conf.at("filtering_rate")); + } if (conf.contains("groundtruth_neighbors_file")) { dataset_conf_.groundtruth_neighbors_file = conf.at("groundtruth_neighbors_file"); diff --git a/cpp/bench/ann/src/common/dataset.hpp b/cpp/bench/ann/src/common/dataset.hpp index 49020fe36..ba801d165 100644 --- a/cpp/bench/ann/src/common/dataset.hpp +++ b/cpp/bench/ann/src/common/dataset.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,529 +16,166 @@ #pragma once #include "ann_types.hpp" -#include "util.hpp" +#include "blob.hpp" -#include -#include -#include - -#include #include #include #include -#include +#include #include -#include -#include -#include namespace cuvs::bench { -// http://big-algo-benchmarks.com/index.html: -// binary format that starts with 8 bytes of data consisting of num_points(uint32_t) -// num_dimensions(uint32) followed by num_pts x num_dimensions x sizeof(type) bytes of -// data stored one vector after another. -// Data files will have suffixes .fbin, .u8bin, and .i8bin to represent float32, uint8 -// and int8 type data. -// As extensions for this benchmark, half and int data files will have suffixes .f16bin -// and .ibin, respectively. 
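For reference, the new `filtering_rate` option above is read from the dataset section of the benchmark configuration; the generated bitset keeps each point with probability `1 - filtering_rate`, so e.g. 0.3 filters out roughly 30% of the points. A hypothetical dataset entry (fields as parsed in conf.hpp):

```json
{
  "name": "my-dataset",
  "base_file": "my-dataset/base.fbin",
  "query_file": "my-dataset/query.fbin",
  "groundtruth_neighbors_file": "my-dataset/groundtruth.neighbors.ibin",
  "distance": "euclidean",
  "filtering_rate": 0.3
}
```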
-template -class bin_file { - public: - bin_file(std::string file, - const std::string& mode, - uint32_t subset_first_row = 0, - uint32_t subset_size = 0); - ~bin_file() - { - if (mapped_ptr_ != nullptr) { unmap(); } - if (fp_ != nullptr) { fclose(fp_); } - } - bin_file(const bin_file&) = delete; - auto operator=(const bin_file&) -> bin_file& = delete; - - void get_shape(size_t* nrows, int* ndims) const - { - assert(read_mode_); - if (!fp_) { open_file(); } - *nrows = nrows_; - *ndims = ndims_; - } - - void read(T* data) const - { - assert(read_mode_); - if (!fp_) { open_file(); } - size_t total = static_cast(nrows_) * ndims_; - if (fread(data, sizeof(T), total, fp_) != total) { - throw std::runtime_error{"fread() bin_file " + file_ + " failed"}; - } - } - - void write(const T* data, uint32_t nrows, uint32_t ndims) - { - assert(!read_mode_); - if (!fp_) { open_file(); } - if (fwrite(&nrows, sizeof(uint32_t), 1, fp_) != 1) { - throw std::runtime_error{"fwrite() bin_file " + file_ + " failed"}; - } - if (fwrite(&ndims, sizeof(uint32_t), 1, fp_) != 1) { - throw std::runtime_error{"fwrite() bin_file " + file_ + " failed"}; - } - - size_t total = static_cast(nrows) * ndims; - if (fwrite(data, sizeof(T), total, fp_) != total) { - throw std::runtime_error{"fwrite() bin_file " + file_ + " failed"}; - } - } - - auto map() const -> T* - { - assert(read_mode_); - if (!fp_) { open_file(); } - int fid = fileno(fp_); - mapped_ptr_ = mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, fid, 0); - if (mapped_ptr_ == MAP_FAILED) { - mapped_ptr_ = nullptr; - throw std::runtime_error{"mmap error: Value of errno " + std::to_string(errno) + ", " + - std::string(strerror(errno))}; - } - return reinterpret_cast(reinterpret_cast(mapped_ptr_) + 2 * sizeof(uint32_t) + - subset_first_row_ * ndims_ * sizeof(T)); - } - - void unmap() const - { - if (munmap(mapped_ptr_, file_size_) == -1) { - throw std::runtime_error{"munmap error: " + std::string(strerror(errno))}; - } - } - - private: - void check_suffix(); - void open_file() const; - - std::string file_; - bool read_mode_; - uint32_t subset_first_row_; - uint32_t subset_size_; - - mutable FILE* fp_{nullptr}; - mutable uint32_t nrows_; - mutable uint32_t ndims_; - mutable size_t file_size_; - mutable void* mapped_ptr_{nullptr}; -}; - -template -bin_file::bin_file(std::string file, - const std::string& mode, - uint32_t subset_first_row, - uint32_t subset_size) - : file_(std::move(file)), - read_mode_(mode == "r"), - subset_first_row_(subset_first_row), - subset_size_(subset_size) - +template +void generate_bernoulli(CarrierT* data, size_t words, double p) { - check_suffix(); - - if (!read_mode_) { - if (mode == "w") { - if (subset_first_row != 0) { - throw std::runtime_error{"subset_first_row should be zero for write mode"}; - } - if (subset_size != 0) { - throw std::runtime_error{"subset_size should be zero for write mode"}; - } - } else { - throw std::runtime_error{"bin_file's mode must be either 'r' or 'w': " + file_}; + constexpr size_t kBitsPerCarrierValue = sizeof(CarrierT) * 8; + std::random_device rd; + std::mt19937 gen(rd()); + std::bernoulli_distribution d(p); + for (size_t i = 0; i < words; i++) { + CarrierT word = 0; + for (size_t j = 0; j < kBitsPerCarrierValue; j++) { + word |= CarrierT{d(gen)} << j; } + data[i] = word; } -} - -template -void bin_file::open_file() const -{ - fp_ = fopen(file_.c_str(), read_mode_ ? 
"r" : "w"); - if (!fp_) { throw std::runtime_error{"open bin_file failed: " + file_}; } - - if (read_mode_) { - struct stat statbuf; - if (stat(file_.c_str(), &statbuf) != 0) { throw std::runtime_error{"stat() failed: " + file_}; } - file_size_ = statbuf.st_size; +}; - uint32_t header[2]; - if (fread(header, sizeof(uint32_t), 2, fp_) != 2) { - throw std::runtime_error{"read header of bin_file failed: " + file_}; - } - nrows_ = header[0]; - ndims_ = header[1]; +template +struct dataset { + public: + using bitset_carrier_type = uint32_t; + static inline constexpr size_t kBitsPerCarrierValue = sizeof(bitset_carrier_type) * 8; - size_t expected_file_size = - 2 * sizeof(uint32_t) + static_cast(nrows_) * ndims_ * sizeof(T); - if (file_size_ != expected_file_size) { - throw std::runtime_error{"expected file size of " + file_ + " is " + - std::to_string(expected_file_size) + ", however, actual size is " + - std::to_string(file_size_)}; - } + private: + std::string name_; + std::string distance_; + blob base_set_; + blob query_set_; + std::optional> ground_truth_set_; + std::optional> filter_bitset_; - if (subset_first_row_ >= nrows_) { - throw std::runtime_error{file_ + ": subset_first_row (" + std::to_string(subset_first_row_) + - ") >= nrows (" + std::to_string(nrows_) + ")"}; - } - if (subset_first_row_ + subset_size_ > nrows_) { - throw std::runtime_error{file_ + ": subset_first_row (" + std::to_string(subset_first_row_) + - ") + subset_size (" + std::to_string(subset_size_) + ") > nrows (" + - std::to_string(nrows_) + ")"}; - } + mutable bool base_set_accessed_ = false; + mutable bool query_set_accessed_ = false; - if (subset_first_row_) { - static_assert(sizeof(long) == 8, "fseek() don't support 64-bit offset"); - if (fseek(fp_, sizeof(T) * subset_first_row_ * ndims_, SEEK_CUR) == -1) { - throw std::runtime_error{file_ + ": fseek failed"}; + public: + dataset(std::string name, + std::string base_file, + uint32_t subset_first_row, + uint32_t subset_size, + std::string query_file, + std::string distance, + std::optional groundtruth_neighbors_file, + std::optional filtering_rate = std::nullopt) + : name_{std::move(name)}, + distance_{std::move(distance)}, + base_set_{base_file, subset_first_row, subset_size}, + query_set_{query_file}, + ground_truth_set_{groundtruth_neighbors_file.has_value() + ? 
std::make_optional>(groundtruth_neighbors_file.value()) + : std::nullopt} + { + if (filtering_rate.has_value()) { + // Generate a random bitset for filtering + auto n_rows = static_cast(subset_size) + static_cast(subset_first_row); + if (subset_size == 0) { + // Read the base set size as a last resort only - for better laziness + n_rows = base_set_size(); } - nrows_ -= subset_first_row_; + auto bitset_size = (n_rows - 1) / kBitsPerCarrierValue + 1; + blob_file bitset_blob_file{static_cast(bitset_size), 1}; + blob_mmap bitset_blob{ + std::move(bitset_blob_file), false, HugePages::kDisable}; + generate_bernoulli(const_cast(bitset_blob.data()), + bitset_size, + 1.0 - filtering_rate.value()); + filter_bitset_.emplace(std::move(bitset_blob)); } - if (subset_size_) { nrows_ = subset_size_; } } -} -template -void bin_file::check_suffix() -{ - auto pos = file_.rfind('.'); - if (pos == std::string::npos) { - throw std::runtime_error{"name of bin_file doesn't have a suffix: " + file_}; + [[nodiscard]] auto name() const -> std::string { return name_; } + [[nodiscard]] auto distance() const -> std::string { return distance_; } + [[nodiscard]] auto dim() const -> int + { + // If any of base/query set are already accessed, use those + if (base_set_accessed_) { return static_cast(base_set_.n_cols()); } + if (query_set_accessed_) { return static_cast(query_set_.n_cols()); } + // Otherwise, try reading both (one of the two sets may be missing) + try { + query_set_accessed_ = true; + return static_cast(query_set_.n_cols()); + } catch (const std::runtime_error& e) { + // Any exception raised above will re-raise next time we try to access the query set. + query_set_accessed_ = false; + query_set_.reset_lazy_state(); + } + base_set_accessed_ = true; + return static_cast(base_set_.n_cols()); + } + [[nodiscard]] auto max_k() const -> uint32_t + { + if (ground_truth_set_.has_value()) { return ground_truth_set_->n_cols(); } + return 0; } - std::string suffix = file_.substr(pos + 1); - - if constexpr (std::is_same_v) { - if (suffix != "fbin") { - throw std::runtime_error{"bin_file should has .fbin suffix: " + file_}; - } - } else if constexpr (std::is_same_v) { - if (suffix != "f16bin" && suffix != "fbin") { - throw std::runtime_error{"bin_file should has .f16bin suffix: " + file_}; - } - } else if constexpr (std::is_same_v) { - if (suffix != "ibin") { - throw std::runtime_error{"bin_file should has .ibin suffix: " + file_}; - } - } else if constexpr (std::is_same_v) { - if (suffix != "u8bin") { - throw std::runtime_error{"bin_file should has .u8bin suffix: " + file_}; - } - } else if constexpr (std::is_same_v) { - if (suffix != "i8bin") { - throw std::runtime_error{"bin_file should has .i8bin suffix: " + file_}; - } - } else { - throw std::runtime_error( - "T of bin_file should be one of float, half, int, uint8_t, or int8_t"); + [[nodiscard]] auto base_set_size() const -> size_t + { + base_set_accessed_ = true; + return base_set_.n_rows(); } -} - -template -class dataset { - public: - explicit dataset(std::string name) : name_(std::move(name)) {} - dataset(std::string name, std::string distance) - : name_(std::move(name)), distance_(std::move(distance)) + [[nodiscard]] auto query_set_size() const -> size_t { + query_set_accessed_ = true; + return query_set_.n_rows(); } - dataset(const dataset&) = delete; - auto operator=(const dataset&) -> dataset& = delete; - virtual ~dataset(); - - auto name() const -> std::string { return name_; } - auto distance() const -> std::string { return distance_; } - virtual auto dim() 
const -> int = 0; - virtual auto max_k() const -> uint32_t = 0; - virtual auto base_set_size() const -> size_t = 0; - virtual auto query_set_size() const -> size_t = 0; - // load data lazily, so don't pay the overhead of reading unneeded set - // e.g. don't load base set when searching - auto base_set() const -> const T* + [[nodiscard]] auto gt_set() const -> const IdxT* { - if (!base_set_) { load_base_set(); } - return base_set_; + if (ground_truth_set_.has_value()) { return ground_truth_set_->data(); } + return nullptr; } - auto query_set() const -> const T* + [[nodiscard]] auto query_set() const -> const DataT* { - if (!query_set_) { load_query_set(); } - return query_set_; + query_set_accessed_ = true; + return query_set_.data(); } - - auto gt_set() const -> const int32_t* + [[nodiscard]] auto query_set(MemoryType memory_type, + HugePages request_hugepages_2mb = HugePages::kDisable) const + -> const DataT* { - if (!gt_set_) { load_gt_set(); } - return gt_set_; + query_set_accessed_ = true; + return query_set_.data(memory_type, request_hugepages_2mb); } - auto base_set_on_gpu() const -> const T*; - auto query_set_on_gpu() const -> const T*; - auto mapped_base_set() const -> const T*; - - auto query_set(MemoryType memory_type) const -> const T* + [[nodiscard]] auto base_set() const -> const DataT* { - switch (memory_type) { - case MemoryType::kDevice: return query_set_on_gpu(); - case MemoryType::kHost: { - auto r = query_set(); -#ifndef BUILD_CPU_ONLY - if (query_set_pinned_) { - cudaHostUnregister(const_cast(r)); - query_set_pinned_ = false; - } -#endif - return r; - } - case MemoryType::kHostPinned: { - auto r = query_set(); -#ifndef BUILD_CPU_ONLY - if (!query_set_pinned_) { - cudaHostRegister( - const_cast(r), query_set_size() * dim() * sizeof(T), cudaHostRegisterDefault); - query_set_pinned_ = true; - } -#endif - return r; - } - default: return nullptr; - } + base_set_accessed_ = true; + return base_set_.data(); } - - auto base_set(MemoryType memory_type) const -> const T* + [[nodiscard]] auto base_set(MemoryType memory_type, + HugePages request_hugepages_2mb = HugePages::kDisable) const + -> const DataT* { - switch (memory_type) { - case MemoryType::kDevice: return base_set_on_gpu(); - case MemoryType::kHost: { - auto r = base_set(); -#ifndef BUILD_CPU_ONLY - if (base_set_pinned_) { - cudaHostUnregister(const_cast(r)); - base_set_pinned_ = false; - } -#endif - return r; - } - case MemoryType::kHostPinned: { - auto r = base_set(); -#ifndef BUILD_CPU_ONLY - if (!base_set_pinned_) { - cudaHostRegister( - const_cast(r), base_set_size() * dim() * sizeof(T), cudaHostRegisterDefault); - base_set_pinned_ = true; - } -#endif - return r; - } - case MemoryType::kHostMmap: return mapped_base_set(); - default: return nullptr; - } + base_set_accessed_ = true; + return base_set_.data(memory_type, request_hugepages_2mb); } - protected: - virtual void load_base_set() const = 0; - virtual void load_gt_set() const = 0; - virtual void load_query_set() const = 0; - virtual void map_base_set() const = 0; - - std::string name_; - std::string distance_; - - mutable T* base_set_ = nullptr; - mutable T* query_set_ = nullptr; - mutable T* d_base_set_ = nullptr; - mutable T* d_query_set_ = nullptr; - mutable T* mapped_base_set_ = nullptr; - mutable int32_t* gt_set_ = nullptr; - - mutable bool base_set_pinned_ = false; - mutable bool query_set_pinned_ = false; -}; - -template -dataset::~dataset() -{ -#ifndef BUILD_CPU_ONLY - if (d_base_set_) { cudaFree(d_base_set_); } - if (d_query_set_) { 
cudaFree(d_query_set_); } - if (base_set_pinned_) { cudaHostUnregister(base_set_); } - if (query_set_pinned_) { cudaHostUnregister(query_set_); } -#endif - delete[] base_set_; - delete[] query_set_; - delete[] gt_set_; -} - -template -auto dataset::base_set_on_gpu() const -> const T* -{ -#ifndef BUILD_CPU_ONLY - if (!d_base_set_) { - base_set(); - cudaMalloc(reinterpret_cast(&d_base_set_), base_set_size() * dim() * sizeof(T)); - cudaMemcpy(d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); + [[nodiscard]] auto filter_bitset() const -> const bitset_carrier_type* + { + if (filter_bitset_.has_value()) { return filter_bitset_->data(); } + return nullptr; } -#endif - return d_base_set_; -} -template -auto dataset::query_set_on_gpu() const -> const T* -{ -#ifndef BUILD_CPU_ONLY - if (!d_query_set_) { - query_set(); - cudaMalloc(reinterpret_cast(&d_query_set_), query_set_size() * dim() * sizeof(T)); - cudaMemcpy( - d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); + [[nodiscard]] auto filter_bitset(MemoryType memory_type, + HugePages request_hugepages_2mb = HugePages::kDisable) const + -> const bitset_carrier_type* + { + if (filter_bitset_.has_value()) { + return filter_bitset_->data(memory_type, request_hugepages_2mb); + } + return nullptr; } -#endif - return d_query_set_; -} - -template -auto dataset::mapped_base_set() const -> const T* -{ - if (!mapped_base_set_) { map_base_set(); } - return mapped_base_set_; -} - -template -class bin_dataset : public dataset { - public: - bin_dataset(const std::string& name, - const std::string& base_file, - size_t subset_first_row, - size_t subset_size, - const std::string& query_file, - const std::string& distance, - const std::optional& groundtruth_neighbors_file); - - auto dim() const -> int override; - auto max_k() const -> uint32_t override; - auto base_set_size() const -> size_t override; - auto query_set_size() const -> size_t override; - - private: - void load_base_set() const; - void load_query_set() const; - void load_gt_set() const; - void map_base_set() const; - - mutable int dim_ = 0; - mutable uint32_t max_k_ = 0; - mutable size_t base_set_size_ = 0; - mutable size_t query_set_size_ = 0; - - bin_file base_file_; - bin_file query_file_; - std::optional> gt_file_{std::nullopt}; }; -template -bin_dataset::bin_dataset(const std::string& name, - const std::string& base_file, - size_t subset_first_row, - size_t subset_size, - const std::string& query_file, - const std::string& distance, - const std::optional& groundtruth_neighbors_file) - : dataset(name, distance), - base_file_(base_file, "r", subset_first_row, subset_size), - query_file_(query_file, "r") -{ - if (groundtruth_neighbors_file.has_value()) { - gt_file_.emplace(groundtruth_neighbors_file.value(), "r"); - } -} - -template -auto bin_dataset::dim() const -> int -{ - if (dim_ > 0) { return dim_; } - if (base_set_size() > 0) { return dim_; } - if (query_set_size() > 0) { return dim_; } - return dim_; -} - -template -auto bin_dataset::max_k() const -> uint32_t -{ - if (!this->gt_set_) { load_gt_set(); } - return max_k_; -} - -template -auto bin_dataset::query_set_size() const -> size_t -{ - if (query_set_size_ > 0) { return query_set_size_; } - int dim; - query_file_.get_shape(&query_set_size_, &dim); - if (query_set_size_ == 0) { throw std::runtime_error{"Zero query set size"}; } - if (dim == 0) { throw std::runtime_error{"Zero query set dim"}; } - if (dim_ == 0) { - dim_ = dim; - } else if (dim_ != dim) { - throw 
std::runtime_error{"base set dim (" + std::to_string(dim_) + ") != query set dim (" + - std::to_string(dim)}; - } - return query_set_size_; -} - -template -auto bin_dataset::base_set_size() const -> size_t -{ - if (base_set_size_ > 0) { return base_set_size_; } - int dim; - base_file_.get_shape(&base_set_size_, &dim); - if (base_set_size_ == 0) { throw std::runtime_error{"Zero base set size"}; } - if (dim == 0) { throw std::runtime_error{"Zero base set dim"}; } - if (dim_ == 0) { - dim_ = dim; - } else if (dim_ != dim) { - throw std::runtime_error{"base set dim (" + std::to_string(dim) + ") != query set dim (" + - std::to_string(dim_)}; - } - return base_set_size_; -} - -template -void bin_dataset::load_base_set() const -{ - this->base_set_ = new T[base_set_size() * dim()]; - base_file_.read(this->base_set_); -} - -template -void bin_dataset::load_query_set() const -{ - this->query_set_ = new T[query_set_size() * dim()]; - query_file_.read(this->query_set_); -} - -template -void bin_dataset::load_gt_set() const -{ - if (gt_file_.has_value()) { - size_t queries; - int k; - gt_file_->get_shape(&queries, &k); - this->gt_set_ = new std::int32_t[queries * k]; - gt_file_->read(this->gt_set_); - max_k_ = k; - } -} - -template -void bin_dataset::map_base_set() const -{ - this->mapped_base_set_ = base_file_.map(); -} - } // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h index 11e0e4ad3..441a1eafa 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h @@ -275,4 +275,23 @@ void refine_helper(const raft::resources& res, } } +/** + * Construct a cuVS-compatible bitset filter object from a raw pointer to the bitset. + * + * @param[in] filter_bitset a pointer to a pre-generated bitset + * @param[in] n_rows the number of elements in the bitset / dataset. + * @return a shared pointer to the filter object (doesn't own the bitset data!) 
+ */ +inline auto make_cuvs_filter(const void* filter_bitset, int64_t n_rows) + -> std::shared_ptr +{ + if (filter_bitset != nullptr) { + return std::make_shared>( + raft::core::bitset_view( + const_cast(reinterpret_cast(filter_bitset)), n_rows)); + } else { + return std::make_shared(); + } +} + } // namespace cuvs::bench diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h index 6670ed892..af72dd02b 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h @@ -47,7 +47,7 @@ class cuvs_cagra_hnswlib : public algo, public algo_gpu { void build(const T* dataset, size_t nrow) final; - void set_search_param(const search_param_base& param) override; + void set_search_param(const search_param_base& param, const void* filter_bitset) override; void search(const T* queries, int batch_size, @@ -102,8 +102,10 @@ void cuvs_cagra_hnswlib::build(const T* dataset, size_t nrow) } template -void cuvs_cagra_hnswlib::set_search_param(const search_param_base& param_) +void cuvs_cagra_hnswlib::set_search_param(const search_param_base& param_, + const void* filter_bitset) { + if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); } search_param_ = dynamic_cast(param_); } diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index 8c9cb2d4f..f5f609710 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -119,7 +119,7 @@ class cuvs_cagra : public algo, public algo_gpu { void build(const T* dataset, size_t nrow) final; - void set_search_param(const search_param_base& param) override; + void set_search_param(const search_param_base& param, const void* filter_bitset) override; void set_search_dataset(const T* dataset, size_t nrow) override; @@ -187,6 +187,8 @@ class cuvs_cagra : public algo, public algo_gpu { size_t dynamic_batching_n_queues_; bool dynamic_batching_conservative_dispatch_; + std::shared_ptr filter_; + inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type) { switch (mem_type) { @@ -228,8 +230,10 @@ inline auto allocator_to_string(AllocatorType mem_type) -> std::string } template -void cuvs_cagra::set_search_param(const search_param_base& param) +void cuvs_cagra::set_search_param(const search_param_base& param, + const void* filter_bitset) { + filter_ = make_cuvs_filter(filter_bitset, index_->size()); auto sp = dynamic_cast(param); bool needs_dynamic_batcher_update = (dynamic_batching_max_batch_size_ != sp.dynamic_batching_max_batch_size) || @@ -292,7 +296,8 @@ void cuvs_cagra::set_search_param(const search_param_base& param) sp.dynamic_batching_n_queues, sp.dynamic_batching_conservative_dispatch}, *index_, - search_params_); + search_params_, + filter_.get()); } dynamic_batcher_sp_.dispatch_timeout_ms = sp.dynamic_batching_dispatch_timeout_ms; } else { @@ -379,7 +384,7 @@ void cuvs_cagra::search_base(const T* queries, distances_view); } else { cuvs::neighbors::cagra::search( - handle_, search_params_, *index_, queries_view, neighbors_view, distances_view); + handle_, search_params_, *index_, queries_view, neighbors_view, distances_view, *filter_); } if constexpr (sizeof(IdxT) != sizeof(algo_base::index_type)) { diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h index b7e335622..a9dec026e 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h +++ 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
index 6670ed892..af72dd02b 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
@@ -47,7 +47,7 @@ class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* queries,
               int batch_size,
@@ -102,8 +102,10 @@ void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T, typename IdxT>
-void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_)
+void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_,
+                                                   const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   search_param_ = dynamic_cast<const search_param&>(param_);
 }
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
index 8c9cb2d4f..f5f609710 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
@@ -119,7 +119,7 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void set_search_dataset(const T* dataset, size_t nrow) override;
 
@@ -187,6 +187,8 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
   size_t dynamic_batching_n_queues_;
   bool dynamic_batching_conservative_dispatch_;
 
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
+
   inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type)
   {
     switch (mem_type) {
@@ -228,8 +230,10 @@ inline auto allocator_to_string(AllocatorType mem_type) -> std::string
 }
 
 template <typename T, typename IdxT>
-void cuvs_cagra<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_cagra<T, IdxT>::set_search_param(const search_param_base& param,
+                                           const void* filter_bitset)
 {
+  filter_ = make_cuvs_filter(filter_bitset, index_->size());
   auto sp = dynamic_cast<const search_param&>(param);
   bool needs_dynamic_batcher_update =
     (dynamic_batching_max_batch_size_ != sp.dynamic_batching_max_batch_size) ||
@@ -292,7 +296,8 @@ void cuvs_cagra<T, IdxT>::set_search_param(const search_param_base& param)
                           sp.dynamic_batching_n_queues,
                           sp.dynamic_batching_conservative_dispatch},
         *index_,
-        search_params_);
+        search_params_,
+        filter_.get());
     }
     dynamic_batcher_sp_.dispatch_timeout_ms = sp.dynamic_batching_dispatch_timeout_ms;
   } else {
@@ -379,7 +384,7 @@ void cuvs_cagra<T, IdxT>::search_base(const T* queries,
                                         distances_view);
   } else {
     cuvs::neighbors::cagra::search(
-      handle_, search_params_, *index_, queries_view, neighbors_view, distances_view);
+      handle_, search_params_, *index_, queries_view, neighbors_view, distances_view, *filter_);
   }
 
   if constexpr (sizeof(IdxT) != sizeof(algo_base::index_type)) {
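A note on ownership in the CAGRA wrapper above: the dynamic batcher receives only the raw filter_.get() pointer, and search_base dereferences *filter_ on every batch, so the wrapper keeps filter_ as an owning shared_ptr member. The standalone sketch below illustrates the pattern; the types are simplified stand-ins, not the cuVS classes.

// Simplified stand-ins illustrating the ownership pattern: downstream
// consumers hold non-owning references, so the wrapper must own the filter.
#include <cassert>
#include <memory>

struct base_filter {
  virtual ~base_filter() = default;
};

struct batcher {
  const base_filter* filter = nullptr;  // non-owning, like filter_.get()
};

class wrapper {
  std::shared_ptr<base_filter> filter_;  // owning: keeps the filter alive
  batcher batcher_;

 public:
  void set_search_param()
  {
    filter_ = std::make_shared<base_filter>();  // stands in for make_cuvs_filter
    batcher_.filter = filter_.get();  // must be refreshed: the old pointer dangles
  }

  void search() const
  {
    assert(filter_ != nullptr);  // the search path dereferences *filter_
  }
};

int main()
{
  wrapper w;
  w.set_search_param();
  w.search();
  return 0;
}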
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h
index b7e335622..a9dec026e 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_flat_wrapper.h
@@ -59,7 +59,7 @@ class cuvs_ivf_flat : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* queries,
               int batch_size,
@@ -92,6 +92,8 @@ class cuvs_ivf_flat : public algo<T>, public algo_gpu {
   std::shared_ptr<cuvs::neighbors::ivf_flat::index<T, IdxT>> index_;
   int device_;
   int dimension_;
+
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
 };
 
 template <typename T, typename IdxT>
@@ -111,8 +113,10 @@ void cuvs_ivf_flat<T, IdxT>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T, typename IdxT>
-void cuvs_ivf_flat<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_ivf_flat<T, IdxT>::set_search_param(const search_param_base& param,
+                                              const void* filter_bitset)
 {
+  filter_ = make_cuvs_filter(filter_bitset, index_->size());
   auto sp = dynamic_cast<const search_param&>(param);
   search_params_ = sp.ivf_flat_params;
   assert(search_params_.n_probes <= index_params_.n_lists);
@@ -162,7 +166,8 @@ void cuvs_ivf_flat<T, IdxT>::search(
     *index_,
     raft::make_device_matrix_view<const T, int64_t>(queries, batch_size, index_->dim()),
     raft::make_device_matrix_view<IdxT, int64_t>(neighbors_idx_t, batch_size, k),
-    raft::make_device_matrix_view<float, int64_t>(distances, batch_size, k));
+    raft::make_device_matrix_view<float, int64_t>(distances, batch_size, k),
+    *filter_);
 
   if constexpr (sizeof(IdxT) != sizeof(algo_base::index_type)) {
     raft::linalg::unaryOp(neighbors,
                           neighbors_idx_t,
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_ivf_pq_wrapper.h
index dac766669..5bef85b7e 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ivf_pq_wrapper.h
@@ -67,7 +67,7 @@ class cuvs_ivf_pq : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
   void set_search_dataset(const T* dataset, size_t nrow) override;
 
   void search(const T* queries,
@@ -110,6 +110,8 @@ class cuvs_ivf_pq : public algo<T>, public algo_gpu {
 
   std::shared_ptr<cuvs::neighbors::dynamic_batching::index<T, IdxT>> dynamic_batcher_;
   cuvs::neighbors::dynamic_batching::search_params dynamic_batcher_sp_{};
+
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
 };
 
 template <typename T, typename IdxT>
@@ -144,8 +146,10 @@ std::unique_ptr<algo<T>> cuvs_ivf_pq<T, IdxT>::copy()
 }
 
 template <typename T, typename IdxT>
-void cuvs_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param,
+                                            const void* filter_bitset)
 {
+  filter_ = make_cuvs_filter(filter_bitset, index_->size());
   auto sp = dynamic_cast<const search_param&>(param);
   search_params_ = sp.pq_param;
   refine_ratio_ = sp.refine_ratio;
@@ -160,7 +164,8 @@ void cuvs_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param)
                         sp.dynamic_batching_n_queues,
                         sp.dynamic_batching_conservative_dispatch},
       *index_,
-      search_params_);
+      search_params_,
+      filter_.get());
     dynamic_batcher_sp_.dispatch_timeout_ms = sp.dynamic_batching_dispatch_timeout_ms;
   } else {
     dynamic_batcher_.reset();
@@ -204,7 +209,7 @@ void cuvs_ivf_pq<T, IdxT>::search_base(
                                          distances_view);
   } else {
     cuvs::neighbors::ivf_pq::search(
-      handle_, search_params_, *index_, queries_view, neighbors_view, distances_view);
+      handle_, search_params_, *index_, queries_view, neighbors_view, distances_view, *filter_);
   }
 
   if constexpr (sizeof(IdxT) != sizeof(algo_base::index_type)) {
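Both IVF wrappers above search into a scratch buffer of the index's own id type and widen the result afterwards: when sizeof(IdxT) differs from the benchmark's algo_base::index_type, the ids are converted elementwise (on device, via raft::linalg::unaryOp). Below is a host-side sketch of the same conversion, assuming index_type is size_t as in the benchmark harness.

// Host-side sketch of the id widening the IVF wrappers perform when
// IdxT != algo_base::index_type (the real code does this on device).
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

int main()
{
  using index_type = std::size_t;                      // assumed algo_base::index_type
  std::vector<std::int64_t> neighbors_idx_t{3, 1, 4};  // IdxT-typed search output
  std::vector<index_type> neighbors(neighbors_idx_t.size());
  std::transform(neighbors_idx_t.begin(),
                 neighbors_idx_t.end(),
                 neighbors.begin(),
                 [](std::int64_t i) { return static_cast<index_type>(i); });
  return 0;
}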
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h
index 50c1ff4db..cb2bbd9b5 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h
@@ -52,7 +52,7 @@ class cuvs_mg_cagra : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void set_search_dataset(const T* dataset, size_t nrow) override;
 
@@ -114,8 +114,10 @@ void cuvs_mg_cagra<T, IdxT>::build(const T* dataset, size_t nrow)
 inline auto allocator_to_string(AllocatorType mem_type) -> std::string;
 
 template <typename T, typename IdxT>
-void cuvs_mg_cagra<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_mg_cagra<T, IdxT>::set_search_param(const search_param_base& param,
+                                              const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto sp = dynamic_cast<const search_param&>(param);
   // search_params_ = static_cast<mg::search_params<cagra::search_params>>(sp.p);
   cagra::search_params* search_params_ptr_ = static_cast<cagra::search_params*>(&search_params_);
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
index 0540edc8f..b9f8b4b32 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
@@ -45,7 +45,7 @@ class cuvs_mg_ivf_flat : public algo<T>, public algo_gpu {
   }
 
   void build(const T* dataset, size_t nrow) final;
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
   void search(const T* queries,
               int batch_size,
               int k,
@@ -91,8 +91,10 @@ void cuvs_mg_ivf_flat<T, IdxT>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T, typename IdxT>
-void cuvs_mg_ivf_flat<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_mg_ivf_flat<T, IdxT>::set_search_param(const search_param_base& param,
+                                                 const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto sp = dynamic_cast<const search_param&>(param);
   // search_params_ = sp.ivf_flat_params;
   ivf_flat::search_params* search_params_ptr_ =
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
index 65ca1bb11..26781c522 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
@@ -45,7 +45,7 @@ class cuvs_mg_ivf_pq : public algo<T>, public algo_gpu {
   }
 
   void build(const T* dataset, size_t nrow) final;
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
   void search(const T* queries,
               int batch_size,
               int k,
@@ -91,8 +91,10 @@ void cuvs_mg_ivf_pq<T, IdxT>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T, typename IdxT>
-void cuvs_mg_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param)
+void cuvs_mg_ivf_pq<T, IdxT>::set_search_param(const search_param_base& param,
+                                               const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto sp = dynamic_cast<const search_param&>(param);
   // search_params_ = static_cast<mg::search_params<ivf_pq::search_params>>(sp.pq_param);
   ivf_pq::search_params* search_params_ptr_ = static_cast<ivf_pq::search_params*>(&search_params_);
diff --git a/cpp/bench/ann/src/cuvs/cuvs_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_wrapper.h
index bf0fa5934..69bdaeb12 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_wrapper.h
@@ -58,7 +58,7 @@ class cuvs_gpu : public algo<T>, public algo_gpu {
 
   void build(const T*, size_t) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* queries,
               int batch_size,
@@ -91,6 +91,8 @@ class cuvs_gpu : public algo<T>, public algo_gpu {
   int device_;
   const T* dataset_;
   size_t nrow_;
+
+  std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
 };
 
 template <typename T>
@@ -111,9 +113,9 @@ void cuvs_gpu<T>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T>
-void cuvs_gpu<T>::set_search_param(const search_param_base&)
+void cuvs_gpu<T>::set_search_param(const search_param_base&, const void* filter_bitset)
 {
-  // Nothing to set here as it is brute force implementation
+  filter_ = make_cuvs_filter(filter_bitset, index_->size());
 }
 
 template <typename T>
@@ -155,12 +157,8 @@ void cuvs_gpu<T>::search(
     raft::make_device_matrix_view<algo_base::index_type, int64_t>(neighbors, batch_size, k);
   auto distances_view =
     raft::make_device_matrix_view<float, int64_t>(distances, batch_size, k);
-  cuvs::neighbors::brute_force::search(handle_,
-                                       *index_,
-                                       queries_view,
-                                       neighbors_view,
-                                       distances_view,
-                                       cuvs::neighbors::filtering::none_sample_filter{});
+  cuvs::neighbors::brute_force::search(
+    handle_, *index_, queries_view, neighbors_view, distances_view, *filter_);
 }
 
 template <typename T>
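The brute-force change above removes the only call site that spelled out none_sample_filter explicitly: since make_cuvs_filter already returns a none_sample_filter when no bitset is supplied, filtered and unfiltered searches now go through one uniform call. A standalone sketch of that dispatch follows; the types are simplified stand-ins, not the cuVS classes.

// Sketch of the dispatch behind the single call site: the search entry point
// takes the filter by base reference, so a none_sample_filter and a
// bitset_filter are interchangeable from the caller's point of view.
#include <iostream>
#include <memory>

struct base_filter {
  virtual ~base_filter() = default;
};
struct none_sample_filter : base_filter {};
struct bitset_filter : base_filter {};

void search(const base_filter& filter)  // stands in for brute_force::search
{
  // The real implementation inspects the concrete filter type internally.
  std::cout << "searching with "
            << (dynamic_cast<const none_sample_filter*>(&filter) ? "no" : "a") << " filter\n";
}

int main()
{
  std::shared_ptr<base_filter> f = std::make_shared<none_sample_filter>();
  search(*f);  // unfiltered: prints "searching with no filter"
  f = std::make_shared<bitset_filter>();
  search(*f);  // filtered: prints "searching with a filter"
  return 0;
}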
diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
index 0cc40de37..4cb630ed2 100644
--- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
+++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
@@ -75,7 +75,7 @@ class faiss_cpu : public algo<T> {
 
   void build(const T* dataset, size_t nrow) final;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void init_quantizer(int dim)
   {
@@ -153,8 +153,9 @@ void faiss_cpu<T>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T>
-void faiss_cpu<T>::set_search_param(const search_param_base& param)
+void faiss_cpu<T>::set_search_param(const search_param_base& param, const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto sp = dynamic_cast<const search_param&>(param);
   int nprobe = sp.nprobe;
   assert(nprobe <= nlist_);
@@ -303,8 +304,10 @@ class faiss_cpu_flat : public faiss_cpu<T> {
   }
 
   // class faiss_cpu is more like a IVF class, so need special treating here
-  void set_search_param(const typename algo<T>::search_param& param) override
+  void set_search_param(const typename algo<T>::search_param& param,
+                        const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
    auto search_param = dynamic_cast<const typename faiss_cpu<T>::search_param&>(param);
     if (!this->thread_pool_ || this->num_threads_ != search_param.num_threads) {
       this->num_threads_ = search_param.num_threads;
diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
index 6cf1fe4b0..ed80917e8 100644
--- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
+++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
@@ -111,7 +111,7 @@ class faiss_gpu : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) final;
 
-  virtual void set_search_param(const search_param_base& param) {}
+  virtual void set_search_param(const search_param_base& param, const void* filter_bitset) {}
 
   void set_search_dataset(const T* dataset, size_t nrow) override { dataset_ = dataset; }
 
@@ -329,8 +329,9 @@ class faiss_gpu_ivf_flat : public faiss_gpu<T> {
       this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config);
   }
 
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = dynamic_cast<const typename faiss_gpu<T>::search_param&>(param);
     int nprobe = sp.nprobe;
     assert(nprobe <= this->nlist_);
@@ -386,8 +387,9 @@ class faiss_gpu_ivfpq : public faiss_gpu<T> {
       config);
   }
 
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = dynamic_cast<const typename faiss_gpu<T>::search_param&>(param);
     int nprobe = sp.nprobe;
     assert(nprobe <= this->nlist_);
@@ -449,8 +451,9 @@ class faiss_gpu_ivfsq : public faiss_gpu<T> {
       this->gpu_resource_.get(), dim, param.nlist, qtype, this->metric_type_, true, config);
   }
 
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = dynamic_cast<const typename faiss_gpu<T>::search_param&>(param);
     int nprobe = sp.nprobe;
     assert(nprobe <= this->nlist_);
@@ -493,8 +496,9 @@ class faiss_gpu_flat : public faiss_gpu<T> {
     this->index_ = std::make_shared<faiss::gpu::GpuIndexFlat>(
       this->gpu_resource_.get(), dim, this->metric_type_, config);
   }
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = dynamic_cast<const typename faiss_gpu<T>::search_param&>(param);
     int nprobe = sp.nprobe;
     assert(nprobe <= this->nlist_);
@@ -548,8 +552,9 @@ class faiss_gpu_cagra : public faiss_gpu<T> {
      this->gpu_resource_.get(), dim, parse_metric_faiss(this->metric_), config);
   }
 
-  void set_search_param(const search_param_base& param) override
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
   {
+    if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
     auto sp = static_cast<const typename faiss_gpu_cagra<T>::search_param&>(param);
     this->search_params_ = std::make_shared<faiss::gpu::SearchParametersCagra>(sp.p);
   }
diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
index e2ca18e22..b1c6c7b26 100644
--- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
+++ b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh
@@ -57,7 +57,10 @@ class ggnn : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) override { impl_->build(dataset, nrow); }
 
-  void set_search_param(const search_param_base& param) override { impl_->set_search_param(param); }
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override
+  {
+    impl_->set_search_param(param, filter_bitset);
+  }
   void search(const T* queries,
               int batch_size,
               int k,
@@ -128,7 +131,7 @@ class ggnn_impl : public algo<T>, public algo_gpu {
 
   void build(const T* dataset, size_t nrow) override;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* queries,
               int batch_size,
               int k,
@@ -243,8 +246,10 @@ void ggnn_impl<T, measure, D, KBuild, KQuery, S>::set_search_dataset(const T* da
 }
 
 template <typename T, DistanceMeasure measure, int D, int KBuild, int KQuery, int S>
-void ggnn_impl<T, measure, D, KBuild, KQuery, S>::set_search_param(const search_param_base& param)
+void ggnn_impl<T, measure, D, KBuild, KQuery, S>::set_search_param(const search_param_base& param,
+                                                                   const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   search_param_ = dynamic_cast<const typename ggnn<T>::search_param&>(param);
 }
diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
index 9d643f12a..d6870ae1c 100644
--- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
+++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
@@ -79,7 +79,7 @@ class hnsw_lib : public algo<T> {
 
   void build(const T* dataset, size_t nrow) override;
 
-  void set_search_param(const search_param_base& param) override;
+  void set_search_param(const search_param_base& param, const void* filter_bitset) override;
 
   void search(const T* query,
               int batch_size,
               int k,
@@ -169,8 +169,9 @@ void hnsw_lib<T>::build(const T* dataset, size_t nrow)
 }
 
 template <typename T>
-void hnsw_lib<T>::set_search_param(const search_param_base& param_)
+void hnsw_lib<T>::set_search_param(const search_param_base& param_, const void* filter_bitset)
 {
+  if (filter_bitset != nullptr) { throw std::runtime_error("Filtering is not supported yet."); }
   auto param = dynamic_cast<const search_param&>(param_);
   appr_alg_->ef_ = param.ef;
   num_threads_ = param.num_threads;