Skip to content

Commit

Permalink
Merge pull request #263 from rapidsai/branch-24.08
Browse files Browse the repository at this point in the history
Forward-merge branch-24.08 into branch-24.10
  • Loading branch information
GPUtester authored Jul 30, 2024
2 parents 4ef1611 + 6658c31 commit 1e62df3
Show file tree
Hide file tree
Showing 18 changed files with 320 additions and 168 deletions.
4 changes: 3 additions & 1 deletion cpp/cmake/modules/ConfigureCUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

if(DISABLE_DEPRECATION_WARNINGS)
list(APPEND CUVS_CXX_FLAGS -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations
-DRAFT_HIDE_DEPRECATION_WARNINGS
)
endif()

# Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with
Expand Down
15 changes: 14 additions & 1 deletion cpp/include/cuvs/neighbors/brute_force.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,27 @@ struct index : cuvs::neighbors::index {
cuvs::distance::DistanceType metric,
T metric_arg = 0.0);

/** Construct a brute force index from dataset
*
* Constructs a brute force index from a dataset. This lets us precompute norms for
* the dataset, providing a speed benefit over doing this at query time.
* This index will store a non-owning reference to the dataset, but will move
* any norms supplied.
*/
index(raft::resources const& res,
raft::device_matrix_view<const T, int64_t, raft::col_major> dataset_view,
std::optional<raft::device_vector<T, int64_t>>&& norms,
cuvs::distance::DistanceType metric,
T metric_arg = 0.0);

/** Construct a brute force index from dataset
*
* This class stores a non-owning reference to the dataset and norms, with
* the dataset being supplied on device in a col_major format
*/
index(raft::resources const& res,
raft::device_matrix_view<const T, int64_t, raft::col_major> dataset_view,
std::optional<raft::device_vector<T, int64_t>>&& norms,
std::optional<raft::device_vector_view<const T, int64_t>> norms_view,
cuvs::distance::DistanceType metric,
T metric_arg = 0.0);

Expand Down
52 changes: 46 additions & 6 deletions cpp/include/cuvs/neighbors/cagra.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ struct ivf_pq_params {
* auto pq_params =
* cagra::graph_build_params::ivf_pq_params(dataset.extents());
* // modify/update index_params as needed
* index_params.add_data_on_build = true;
* pq_params.kmeans_trainset_fraction = 0.1;
* @endcode
*/
ivf_pq_params(raft::matrix_extent<int64_t> dataset_extents,
Expand All @@ -80,7 +80,8 @@ struct index_params : cuvs::neighbors::index_params {
/** Degree of output graph. */
size_t graph_degree = 64;
/**
* Specify compression parameters if compression is desired.
* Specify compression parameters if compression is desired. If set, this overrides
* `attach_dataset_on_build`: the compressed dataset is always added to the index.
*/
std::optional<cuvs::neighbors::vpq_params> compression = std::nullopt;

Expand All @@ -105,6 +106,36 @@ struct index_params : cuvs::neighbors::index_params {
graph_build_params::ivf_pq_params,
graph_build_params::nn_descent_params>
graph_build_params;
/**
* Whether to add the dataset content to the index, i.e.:
*
* - `true` means the index is filled with the dataset vectors and ready to search after calling
* `build` provided there is enough memory available.
* - `false` means `build` only builds the graph and the user is expected to
* update the dataset using cuvs::neighbors::cagra::update_dataset.
*
* Regardless of the value of `attach_dataset_on_build`, the search graph is created using all
* the vectors in the dataset. Setting `attach_dataset_on_build = false` can be useful if
* the user needs to build only the search graph but does not intend to search it using CAGRA
* (e.g. search using another graph search algorithm), or if specific memory placement options
* need to be applied on the dataset before it is attached to the index using the
* `update_dataset` API.
* @code{.cpp}
* auto dataset = raft::make_device_matrix<float, int64_t>(res, n_rows, n_cols);
* // use default index_parameters
* cagra::index_params index_params;
* // update index_params to only build the CAGRA graph
* index_params.attach_dataset_on_build = false;
* auto index = cagra::build(res, index_params, dataset.view());
* // assert that the dataset is not attached to the index
* ASSERT(index.dataset().extent(0) == 0);
* // update dataset
* index.update_dataset(res, dataset.view());
* // The index is now ready for search
* cagra::search(res, search_params, index, queries, neighbors, distances);
* @endcode
*/
bool attach_dataset_on_build = true;
};

/**
Expand Down Expand Up @@ -328,7 +359,6 @@ struct index : cuvs::neighbors::index {
* // the index only stores a reference to these.
* cagra::search(res, search_params, index, queries, neighbors, distances);
* @endcode
*
*/
template <typename data_accessor, typename graph_accessor>
index(raft::resources const& res,
Expand All @@ -353,7 +383,7 @@ struct index : cuvs::neighbors::index {
*
* If the new dataset rows are aligned on 16 bytes, then only a reference is stored to the
* dataset. It is the caller's responsibility to ensure that dataset stays alive as long as the
* index.
* index. It is expected that the same set of vectors is used for update_dataset and index build.
*/
void update_dataset(raft::resources const& res,
raft::device_matrix_view<const T, int64_t, raft::row_major> dataset)
Expand All @@ -371,15 +401,19 @@ struct index : cuvs::neighbors::index {
/**
* Replace the dataset with a new dataset.
*
* We create a copy of the dataset on the device. The index manages the lifetime of this copy.
* We create a copy of the dataset on the device. The index manages the lifetime of this copy. It
* is expected that the same set of vectors is used for update_dataset and index build.
*/
void update_dataset(raft::resources const& res,
raft::host_matrix_view<const T, int64_t, raft::row_major> dataset)
{
dataset_ = make_aligned_dataset(res, dataset, 16);
}

/** Replace the dataset with a new dataset. */
/**
* Replace the dataset with a new dataset. It is expected that the same set of vectors is used
* for update_dataset and index build.
*/
template <typename DatasetT>
auto update_dataset(raft::resources const& res, DatasetT&& dataset)
-> std::enable_if_t<std::is_base_of_v<cuvs::neighbors::dataset<int64_t>, DatasetT>>
Expand Down Expand Up @@ -453,6 +487,7 @@ struct index : cuvs::neighbors::index {
*
* The following distance metrics are supported:
* - L2
* - InnerProduct (currently only supported with IVF-PQ as the build algorithm)
*
* Usage example:
* @code{.cpp}
Expand Down Expand Up @@ -489,6 +524,7 @@ auto build(raft::resources const& res,
*
* The following distance metrics are supported:
* - L2
* - InnerProduct (currently only supported with IVF-PQ as the build algorithm)
*
* Usage example:
* @code{.cpp}
Expand Down Expand Up @@ -525,6 +561,7 @@ auto build(raft::resources const& res,
*
* The following distance metrics are supported:
* - L2
* - InnerProduct (currently only supported with IVF-PQ as the build algorithm)
*
* Usage example:
* @code{.cpp}
Expand Down Expand Up @@ -561,6 +598,7 @@ auto build(raft::resources const& res,
*
* The following distance metrics are supported:
* - L2
* - InnerProduct (currently only supported with IVF-PQ as the build algorithm)
*
* Usage example:
* @code{.cpp}
Expand Down Expand Up @@ -597,6 +635,7 @@ auto build(raft::resources const& res,
*
* The following distance metrics are supported:
* - L2
* - InnerProduct (currently only supported with IVF-PQ as the build algorithm)
*
* Usage example:
* @code{.cpp}
Expand Down Expand Up @@ -633,6 +672,7 @@ auto build(raft::resources const& res,
*
* The following distance metrics are supported:
* - L2
* - InnerProduct (currently only supported with IVF-PQ as the build algorithm)
*
* Usage example:
* @code{.cpp}
Expand Down
10 changes: 0 additions & 10 deletions cpp/include/cuvs/neighbors/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,6 @@ struct index_params {
cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded;
/** The argument used by some distance metrics. */
float metric_arg = 2.0f;
/**
* Whether to add the dataset content to the index, i.e.:
*
* - `true` means the index is filled with the dataset vectors and ready to search after calling
* `build`.
* - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but
* the index is left empty; you'd need to call `extend` on the index afterwards to populate it.
*/
bool add_data_on_build = true;
};

struct search_params {};
Expand Down Expand Up @@ -596,7 +587,6 @@ enable_if_valid_list_t<ListT> deserialize_list(const raft::resources& handle,
std::shared_ptr<ListT>& ld,
const typename ListT::spec_type& store_spec,
const typename ListT::spec_type& device_spec);

} // namespace ivf

}; // namespace cuvs::neighbors
92 changes: 92 additions & 0 deletions cpp/include/cuvs/neighbors/ivf_flat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ struct index_params : cuvs::neighbors::index_params {
* flag to `true` if you prefer to use as little GPU memory for the database as possible.
*/
bool conservative_memory_allocation = false;
/**
* Whether to add the dataset content to the index, i.e.:
*
* - `true` means the index is filled with the dataset vectors and ready to search after calling
* `build`.
* - `false` means `build` only trains the underlying model (e.g. quantizer or clustering), but
* the index is left empty; you'd need to call `extend` on the index afterwards to populate it.
*/
bool add_data_on_build = true;
};
/**
* @}
Expand Down Expand Up @@ -1870,6 +1879,89 @@ void reset_index(const raft::resources& res, index<int8_t, int64_t>* index);
*/
void reset_index(const raft::resources& res, index<uint8_t, int64_t>* index);

/**
* @brief Helper exposing the re-computation of list sizes and related arrays if IVF lists have been
* modified externally.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* raft::resources res;
* // use default index parameters
* ivf_pq::index_params index_params;
* // initialize an empty index
* ivf_pq::index<int64_t> index(res, index_params, D);
* ivf_pq::helpers::reset_index(res, &index);
* // resize the first IVF list to hold 5 records
* auto spec = list_spec<uint32_t, int64_t>{
* index->pq_bits(), index->pq_dim(), index->conservative_memory_allocation()};
* uint32_t new_size = 5;
* ivf::resize_list(res, list, spec, new_size, 0);
* raft::update_device(index.list_sizes(), &new_size, 1, stream);
* // recompute the internal state of the index
* ivf_pq::helpers::recompute_internal_state(res, index);
* @endcode
*
* @param[in] res raft resource
* @param[inout] index pointer to IVF-Flat index
*/
void recompute_internal_state(const raft::resources& res, index<float, int64_t>* index);

/**
* @brief Helper exposing the re-computation of list sizes and related arrays if IVF lists have been
* modified externally.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* raft::resources res;
* // use default index parameters
* ivf_pq::index_params index_params;
* // initialize an empty index
* ivf_pq::index<int64_t> index(res, index_params, D);
* ivf_pq::helpers::reset_index(res, &index);
* // resize the first IVF list to hold 5 records
* auto spec = list_spec<uint32_t, int64_t>{
* index->pq_bits(), index->pq_dim(), index->conservative_memory_allocation()};
* uint32_t new_size = 5;
* ivf::resize_list(res, list, spec, new_size, 0);
* raft::update_device(index.list_sizes(), &new_size, 1, stream);
* // recompute the internal state of the index
* ivf_pq::helpers::recompute_internal_state(res, index);
* @endcode
*
* @param[in] res raft resource
* @param[inout] index pointer to IVF-Flat index
*/
void recompute_internal_state(const raft::resources& res, index<int8_t, int64_t>* index);

/**
* @brief Helper exposing the re-computation of list sizes and related arrays if IVF lists have been
* modified externally.
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* raft::resources res;
* // use default index parameters
* ivf_pq::index_params index_params;
* // initialize an empty index
* ivf_pq::index<int64_t> index(res, index_params, D);
* ivf_pq::helpers::reset_index(res, &index);
* // resize the first IVF list to hold 5 records
* auto spec = list_spec<uint32_t, int64_t>{
* index->pq_bits(), index->pq_dim(), index->conservative_memory_allocation()};
* uint32_t new_size = 5;
* ivf::resize_list(res, list, spec, new_size, 0);
* raft::update_device(index.list_sizes(), &new_size, 1, stream);
* // recompute the internal state of the index
* ivf_pq::helpers::recompute_internal_state(res, index);
* @endcode
*
* @param[in] res raft resource
* @param[inout] index pointer to IVF-Flat index
*/
void recompute_internal_state(const raft::resources& res, index<uint8_t, int64_t>* index);
/**
* @}
*/
Expand Down
Loading

0 comments on commit 1e62df3

Please sign in to comment.