Skip to content
Closed
4 changes: 4 additions & 0 deletions faiss/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set(FAISS_GPU_SRC
GpuIndexIVFFlat.cu
GpuIndexIVFPQ.cu
GpuIndexIVFScalarQuantizer.cu
$<$<BOOL:${FAISS_ENABLE_RAFT}>:GpuIndexCagra.cu>
GpuResources.cpp
StandardGpuResources.cpp
impl/BinaryDistance.cu
Expand Down Expand Up @@ -91,6 +92,7 @@ set(FAISS_GPU_HEADERS
GpuFaissAssert.h
GpuIndex.h
GpuIndexBinaryFlat.h
$<$<BOOL:${FAISS_ENABLE_RAFT}>:GpuIndexCagra.h>
GpuIndexFlat.h
GpuIndexIVF.h
GpuIndexIVFFlat.h
Expand Down Expand Up @@ -238,10 +240,12 @@ generate_ivf_interleaved_code()

# RAFT-backed implementation files are only added to the GPU target lists
# when the build is configured with FAISS_ENABLE_RAFT.
if(FAISS_ENABLE_RAFT)
# Headers of the RAFT-based implementations (CAGRA, IVF-Flat, Flat).
list(APPEND FAISS_GPU_HEADERS
impl/RaftCagra.cuh
impl/RaftUtils.h
impl/RaftIVFFlat.cuh
impl/RaftFlatIndex.cuh)
# Matching translation units for the headers above.
list(APPEND FAISS_GPU_SRC
impl/RaftCagra.cu
impl/RaftFlatIndex.cu
impl/RaftIVFFlat.cu)
endif()
Expand Down
131 changes: 131 additions & 0 deletions faiss/gpu/GpuIndexCagra.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <faiss/gpu/GpuIndexCagra.h>
#include <faiss/gpu/impl/RaftCagra.cuh>
#include "GpuIndexCagra.h"

namespace faiss {
namespace gpu {

/// Creates an empty, untrained CAGRA index.
///
/// @param provider source of the GPU resources handed to the GpuIndex base;
///                 it is dereferenced unconditionally, so it must not be null
/// @param dims     dimensionality of the indexed vectors
/// @param metric   distance metric forwarded to the GpuIndex base
/// @param config   build-time options; a copy is retained in cagraConfig_
GpuIndexCagra::GpuIndexCagra(
        GpuResourcesProvider* provider,
        int dims,
        faiss::MetricType metric,
        GpuIndexCagraConfig config)
        : GpuIndex(provider->getResources(), dims, metric, 0.0f, config),
          cagraConfig_(config) {
    // The underlying RaftCagra instance is only created by train(), so the
    // index starts out untrained.
    is_trained = false;
}

/// Builds the CAGRA index from the given vectors.
///
/// Calling train() on an already trained index is a no-op. Otherwise a
/// RaftCagra instance is created from cagraConfig_ and trained on the data;
/// on success the index is marked trained and ntotal is set to `n`.
///
/// @param n number of training vectors
/// @param x training vectors, forwarded unchanged to RaftCagra::train
void GpuIndexCagra::train(idx_t n, const float* x) {
    if (this->is_trained) {
        FAISS_ASSERT(index_);
        return;
    }

    FAISS_ASSERT(!index_);

    // Build into a local handle first. If construction or training throws,
    // index_ stays empty and is_trained stays false, so a later train() call
    // does not trip the FAISS_ASSERT(!index_) above.
    auto built = std::make_shared<RaftCagra>(
            this->resources_.get(),
            this->d,
            cagraConfig_.intermediate_graph_degree,
            cagraConfig_.graph_degree,
            static_cast<faiss::cagra_build_algo>(cagraConfig_.build_algo),
            cagraConfig_.nn_descent_niter,
            this->metric_type,
            this->metric_arg,
            faiss::gpu::INDICES_64_BIT);

    built->train(n, x);

    // Publish the fully built index only after training succeeded.
    index_ = built;
    this->is_trained = true;
    this->ntotal = n;
}

/// Reports whether addImpl_ needs explicit IDs; always false for this index.
bool GpuIndexCagra::addImplRequiresIDs_() const {
    return false;
}

/// Incremental insertion is not implemented for this index: the call always
/// throws via FAISS_THROW_MSG and none of the arguments are inspected.
void GpuIndexCagra::addImpl_(
        idx_t /*n*/,
        const float* /*x*/,
        const idx_t* /*ids*/) {
    FAISS_THROW_MSG("adding vectors is not supported by GpuIndexCagra.");
}

/// Runs a k-nearest-neighbour search on the trained CAGRA index.
///
/// @param n             number of query vectors (must be > 0)
/// @param x             query vectors, wrapped as an {n, d} tensor
/// @param k             number of neighbours requested per query
/// @param distances     output buffer, wrapped as an {n, k} tensor
/// @param labels        output buffer, wrapped as an {n, k} tensor
/// @param search_params optional tuning parameters; must be a
///                      SearchParametersCagra when non-null. Default
///                      SearchParametersCagra values are used when null.
/// @throws FaissException if search_params is non-null but is not a
///                      SearchParametersCagra
void GpuIndexCagra::searchImpl_(
        idx_t n,
        const float* x,
        int k,
        float* distances,
        idx_t* labels,
        const SearchParameters* search_params) const {
    FAISS_ASSERT(this->is_trained && index_);
    FAISS_ASSERT(n > 0);

    // Tensor takes a non-const pointer, hence the const_cast on the queries.
    Tensor<float, 2, true> queries(const_cast<float*>(x), {n, this->d});
    Tensor<float, 2, true> outDistances(distances, {n, k});
    Tensor<idx_t, 2, true> outLabels(labels, {n, k});

    // Defaults live on the stack so nothing leaks if the search throws.
    const SearchParametersCagra defaultParams{};
    const SearchParametersCagra* params = &defaultParams;
    if (search_params) {
        params = dynamic_cast<const SearchParametersCagra*>(search_params);
        if (!params) {
            // A foreign SearchParameters subclass would otherwise be
            // dereferenced as a null pointer below.
            FAISS_THROW_MSG(
                    "GpuIndexCagra: search_params must be a SearchParametersCagra");
        }
    }

    index_->search(
            queries,
            k,
            outDistances,
            outLabels,
            params->max_queries,
            params->itopk_size,
            params->max_iterations,
            static_cast<faiss::cagra_search_algo>(params->algo),
            params->team_size,
            params->search_width,
            params->min_iterations,
            params->thread_block_size,
            static_cast<faiss::cagra_hash_mode>(params->hashmap_mode),
            params->hashmap_min_bitlen,
            params->hashmap_max_fill_rate,
            params->num_random_samplings,
            params->rand_xor_mask);
}

/// Clears the underlying index, if one exists, and zeroes ntotal.
/// is_trained is not modified here.
void GpuIndexCagra::reset() {
    DeviceScope scope(config_.device);

    if (!index_) {
        // Nothing was ever built, so the index must already be empty.
        FAISS_ASSERT(this->ntotal == 0);
        return;
    }

    index_->reset();
    this->ntotal = 0;
}

} // namespace gpu
} // namespace faiss
145 changes: 145 additions & 0 deletions faiss/gpu/GpuIndexCagra.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <faiss/gpu/GpuIndex.h>

namespace faiss {
namespace gpu {

class RaftCagra;

/// Algorithm used to build the k-NN graph of a CAGRA index.
/// NOTE(review): GpuIndexCagra::train static_casts this value to
/// faiss::cagra_build_algo, so the enumerator order presumably has to stay
/// in sync with that enum — confirm before reordering.
enum class graph_build_algo {
/// Use IVF-PQ to build the all-neighbors knn graph
IVF_PQ,
/// Experimental, use NN-Descent to build the all-neighbors knn graph
NN_DESCENT
};

/// Build-time configuration for GpuIndexCagra, extending the generic
/// GpuIndexConfig. All fields are forwarded to RaftCagra by
/// GpuIndexCagra::train.
struct GpuIndexCagraConfig : public GpuIndexConfig {
/// Degree of input graph for pruning.
size_t intermediate_graph_degree = 128;
/// Degree of output graph.
size_t graph_degree = 64;
/// ANN algorithm used to build the knn graph.
graph_build_algo build_algo = graph_build_algo::IVF_PQ;
/// Number of iterations to run if building with NN_DESCENT.
size_t nn_descent_niter = 20;
};

/// Search implementation selector for CAGRA.
/// NOTE(review): GpuIndexCagra::searchImpl_ static_casts this value to
/// faiss::cagra_search_algo, so the enumerator order presumably has to stay
/// in sync with that enum — confirm before reordering.
enum class search_algo {
/// For large batch sizes.
SINGLE_CTA,
/// For small batch sizes.
MULTI_CTA,
/// NOTE(review): intended use case is not documented here — TODO confirm.
MULTI_KERNEL,
/// Default value of SearchParametersCagra::algo; presumably lets the
/// implementation pick one of the above — confirm.
AUTO
};

/// Hashmap type used during search; AUTO is the default of
/// SearchParametersCagra::hashmap_mode.
/// NOTE(review): GpuIndexCagra::searchImpl_ static_casts this value to
/// faiss::cagra_hash_mode, so the enumerator order presumably has to stay in
/// sync with that enum — confirm before reordering.
enum class hash_mode { HASH, SMALL, AUTO };

/// Per-query tuning parameters for GpuIndexCagra searches. When a search is
/// issued without parameters, a default-constructed instance of this struct
/// is used.
struct SearchParametersCagra : SearchParameters {
/** Maximum number of queries to search at the same time (batch size). Auto
* select when 0. */
size_t max_queries = 0;

/** Number of intermediate search results retained during the search.
*
* This is the main knob to adjust the trade-off between accuracy and search
* speed. Higher values improve the search accuracy.
*/
size_t itopk_size = 64;

/** Upper limit of search iterations. Auto select when 0. */
size_t max_iterations = 0;

// In the following we list additional search parameters for fine tuning.
// Reasonable default values are automatically chosen.

/** Which search implementation to use. */
search_algo algo = search_algo::AUTO;

/** Number of threads used to calculate a single distance. 4, 8, 16, or 32.
* NOTE(review): the default is 0, presumably meaning auto select — confirm.
*/
size_t team_size = 0;

/** Number of graph nodes to select as the starting point for the search in
* each iteration. */
size_t search_width = 1;
/** Lower limit of search iterations. */
size_t min_iterations = 0;

/** Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. */
size_t thread_block_size = 0;
/** Hashmap type. Auto selection when AUTO. */
hash_mode hashmap_mode = hash_mode::AUTO;
/** Lower limit of hashmap bit length. More than 8.
* NOTE(review): the default is 0, presumably meaning auto select — confirm.
*/
size_t hashmap_min_bitlen = 0;
/** Upper limit of hashmap fill rate. More than 0.1, less than 0.9. */
float hashmap_max_fill_rate = 0.5;

/** Number of iterations of initial random seed node selection. 1 or more.
*/
uint32_t num_random_samplings = 1;
/** Bit mask used for initial random seed node selection. */
uint64_t rand_xor_mask = 0x128394;
};

/// GPU index wrapping a RaftCagra graph index. The graph is built once by
/// train(); adding vectors afterwards is not supported (addImpl_ throws).
struct GpuIndexCagra : public GpuIndex {
public:
/// Creates an untrained index.
/// @param provider source of the GPU resources used by the index
/// @param dims     dimensionality of the indexed vectors
/// @param metric   distance metric, L2 by default
/// @param config   build-time options, see GpuIndexCagraConfig
GpuIndexCagra(
GpuResourcesProvider* provider,
int dims,
faiss::MetricType metric = faiss::METRIC_L2,
GpuIndexCagraConfig config = GpuIndexCagraConfig());

~GpuIndexCagra() override = default;

/// Trains CAGRA based on the given vector data; a no-op when the index is
/// already trained. On success ntotal is set to `n`.
void train(idx_t n, const float* x) override;

/// Resets the underlying index, if any, and sets ntotal to 0.
void reset() override;

protected:
/// Always returns false.
bool addImplRequiresIDs_() const override;

/// Not supported: always throws.
void addImpl_(idx_t n, const float* x, const idx_t* ids) override;

/// Called from GpuIndex for search. `search_params`, when non-null, is
/// interpreted as a SearchParametersCagra.
void searchImpl_(
idx_t n,
const float* x,
int k,
float* distances,
idx_t* labels,
const SearchParameters* search_params) const override;

/// Our configuration options
const GpuIndexCagraConfig cagraConfig_;

/// Instance that we own; the RaftCagra implementation created by train().
/// Null until the index has been trained.
std::shared_ptr<RaftCagra> index_;
};

} // namespace gpu
} // namespace faiss
Loading