facebookresearch · divyegala · Oct 5, 2023 · Jan 23, 2024 · Jan 30, 2024 · Jan 30, 2024
diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt
@@ -29,6 +29,7 @@ set(FAISS_GPU_SRC
   GpuIndexIVFFlat.cu
   GpuIndexIVFPQ.cu
   GpuIndexIVFScalarQuantizer.cu
+  $<$<BOOL:${FAISS_ENABLE_RAFT}>:GpuIndexCagra.cu>
   GpuResources.cpp
   StandardGpuResources.cpp
   impl/BinaryDistance.cu
@@ -91,6 +92,7 @@ set(FAISS_GPU_HEADERS
   GpuFaissAssert.h
   GpuIndex.h
   GpuIndexBinaryFlat.h
+  $<$<BOOL:${FAISS_ENABLE_RAFT}>:GpuIndexCagra.h>
   GpuIndexFlat.h
   GpuIndexIVF.h
   GpuIndexIVFFlat.h
@@ -238,10 +240,12 @@ generate_ivf_interleaved_code()
 
 if(FAISS_ENABLE_RAFT)
   list(APPEND FAISS_GPU_HEADERS
+          impl/RaftCagra.cuh
           impl/RaftUtils.h
           impl/RaftIVFFlat.cuh
           impl/RaftFlatIndex.cuh)
   list(APPEND FAISS_GPU_SRC
+          impl/RaftCagra.cu
           impl/RaftFlatIndex.cu
           impl/RaftIVFFlat.cu)
 endif()

diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu
@@ -0,0 +1,131 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <faiss/gpu/GpuIndexCagra.h>
+#include <faiss/gpu/impl/RaftCagra.cuh>
+#include "GpuIndexCagra.h"
+
+namespace faiss {
+namespace gpu {
+
+/// Constructs an empty, untrained CAGRA index.
+///
+/// @param provider source of the GpuResources handed to the GpuIndex base
+/// @param dims     dimensionality forwarded to the GpuIndex base
+/// @param metric   metric type forwarded to the GpuIndex base
+/// @param config   build options; a copy is kept in cagraConfig_ and the
+///                 same object is also passed to the GpuIndex base
+GpuIndexCagra::GpuIndexCagra(
+        GpuResourcesProvider* provider,
+        int dims,
+        faiss::MetricType metric,
+        GpuIndexCagraConfig config)
+        : GpuIndex(
+                  provider->getResources(),
+                  dims,
+                  metric,
+                  0.0f,
+                  config),
+          cagraConfig_(config) {
+    // The underlying RaftCagra is only created by train(), so the index
+    // starts out untrained.
+    is_trained = false;
+}
+
+/// Trains CAGRA based on the given vector data.
+///
+/// Creates the underlying RaftCagra from cagraConfig_ and trains it on the
+/// `n` vectors in `x`. On success the index is marked trained and `ntotal`
+/// is set to `n`. Calling train() on an already trained index is a no-op.
+///
+/// @param n number of training vectors
+/// @param x training vectors, forwarded unchanged to RaftCagra::train
+void GpuIndexCagra::train(idx_t n, const float* x) {
+    // train() is a public entry point, so select the configured device here
+    // the same way reset() does.
+    DeviceScope scope(config_.device);
+
+    if (this->is_trained) {
+        FAISS_ASSERT(index_);
+        return;
+    }
+
+    FAISS_ASSERT(!index_);
+
+    // Build into a local first: if construction or training throws, index_
+    // stays null and is_trained stays false, so a later train() call does
+    // not trip the assertion above.
+    auto built = std::make_shared<RaftCagra>(
+            this->resources_.get(),
+            this->d,
+            cagraConfig_.intermediate_graph_degree,
+            cagraConfig_.graph_degree,
+            static_cast<faiss::cagra_build_algo>(cagraConfig_.build_algo),
+            cagraConfig_.nn_descent_niter,
+            this->metric_type,
+            this->metric_arg,
+            faiss::gpu::INDICES_64_BIT);
+
+    built->train(n, x);
+
+    // Commit only after training succeeded.
+    index_ = built;
+    this->is_trained = true;
+    this->ntotal = n;
+}
+
+/// Always returns false.
+/// NOTE(review): presumably tells the GpuIndex base that addImpl_ does not
+/// need caller-supplied ids; confirm against GpuIndex. addImpl_ itself
+/// always throws for this index type.
+bool GpuIndexCagra::addImplRequiresIDs_() const {
+    return false;
+}
+
+/// Unconditionally throws: GpuIndexCagra does not support adding vectors.
+/// The only way vectors enter the index is through train().
+///
+/// @param n   ignored
+/// @param x   ignored
+/// @param ids ignored
+void GpuIndexCagra::addImpl_(idx_t n, const float* x, const idx_t* ids) {
+    FAISS_THROW_MSG("adding vectors is not supported by GpuIndexCagra.");
+}
+
+/// Called from GpuIndex for search. Runs a CAGRA search for `n` queries and
+/// writes the `k` results per query into `distances` and `labels`.
+///
+/// @param n             number of query vectors; must be > 0
+/// @param x             queries, viewed as an (n, d) tensor
+/// @param k             number of neighbours requested per query
+/// @param distances     output, viewed as an (n, k) tensor
+/// @param labels        output, viewed as an (n, k) tensor
+/// @param search_params optional; when non-null it must point to a
+///                      SearchParametersCagra, otherwise a
+///                      default-constructed SearchParametersCagra is used
+/// @throws FaissException if `search_params` is non-null but is not a
+///                        SearchParametersCagra
+void GpuIndexCagra::searchImpl_(
+        idx_t n,
+        const float* x,
+        int k,
+        float* distances,
+        idx_t* labels,
+        const SearchParameters* search_params) const {
+    FAISS_ASSERT(this->is_trained && index_);
+    FAISS_ASSERT(n > 0);
+
+    Tensor<float, 2, true> queries(const_cast<float*>(x), {n, this->d});
+    Tensor<float, 2, true> outDistances(distances, {n, k});
+    Tensor<idx_t, 2, true> outLabels(labels, {n, k});
+
+    // The defaults live on the stack, so nothing has to be freed (and
+    // nothing can leak) if the search below throws.
+    const SearchParametersCagra defaultParams{};
+    const SearchParametersCagra* params = &defaultParams;
+    if (search_params) {
+        // The parameters are only read, so a const-preserving cast is
+        // enough. A failed cast yields nullptr, which must be rejected
+        // before any field is dereferenced.
+        params = dynamic_cast<const SearchParametersCagra*>(search_params);
+        FAISS_THROW_IF_NOT_MSG(
+                params,
+                "GpuIndexCagra: search_params must be a SearchParametersCagra");
+    }
+
+    index_->search(
+            queries,
+            k,
+            outDistances,
+            outLabels,
+            params->max_queries,
+            params->itopk_size,
+            params->max_iterations,
+            static_cast<faiss::cagra_search_algo>(params->algo),
+            params->team_size,
+            params->search_width,
+            params->min_iterations,
+            params->thread_block_size,
+            static_cast<faiss::cagra_hash_mode>(params->hashmap_mode),
+            params->hashmap_min_bitlen,
+            params->hashmap_max_fill_rate,
+            params->num_random_samplings,
+            params->rand_xor_mask);
+}
+
+/// Resets the underlying RaftCagra, if one exists, and sets `ntotal` to 0.
+void GpuIndexCagra::reset() {
+    DeviceScope scope(config_.device);
+
+    if (!index_) {
+        // train() never created an index, so there must be nothing stored.
+        FAISS_ASSERT(this->ntotal == 0);
+        return;
+    }
+
+    index_->reset();
+    this->ntotal = 0;
+    // NOTE(review): is_trained and index_ are left as they are, so a
+    // subsequent train() call returns early without rebuilding; confirm
+    // this is intended.
+}
+
+} // namespace gpu
+} // namespace faiss
diff --git a/faiss/gpu/GpuIndexCagra.h b/faiss/gpu/GpuIndexCagra.h
@@ -0,0 +1,145 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <faiss/gpu/GpuIndex.h>
+
+namespace faiss {
+namespace gpu {
+
+class RaftCagra;
+
+/// Algorithm used to build the knn graph; selected through
+/// GpuIndexCagraConfig::build_algo.
+/// GpuIndexCagra::train static_casts this value to faiss::cagra_build_algo,
+/// so the enumerators presumably have to stay in the same order as that
+/// enum (verify against its definition before reordering).
+enum class graph_build_algo {
+    /* Use IVF-PQ to build all-neighbors knn graph */
+    IVF_PQ,
+    /* Experimental, use NN-Descent to build all-neighbors knn graph */
+    NN_DESCENT
+};
+
+/// Build-time options for GpuIndexCagra, in addition to the generic
+/// GpuIndexConfig fields. GpuIndexCagra::train forwards every field below to
+/// the RaftCagra constructor.
+struct GpuIndexCagraConfig : public GpuIndexConfig {
+    /** Degree of input graph for pruning. */
+    size_t intermediate_graph_degree = 128;
+    /** Degree of output graph. */
+    size_t graph_degree = 64;
+    /** ANN algorithm to build knn graph. */
+    graph_build_algo build_algo = graph_build_algo::IVF_PQ;
+    /** Number of iterations to run if building with NN_DESCENT. The value is
+     * passed to RaftCagra regardless of build_algo. */
+    size_t nn_descent_niter = 20;
+};
+
+/// Search implementation selector; see SearchParametersCagra::algo.
+/// GpuIndexCagra::searchImpl_ static_casts this value to
+/// faiss::cagra_search_algo, so the enumerators presumably have to stay in
+/// the same order as that enum (verify before reordering).
+enum class search_algo {
+    /** For large batch sizes. */
+    SINGLE_CTA,
+    /** For small batch sizes. */
+    MULTI_CTA,
+    /** NOTE(review): not documented here; meaning is defined by the
+     * backend's cagra_search_algo, confirm before relying on it. */
+    MULTI_KERNEL,
+    /** Default value of SearchParametersCagra::algo; presumably lets the
+     * backend pick the implementation (confirm). */
+    AUTO
+};
+
+/// Hashmap type selector; see SearchParametersCagra::hashmap_mode, where
+/// AUTO is the default. GpuIndexCagra::searchImpl_ static_casts this value
+/// to faiss::cagra_hash_mode, so the enumerators presumably have to stay in
+/// the same order as that enum (verify before reordering).
+enum class hash_mode { HASH, SMALL, AUTO };
+
+/// Per-search options for GpuIndexCagra. Pass a pointer to an instance as the
+/// `search_params` argument of a search; when no parameters are supplied,
+/// GpuIndexCagra::searchImpl_ uses a default-constructed instance, i.e. the
+/// defaults listed below.
+struct SearchParametersCagra : SearchParameters {
+    /** Maximum number of queries to search at the same time (batch size). Auto
+     * select when 0.*/
+    size_t max_queries = 0;
+
+    /** Number of intermediate search results retained during the search.
+     *
+     *  This is the main knob to adjust trade off between accuracy and search
+     * speed. Higher values improve the search accuracy.
+     */
+    size_t itopk_size = 64;
+
+    /** Upper limit of search iterations. Auto select when 0.*/
+    size_t max_iterations = 0;
+
+    // In the following we list additional search parameters for fine tuning.
+    // Reasonable default values are automatically chosen.
+
+    /** Which search implementation to use. */
+    search_algo algo = search_algo::AUTO;
+
+    /** Number of threads used to calculate a single distance. 4, 8, 16, or 32.
+     * NOTE(review): the default 0 is not one of the listed values;
+     * presumably it means auto select, confirm against the backend.
+     */
+    size_t team_size = 0;
+
+    /** Search width: number of graph nodes to select as the starting point
+     * for the search in each iteration. */
+    size_t search_width = 1;
+    /** Lower limit of search iterations. */
+    size_t min_iterations = 0;
+
+    /** Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. */
+    size_t thread_block_size = 0;
+    /** Hashmap type. Auto selection when AUTO. */
+    hash_mode hashmap_mode = hash_mode::AUTO;
+    /** Lower limit of hashmap bit length. More than 8.
+     * NOTE(review): the default 0 is below the stated bound; presumably it
+     * means auto select, confirm against the backend. */
+    size_t hashmap_min_bitlen = 0;
+    /** Upper limit of hashmap fill rate. More than 0.1, less than 0.9.*/
+    float hashmap_max_fill_rate = 0.5;
+
+    /** Number of iterations of initial random seed node selection. 1 or more.
+     */
+    uint32_t num_random_samplings = 1;
+    /** Bit mask used for initial random seed node selection. */
+    uint64_t rand_xor_mask = 0x128394;
+};
+
+/// GPU index backed by a RaftCagra instance. The index is populated by
+/// train(); adding vectors afterwards is not supported (addImpl_ throws).
+struct GpuIndexCagra : public GpuIndex {
+   public:
+    /// Creates an empty, untrained index (is_trained is set to false).
+    /// @param provider source of the GpuResources used by the index
+    /// @param dims     dimensionality of the vectors
+    /// @param metric   metric type; defaults to METRIC_L2
+    /// @param config   CAGRA build options, stored in cagraConfig_
+    GpuIndexCagra(
+            GpuResourcesProvider* provider,
+            int dims,
+            faiss::MetricType metric = faiss::METRIC_L2,
+            GpuIndexCagraConfig config = GpuIndexCagraConfig());
+
+    ~GpuIndexCagra() override = default;
+
+    /// Trains CAGRA based on the given vector data. Sets ntotal to n on
+    /// success; does nothing if the index is already trained.
+    void train(idx_t n, const float* x) override;
+
+    /// Resets the underlying RaftCagra (if any) and sets ntotal to 0.
+    void reset() override;
+
+   protected:
+    /// Always returns false.
+    bool addImplRequiresIDs_() const override;
+
+    /// Always throws: adding vectors is not supported by GpuIndexCagra.
+    void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
+
+    /// Called from GpuIndex for search. `search_params` may be null, in
+    /// which case default SearchParametersCagra values are used.
+    void searchImpl_(
+            idx_t n,
+            const float* x,
+            int k,
+            float* distances,
+            idx_t* labels,
+            const SearchParameters* search_params) const override;
+
+    /// Our configuration options
+    const GpuIndexCagraConfig cagraConfig_;
+
+    /// Instance that we own; the RaftCagra created by train(). Null until
+    /// train() has been called.
+    std::shared_ptr<RaftCagra> index_;
+};
+
+} // namespace gpu
+} // namespace faiss