facebookresearch · cjnolet · Jan 8, 2019 · Jun 3, 2022 · Jun 3, 2022 · Jun 3, 2022
diff --git a/build.sh b/build.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# NOTE: This file is temporary for the proof-of-concept branch and will be removed before this PR is merged
+
+BUILD_TYPE=Release
+BUILD_DIR=build/
+
+RAFT_REPO_REL=""
+EXTRA_CMAKE_ARGS=""
+set -e
+
+if [[ ${RAFT_REPO_REL} != "" ]]; then
+  RAFT_REPO_PATH="`readlink -f \"${RAFT_REPO_REL}\"`"
+  EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCPM_raft_SOURCE=${RAFT_REPO_PATH}"
+fi
+
+if [ "$1" == "clean" ]; then
+  rm -rf build
+  rm -rf .cache
+  exit 0
+fi
+
+if [ "$1" == "test" ]; then
+  make -C build -j test
+  exit 0
+fi
+
+if [ "$1" == "test-raft" ]; then
+  ./build/faiss/gpu/test/TestRaftIndexIVFFlat
+  exit 0
+fi
+
+mkdir -p $BUILD_DIR
+cd $BUILD_DIR
+
+cmake \
+ -DFAISS_ENABLE_GPU=ON \
+ -DFAISS_ENABLE_RAFT=ON \
+ -DFAISS_ENABLE_PYTHON=OFF \
+ -DBUILD_TESTING=ON \
+ -DBUILD_SHARED_LIBS=OFF \
+ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+ -DFAISS_OPT_LEVEL=avx2 \
+ -DRAFT_NVTX=OFF \
+ -DCMAKE_CUDA_ARCHITECTURES="NATIVE" \
+ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+ -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ ${EXTRA_CMAKE_ARGS} \
+ ../
+
+
+# make -C build -j12 faiss
+cmake  --build . -j12
+# make -C build -j12 swigfaiss
+# (cd build/faiss/python && python setup.py install)
+
diff --git a/cmake/thirdparty/fetch_rapids.cmake b/cmake/thirdparty/fetch_rapids.cmake
@@ -15,7 +15,7 @@
 # or implied. See the License for the specific language governing permissions and limitations under
 # the License.
 # =============================================================================
-set(RAPIDS_VERSION "23.06")
+set(RAPIDS_VERSION "23.08")
 
 if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake)
     file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake

diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt
@@ -238,9 +238,11 @@ generate_ivf_interleaved_code()
 
 if(FAISS_ENABLE_RAFT)
   list(APPEND FAISS_GPU_HEADERS
+          impl/RaftIVFFlat.cuh
           impl/RaftFlatIndex.cuh)
   list(APPEND FAISS_GPU_SRC
-          impl/RaftFlatIndex.cu)
+          impl/RaftFlatIndex.cu
+	  impl/RaftIVFFlat.cu)
 
   target_compile_definitions(faiss PUBLIC USE_NVIDIA_RAFT=1)
   target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_RAFT=1)

diff --git a/faiss/gpu/GpuIndexIVF.cu b/faiss/gpu/GpuIndexIVF.cu
@@ -16,6 +16,11 @@
 #include <faiss/gpu/impl/IVFBase.cuh>
 #include <faiss/gpu/utils/CopyUtils.cuh>
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/handle.hpp>
+#include <raft/neighbors/ivf_flat.cuh>
+#endif
+
 namespace faiss {
 namespace gpu {
 
@@ -444,14 +449,46 @@ void GpuIndexIVF::trainQuantizer_(idx_t n, const float* x) {
         printf("Training IVF quantizer on %ld vectors in %dD\n", n, d);
     }
 
-    // leverage the CPU-side k-means code, which works for the GPU
-    // flat index as well
     quantizer->reset();
-    Clustering clus(this->d, nlist, this->cp);
-    clus.verbose = verbose;
-    clus.train(n, x, *quantizer);
-    quantizer->is_trained = true;
 
+#if defined USE_NVIDIA_RAFT
+
+    if (config_.use_raft) {
+        const raft::device_resources& raft_handle =
+                resources_->getRaftHandleCurrentDevice();
+
+        raft::neighbors::ivf_flat::index_params raft_idx_params;
+        raft_idx_params.n_lists = nlist;
+        raft_idx_params.metric = metric_type == faiss::METRIC_L2
+                ? raft::distance::DistanceType::L2Expanded
+                : raft::distance::DistanceType::InnerProduct;
+        raft_idx_params.add_data_on_build = false;
+        raft_idx_params.kmeans_trainset_fraction = 1.0;
+        raft_idx_params.kmeans_n_iters = cp.niter;
+        raft_idx_params.adaptive_centers = !cp.frozen_centroids;
+
+        auto raft_index = raft::neighbors::ivf_flat::build(
+                raft_handle, raft_idx_params, x, n, (idx_t)d);
+
+        raft_handle.sync_stream();
+
+        quantizer->train(nlist, raft_index.centers().data_handle());
+        quantizer->add(nlist, raft_index.centers().data_handle());
+    } else
+#else
+    if (config_.use_raft) {
+        FAISS_THROW_MSG(
+                "RAFT has not been compiled into the current version so it cannot be used.");
+    } else
+#endif
+    {
+        // leverage the CPU-side k-means code, which works for the GPU
+        // flat index as well
+        Clustering clus(this->d, nlist, this->cp);
+        clus.verbose = verbose;
+        clus.train(n, x, *quantizer);
+    }
+    quantizer->is_trained = true;
     FAISS_ASSERT(quantizer->ntotal == nlist);
 }
 

diff --git a/faiss/gpu/GpuIndexIVF.h b/faiss/gpu/GpuIndexIVF.h
@@ -73,23 +73,24 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
     virtual void updateQuantizer() = 0;
 
     /// Returns the number of inverted lists we're managing
-    idx_t getNumLists() const;
+    virtual idx_t getNumLists() const;
 
     /// Returns the number of vectors present in a particular inverted list
-    idx_t getListLength(idx_t listId) const;
+    virtual idx_t getListLength(idx_t listId) const;
 
     /// Return the encoded vector data contained in a particular inverted list,
     /// for debugging purposes.
     /// If gpuFormat is true, the data is returned as it is encoded in the
     /// GPU-side representation.
     /// Otherwise, it is converted to the CPU format.
     /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
-            const;
+    virtual std::vector<uint8_t> getListVectorData(
+            idx_t listId,
+            bool gpuFormat = false) const;
 
     /// Return the vector indices contained in a particular inverted list, for
     /// debugging purposes.
-    std::vector<idx_t> getListIndices(idx_t listId) const;
+    virtual std::vector<idx_t> getListIndices(idx_t listId) const;
 
     void search_preassigned(
             idx_t n,
@@ -121,7 +122,7 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
     int getCurrentNProbe_(const SearchParameters* params) const;
     void verifyIVFSettings_() const;
     bool addImplRequiresIDs_() const override;
-    void trainQuantizer_(idx_t n, const float* x);
+    virtual void trainQuantizer_(idx_t n, const float* x);
 
     /// Called from GpuIndex for add/add_with_ids
     void addImpl_(idx_t n, const float* x, const idx_t* ids) override;

diff --git a/faiss/gpu/GpuIndexIVFFlat.cu b/faiss/gpu/GpuIndexIVFFlat.cu
@@ -15,6 +15,10 @@
 #include <faiss/gpu/utils/CopyUtils.cuh>
 #include <faiss/gpu/utils/Float16.cuh>
 
+#if defined USE_NVIDIA_RAFT
+#include <faiss/gpu/impl/RaftIVFFlat.cuh>
+#endif
+
 #include <limits>
 
 namespace faiss {
@@ -70,8 +74,7 @@ GpuIndexIVFFlat::GpuIndexIVFFlat(
     // no other quantizer that we need to train, so this is sufficient
     if (this->is_trained) {
         FAISS_ASSERT(this->quantizer);
-
-        index_.reset(new IVFFlat(
+        set_index_(
                 resources_.get(),
                 this->d,
                 this->nlist,
@@ -81,14 +84,62 @@ GpuIndexIVFFlat::GpuIndexIVFFlat(
                 nullptr, // no scalar quantizer
                 ivfFlatConfig_.interleavedLayout,
                 ivfFlatConfig_.indicesOptions,
-                config_.memorySpace));
+                config_.memorySpace);
         baseIndex_ = std::static_pointer_cast<IVFBase, IVFFlat>(index_);
         updateQuantizer();
     }
 }
 
 GpuIndexIVFFlat::~GpuIndexIVFFlat() {}
 
+void GpuIndexIVFFlat::set_index_(
+        GpuResources* resources,
+        int dim,
+        int nlist,
+        faiss::MetricType metric,
+        float metricArg,
+        bool useResidual,
+        /// Optional ScalarQuantizer
+        faiss::ScalarQuantizer* scalarQ,
+        bool interleavedLayout,
+        IndicesOptions indicesOptions,
+        MemorySpace space) {
+#if defined USE_NVIDIA_RAFT
+
+    if (config_.use_raft) {
+        index_.reset(new RaftIVFFlat(
+                resources,
+                dim,
+                nlist,
+                metric,
+                metricArg,
+                useResidual,
+                scalarQ,
+                interleavedLayout,
+                indicesOptions,
+                space));
+    } else
+#else
+    if (config_.use_raft) {
+        FAISS_THROW_MSG(
+                "RAFT has not been compiled into the current version so it cannot be used.");
+    } else
+#endif
+    {
+        index_.reset(new IVFFlat(
+                resources,
+                dim,
+                nlist,
+                metric,
+                metricArg,
+                useResidual,
+                scalarQ,
+                interleavedLayout,
+                indicesOptions,
+                space));
+    }
+}
+
 void GpuIndexIVFFlat::reserveMemory(size_t numVecs) {
     DeviceScope scope(config_.device);
 
@@ -110,25 +161,25 @@ void GpuIndexIVFFlat::copyFrom(const faiss::IndexIVFFlat* index) {
 
     // The other index might not be trained
     if (!index->is_trained) {
-        FAISS_ASSERT(!this->is_trained);
+        FAISS_ASSERT(!is_trained);
         return;
     }
 
     // Otherwise, we can populate ourselves from the other index
-    FAISS_ASSERT(this->is_trained);
+    FAISS_ASSERT(is_trained);
 
     // Copy our lists as well
-    index_.reset(new IVFFlat(
+    set_index_(
             resources_.get(),
-            this->d,
-            this->nlist,
+            d,
+            nlist,
             index->metric_type,
             index->metric_arg,
             false,   // no residual
             nullptr, // no scalar quantizer
             ivfFlatConfig_.interleavedLayout,
             ivfFlatConfig_.indicesOptions,
-            config_.memorySpace));
+            config_.memorySpace);
     baseIndex_ = std::static_pointer_cast<IVFBase, IVFFlat>(index_);
     updateQuantizer();
 
@@ -201,18 +252,30 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) {
 
     FAISS_ASSERT(!index_);
 
-    // FIXME: GPUize more of this
-    // First, make sure that the data is resident on the CPU, if it is not on
-    // the CPU, as we depend upon parts of the CPU code
-    auto hostData = toHost<float, 2>(
-            (float*)x,
-            resources_->getDefaultStream(config_.device),
-            {n, this->d});
-
-    trainQuantizer_(n, hostData.data());
+#if defined USE_NVIDIA_RAFT
+    if (config_.use_raft) {
+        // No need to copy the data to host
+        trainQuantizer_(n, x);
+    } else
+#else
+    if (config_.use_raft) {
+        FAISS_THROW_MSG(
+                "RAFT has not been compiled into the current version so it cannot be used.");
+    } else
+#endif
+    {
+        // FIXME: GPUize more of this
+        // First, make sure that the data is resident on the CPU, if it is not
+        // on the CPU, as we depend upon parts of the CPU code
+        auto hostData = toHost<float, 2>(
+                (float*)x,
+                resources_->getDefaultStream(config_.device),
+                {n, this->d});
+        trainQuantizer_(n, hostData.data());
+    }
 
     // The quantizer is now trained; construct the IVF index
-    index_.reset(new IVFFlat(
+    set_index_(
             resources_.get(),
             this->d,
             this->nlist,
@@ -222,7 +285,7 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) {
             nullptr, // no scalar quantizer
             ivfFlatConfig_.interleavedLayout,
             ivfFlatConfig_.indicesOptions,
-            config_.memorySpace));
+            config_.memorySpace);
     baseIndex_ = std::static_pointer_cast<IVFBase, IVFFlat>(index_);
     updateQuantizer();
 

diff --git a/faiss/gpu/GpuIndexIVFFlat.h b/faiss/gpu/GpuIndexIVFFlat.h
@@ -8,6 +8,8 @@
 #pragma once
 
 #include <faiss/gpu/GpuIndexIVF.h>
+#include <faiss/impl/ScalarQuantizer.h>
+
 #include <memory>
 
 namespace faiss {
@@ -86,6 +88,19 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
     void train(idx_t n, const float* x) override;
 
    protected:
+    void set_index_(
+            GpuResources* resources,
+            int dim,
+            int nlist,
+            faiss::MetricType metric,
+            float metricArg,
+            bool useResidual,
+            /// Optional ScalarQuantizer
+            faiss::ScalarQuantizer* scalarQ,
+            bool interleavedLayout,
+            IndicesOptions indicesOptions,
+            MemorySpace space);
+
     /// Our configuration options
     const GpuIndexIVFFlatConfig ivfFlatConfig_;
 

diff --git a/faiss/gpu/StandardGpuResources.cpp b/faiss/gpu/StandardGpuResources.cpp
@@ -361,6 +361,10 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
 
     defaultStreams_[device] = defaultStream;
 
+#if defined USE_NVIDIA_RAFT
+    raftHandles_.emplace(std::make_pair(device, defaultStream));
+#endif
+
     cudaStream_t asyncCopyStream = 0;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&asyncCopyStream, cudaStreamNonBlocking));