diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index a5f9372aec..54f0a42f5d 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -39,11 +39,11 @@ runs: conda update -y -q conda echo "$CONDA/bin" >> $GITHUB_PATH - conda install -y -q python=3.11 cmake=3.26 make=4.2 swig=4.0 "numpy<2" scipy=1.14 pytest=7.4 gflags=2.2 + conda install -y -q python=3.11 cmake=3.30.4 make=4.2 swig=4.0 "numpy<2" scipy=1.14 pytest=7.4 gflags=2.2 # install base packages for ARM64 if [ "${{ runner.arch }}" = "ARM64" ]; then - conda install -y -q -c conda-forge openblas=0.3 gxx_linux-aarch64=14.2 sysroot_linux-aarch64=2.17 + conda install -y -q -c conda-forge openblas=0.3.29 gxx_linux-aarch64=14.2 sysroot_linux-aarch64=2.17 fi # install base packages for X86_64 @@ -61,7 +61,7 @@ runs: conda install -y -q cuda-toolkit=12.4 -c "nvidia/label/cuda-12.4.0" # and CUDA from cuVS channel for cuVS builds elif [ "${{ inputs.cuvs }}" = "ON" ]; then - conda install -y -q libcuvs=24.12 'cuda-version>=12.0,<=12.5' cuda-toolkit=12.4.1 gxx_linux-64=12.4 -c rapidsai -c conda-forge + conda install -y -q libcuvs=25.04 'cuda-version>=12.0,<=12.5' cuda-toolkit=12.4.1 gxx_linux-64=12.4 -c rapidsai -c rapidsai-nightly -c conda-forge fi # install test packages diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml index d2e56d23c3..c27446d840 100644 --- a/.github/actions/build_conda/action.yml +++ b/.github/actions/build_conda/action.yml @@ -44,7 +44,7 @@ runs: # Ensure starting packages are from conda-forge. 
conda list --show-channel-urls conda install -y -q "conda!=24.11.0" - conda install -y -q "conda-build!=24.11.0" + conda install -y -q "conda-build=25.3.1" "liblief=0.14.1" conda list --show-channel-urls - name: Enable anaconda uploads if: inputs.label != '' diff --git a/.github/workflows/retry_build.yml b/.github/workflows/retry_build.yml index ff4e944adf..45c07ffff3 100644 --- a/.github/workflows/retry_build.yml +++ b/.github/workflows/retry_build.yml @@ -15,7 +15,10 @@ jobs: GH_TOKEN: ${{ github.token }} GH_DEBUG: api run: | - while gh run view ${{ inputs.run_id }} --json status | grep -q in_progress + # status can be one of "queued", "in_progress", "completed", "waiting", "requested", "pending" + # https://docs.github.com/en/rest/checks/runs + # while not completed, sleep for 10 minutes + while gh run view ${{ inputs.run_id }} --json status | grep -v completed do echo Workflow in progress - sleeping for 10 minutes then checking again sleep 10m diff --git a/CHANGELOG.md b/CHANGELOG.md index c1771f2927..f6826aee8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,87 @@ All notable changes to this project will be documented in this file. 
## [Unreleased] +## [1.11.0] - 2025-04-24 + + +Added +- RaBitQ implementation (#4235) +- Add RaBitQ to the swigfaiss so we can access its properties correctly in python (#4304) +- Add date and time to the codec file path so that the file doesn't get overridden with each run (#4303) +- Add missing header in faiss/CMakeLists.txt (#4285) +- Implement is_spherical and normalize_L2 booleans as part of the training APIs (#4279) +- Add normalize_l2 boolean to distributed training API +- re-land mmap diff (#4250) +- SearchParameters support for IndexBinaryFlat (#4055) +- Support non-partition col and map in the embedding reader (#4229) +- Support cosine distance for training vectors (#4227) +- Add missing #include in code_distance-sve.h (#4219) +- Add the support for IndexIDMap with Cagra index (#4188) +- Add bounds checking to hnsw nb_neighbors (#4185) +- Add sharding convenience function for IVF indexes (#4150) +- Added support for building for MinGW, in addition to MSVC (#4145) + +Changed +- Skip mmap test case in AIX. 
(#4275) +- Handle insufficient driver gracefully (#4271) +- relax input params for IndexIVFRaBitQ::get_InvertedListScanner() (#4270) +- Allow using custom index readers and writers (#4180) +- Upgrade to libcuvs=25.04 (#4164) +- ignore regex (#4264) +- Publish the C API to Conda (#4186) +- Pass row filters to Hive Reader to filter rows (#4256) +- Back out "test merge with internal repo" (#4244) +- test merge with internal repo (#4242) +- Revert D69972250: Memory-mapping and Zero-copy deserializers +- Revert D69984379: mem mapping and zero-copy python fixes +- mem mapping and zero-copy python fixes (#4212) +- Memory-mapping and Zero-copy deserializers (#4199) +- Use `nullptr` in faiss/gpu/StandardGpuResources.cpp (#4232) +- Make static method in header inline (#4214) +- Upgrade openblas to 0.3.29 for ARM architectures (#4203) +- Pass `store_dataset` argument along to cuVS CAGRA (#4173) +- Handle plain SearchParameters in HNSW searches (#4167) +- Update INSTALL.md to remove some raft references, add missing dependency (#4176) +- Update README.md (#4169) +- Update CAGRA docs (#4152) +- Expose IDSelectorBitmap in the C_API (#4158) + +Fixed +- fix: algorithm of spreading vectors over shards (#4299) +- Fix overflow of int32 in IndexNSG (#4297) +- Fix Type Error in Conditional Logic (#4294) +- faiss/gpu/GpuAutoTune.cpp: fix llvm-19-exposed -Wunused-but-set-variable warnings +- Fix nightly by pinning conda-build to prevent regression in 25.3.2 (#4287) +- Fix CQS signal. 
Id] 88153895 -- readability-redundant-string-init in fbcode/faiss (#4283) +- Fix a placeholder for 'unimplemented' in mapped_io.cpp (#4268) +- fix bug: IVFPQ of raft/cuvs does not require redundant check (#4241) +- fix a serialization problem in RaBitQ (#4261) +- Grammar fix in FlatIndexHNSW (#4253) +- Fix CUDA kernel index data type in faiss/gpu/impl/DistanceUtils.cuh +10 (#4246) +- fix `IVFPQFastScan::RangeSearch()` on the `ARM` architecture (#4247) +- fix integer overflow issue when calculating imbalance_factor (#4245) +- Fix bug with metric_arg in IndexHNSW (#4239) +- Address compile errors and warnings (#4238) +- faiss: fix non-templated hammings function (#4195) +- Fix LLVM-19 compilation issue in faiss/AutoTune.cpp (#4220) +- Fix cloning and reverse index factory for NSG indices (#4151) +- Remove python_abi to fix nightly (#4217) +- Fix IVF quantizer centroid sharding so IDs are generated (#4197) +- Pin lief to fix nightly (#4211) +- Fix Sapphire Rapids never loading in Python bindings (#4209) +- Attempt to nightly fix (#4204) +- Fix nightly by installing earlier version of lief (#4198) +- Check for not completed +- Fix install error when building avx512_spr variant (#4170) +- fix: gpu tests link failure with static lib (#4137) +- Fix the order of parameters in bench_scalar_quantizer_distance. (#4159) + +Deprecated +- Remove unused exception parameter from faiss/impl/ResultHandler.h (#4243) +- Remove unused variable (#4205) + + + ## [1.10.0] - 2025-01-30 @@ -459,7 +540,9 @@ by conda install -c pytorch faiss-gpu cudatoolkit=10.0. - C bindings. - Extended tutorial to GPU indices. 
-[Unreleased]: https://github.com/facebookresearch/faiss/compare/v1.9.0...HEAD +[Unreleased]: https://github.com/facebookresearch/faiss/compare/v1.11.0...HEAD +[1.11.0]: https://github.com/facebookresearch/faiss/compare/v1.10.0...v1.11.0 +[1.10.0]: https://github.com/facebookresearch/faiss/compare/v1.9.0...v1.10.0 [1.9.0]: https://github.com/facebookresearch/faiss/compare/v1.8.0...v1.9.0 [1.8.0]: https://github.com/facebookresearch/faiss/compare/v1.7.4...v1.8.0 [1.7.4]: https://github.com/facebookresearch/faiss/compare/v1.7.3...v1.7.4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 328c4a5e27..565a0306b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,20 +34,20 @@ if(FAISS_ENABLE_GPU) endif() if(FAISS_ENABLE_CUVS) -include(cmake/thirdparty/fetch_rapids.cmake) -include(rapids-cmake) -include(rapids-cpm) -include(rapids-cuda) -include(rapids-export) -include(rapids-find) - -rapids_cuda_init_architectures(faiss) -rapids_cuda_init_architectures(pyfaiss) -rapids_cuda_init_architectures(faiss_c_library) + include(cmake/thirdparty/fetch_rapids.cmake) + include(rapids-cmake) + include(rapids-cpm) + include(rapids-cuda) + include(rapids-export) + include(rapids-find) + + rapids_cuda_init_architectures(faiss) + rapids_cuda_init_architectures(pyfaiss) + rapids_cuda_init_architectures(faiss_c_library) endif() project(faiss - VERSION 1.10.0 + VERSION 1.11.0 DESCRIPTION "A library for efficient similarity search and clustering of dense vectors." HOMEPAGE_URL "https://github.com/facebookresearch/faiss" LANGUAGES ${FAISS_LANGUAGES}) diff --git a/INSTALL.md b/INSTALL.md index 6e75826a56..8acbf4563f 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -6,26 +6,26 @@ pre-release nightly builds. 
- The CPU-only faiss-cpu conda package is currently available on Linux (x86-64 and aarch64), OSX (arm64 only), and Windows (x86-64) - faiss-gpu, containing both CPU and GPU indices, is available on Linux (x86-64 only) for CUDA 11.4 and 12.1 -- faiss-gpu-raft [^1] package containing GPU indices provided by [NVIDIA RAFT](https://github.com/rapidsai/raft/) version 24.06, is available on Linux (x86-64 only) for CUDA 11.8 and 12.4. +- faiss-gpu-cuvs package containing GPU indices provided by [NVIDIA cuVS](https://github.com/rapidsai/cuvs/) version 24.12, is available on Linux (x86-64 only) for CUDA 11.8 and 12.4. To install the latest stable release: ``` shell # CPU-only version -$ conda install -c pytorch faiss-cpu=1.10.0 +$ conda install -c pytorch faiss-cpu=1.11.0 # GPU(+CPU) version -$ conda install -c pytorch -c nvidia faiss-gpu=1.10.0 +$ conda install -c pytorch -c nvidia faiss-gpu=1.11.0 -# GPU(+CPU) version with NVIDIA RAFT -$ conda install -c pytorch -c nvidia -c rapidsai -c conda-forge faiss-gpu-raft=1.10.0 +# GPU(+CPU) version with NVIDIA cuVS +$ conda install -c pytorch -c nvidia -c rapidsai -c conda-forge libnvjitlink faiss-gpu-cuvs=1.11.0 # GPU(+CPU) version using AMD ROCm not yet available ``` For faiss-gpu, the nvidia channel is required for CUDA, which is not published in the main anaconda channel. -For faiss-gpu-raft, the rapidsai, conda-forge and nvidia channels are required. +For faiss-gpu-cuvs, the rapidsai, conda-forge and nvidia channels are required. 
Nightly pre-release packages can be installed as follows: @@ -34,13 +34,13 @@ Nightly pre-release packages can be installed as follows: $ conda install -c pytorch/label/nightly faiss-cpu # GPU(+CPU) version -$ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.10.0 +$ conda install -c pytorch/label/nightly -c nvidia faiss-gpu=1.11.0 # GPU(+CPU) version with NVIDIA cuVS (package built with CUDA 12.4) -conda install -c pytorch -c rapidsai -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 'cuda-version>=12.0,<=12.5' +conda install -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 'cuda-version>=12.0,<=12.5' # GPU(+CPU) version with NVIDIA cuVS (package built with CUDA 11.8) -conda install -c pytorch -c rapidsai -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 'cuda-version>=11.4,<=11.8' +conda install -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia pytorch/label/nightly::faiss-gpu-cuvs 'cuda-version>=11.4,<=11.8' # GPU(+CPU) version using AMD ROCm not yet available ``` @@ -321,5 +321,3 @@ and you can run $ python demos/demo_auto_tune.py ``` to test the GPU code. - -[^1]: The vector search and clustering algorithms in NVIDIA RAFT have been formally migrated to [NVIDIA cuVS](https://github.com/rapidsai/cuvs). This package is being renamed to `faiss-gpu-cuvs` in the next stable release, which will use these GPU implementations from the pre-compiled `libcuvs=24.12` binary. diff --git a/README.md b/README.md index 468ba59ab6..1a6949ab45 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ For the GPU version of Faiss, please cite: ## Join the Faiss community -For public discussion of Faiss or for questions, there is a Facebook group at https://www.facebook.com/groups/faissusers/ +For public discussion of Faiss or for questions, visit https://github.com/facebookresearch/faiss/discussions. 
We monitor the [issues page](http://github.com/facebookresearch/faiss/issues) of the repository. You can report bugs, ask questions, etc. diff --git a/benchs/bench_fw/descriptors.py b/benchs/bench_fw/descriptors.py index 8b1d65a505..cf0ee958fe 100644 --- a/benchs/bench_fw/descriptors.py +++ b/benchs/bench_fw/descriptors.py @@ -83,8 +83,15 @@ class DatasetDescriptor: embedding_column: Optional[str] = None + # only when the embedding column is a map + embedding_column_key: Optional[Any] = None + embedding_id_column: Optional[str] = None + # filters on the dataset where each filter is a + # string rep of a filter expression + filters: Optional[List[str]] = None + # unused in open-source splits_distribution: Optional[List[List[bytes]]] = None @@ -106,6 +113,10 @@ class DatasetDescriptor: # desc_name desc_name: Optional[str] = None + filename_suffix: Optional[str] = None + + normalize_L2: bool = False + def __hash__(self): return hash(self.get_filename()) @@ -129,6 +140,8 @@ def get_filename( ).replace("=", "_").replace("/", "_") if self.num_vectors is not None: filename += f"_{self.num_vectors}" + if self.filename_suffix is not None: + filename += f"_{self.filename_suffix}" filename += "." 
self.desc_name = filename @@ -214,6 +227,8 @@ class CodecDescriptor(IndexBaseDescriptor): factory: Optional[str] = None construction_params: Optional[List[Dict[str, int]]] = None training_vectors: Optional[DatasetDescriptor] = None + normalize_l2: bool = False + is_spherical: bool = False FILENAME_PREFIX: str = "xt" def __post_init__(self): diff --git a/benchs/bench_fw/index.py b/benchs/bench_fw/index.py index fe2fe103ef..b1252ad1b0 100644 --- a/benchs/bench_fw/index.py +++ b/benchs/bench_fw/index.py @@ -1138,6 +1138,8 @@ def assemble(self, dry_run): return None, None, "" logger.info(f"assemble, train {self.factory}") xt = self.io.get_dataset(self.training_vectors) + if self.training_vectors.normalize_L2: + faiss.normalize_L2(xt) _, t, _ = timer("train", lambda: codec.train(xt), once=True) t_aggregate += t diff --git a/c_api/CMakeLists.txt b/c_api/CMakeLists.txt index cffb8c307c..b2d33c54f1 100644 --- a/c_api/CMakeLists.txt +++ b/c_api/CMakeLists.txt @@ -32,9 +32,11 @@ set(FAISS_C_SRC index_io_c.cpp index_io_c_ex.cpp impl/AuxIndexStructures_c.cpp + impl/io_c.cpp utils/distances_c.cpp utils/utils_c.cpp ) + add_library(faiss_c ${FAISS_C_SRC}) if(FAISS_OPT_LEVEL STREQUAL "generic") target_link_libraries(faiss_c PRIVATE faiss) @@ -43,13 +45,73 @@ elseif(FAISS_OPT_LEVEL STREQUAL "avx2") elseif(FAISS_OPT_LEVEL STREQUAL "avx512") target_link_libraries(faiss_c PRIVATE faiss_avx512) endif() -install(TARGETS faiss_c - EXPORT faiss-targets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) + +add_library(faiss_c_avx2 ${FAISS_C_SRC}) +target_link_libraries(faiss_c_avx2 PRIVATE faiss_avx2) +if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512_spr") + set_target_properties(faiss_c_avx2 PROPERTIES EXCLUDE_FROM_ALL TRUE) +endif() +if(NOT WIN32) + 
target_compile_options(faiss_c_avx2 PRIVATE $<$:-mavx2 -mfma -mf16c -mpopcnt>) +else() + # MSVC enables FMA with /arch:AVX2; no separate flags for F16C, POPCNT + # Ref. FMA (under /arch:AVX2): https://docs.microsoft.com/en-us/cpp/build/reference/arch-x64 + # Ref. F16C (2nd paragraph): https://walbourn.github.io/directxmath-avx2/ + # Ref. POPCNT: https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64 + target_compile_options(faiss_c_avx2 PRIVATE $<$:/arch:AVX2>) +endif() + +add_library(faiss_c_avx512 ${FAISS_C_SRC}) +target_link_libraries(faiss_c_avx512 PRIVATE faiss_avx512) +if(NOT FAISS_OPT_LEVEL STREQUAL "avx512") + set_target_properties(faiss_c_avx512 PROPERTIES EXCLUDE_FROM_ALL TRUE) +endif() +if(NOT WIN32) + # All modern CPUs support F, CD, VL, DQ, BW extensions. + # Ref: https://en.wikipedia.org/wiki/AVX512 + target_compile_options(faiss_c_avx512 PRIVATE $<$:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mpopcnt>) +else() + target_compile_options(faiss_c_avx512 PRIVATE $<$:/arch:AVX512>) +endif() + +add_library(faiss_c_avx512_spr ${FAISS_C_SRC}) +target_link_libraries(faiss_c_avx512_spr PRIVATE faiss_avx512_spr) +if(NOT FAISS_OPT_LEVEL STREQUAL "avx512_spr") + set_target_properties(faiss_c_avx512_spr PROPERTIES EXCLUDE_FROM_ALL TRUE) +endif() +if(NOT WIN32) + # Architecture mode to support AVX512 extensions available since Intel(R) Sapphire Rapids. 
+ # Ref: https://networkbuilders.intel.com/solutionslibrary/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide + target_compile_options(faiss_c_avx512_spr PRIVATE $<$:-march=sapphirerapids -mtune=sapphirerapids>) +else() + target_compile_options(faiss_c_avx512_spr PRIVATE $<$:/arch:AVX512>) +endif() + +add_library(faiss_c_sve ${FAISS_C_SRC}) +target_link_libraries(faiss_c_sve PRIVATE faiss_sve) +if(NOT FAISS_OPT_LEVEL STREQUAL "sve") + set_target_properties(faiss_c_sve PROPERTIES EXCLUDE_FROM_ALL TRUE) +endif() +if(NOT WIN32) + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(faiss_c_sve PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(faiss_c_sve PRIVATE $<$,$>:-march=armv8-a+sve>) + endif() + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(faiss_c_sve PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(faiss_c_sve PRIVATE $<$,$>:-march=armv8-a+sve>) + endif() +endif() function(faiss_install_headers headers p) foreach(h ${headers}) @@ -68,6 +130,42 @@ file(GLOB FAISS_C_API_HEADERS faiss_install_headers("${FAISS_C_API_HEADERS}" c_api) +install(TARGETS faiss_c + EXPORT faiss-targets + RUNTIME 
DESTINATION ${CMAKE_INSTALL_BINDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) +if(FAISS_OPT_LEVEL STREQUAL "avx2") + install(TARGETS faiss_c_avx2 + EXPORT faiss-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) +endif() +if(FAISS_OPT_LEVEL STREQUAL "avx512") + install(TARGETS faiss_c_avx2 faiss_c_avx512 + EXPORT faiss-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) +endif() +if(FAISS_OPT_LEVEL STREQUAL "avx512_spr") + install(TARGETS faiss_c_avx2 faiss_c_avx512_spr + EXPORT faiss-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) +endif() +if(FAISS_OPT_LEVEL STREQUAL "sve") + install(TARGETS faiss_c_sve + EXPORT faiss-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) +endif() + add_executable(example_c EXCLUDE_FROM_ALL example_c.c) target_link_libraries(example_c PRIVATE faiss_c) diff --git a/c_api/gpu/CMakeLists.txt b/c_api/gpu/CMakeLists.txt index 5fdfc34dfd..3c7214a576 100644 --- a/c_api/gpu/CMakeLists.txt +++ b/c_api/gpu/CMakeLists.txt @@ -17,9 +17,17 @@ faiss_install_headers("${FAISS_C_API_GPU_HEADERS}" c_api/gpu) if (FAISS_ENABLE_ROCM) target_link_libraries(faiss_c PUBLIC hip::host roc::hipblas) + target_link_libraries(faiss_c_avx2 PUBLIC hip::host roc::hipblas) + target_link_libraries(faiss_c_avx512 PUBLIC hip::host roc::hipblas) + target_link_libraries(faiss_c_avx512_spr PUBLIC hip::host roc::hipblas) + target_link_libraries(faiss_c_sve PUBLIC hip::host roc::hipblas) else() find_package(CUDAToolkit REQUIRED) target_link_libraries(faiss_c PUBLIC CUDA::cudart CUDA::cublas $<$:cuvs::cuvs>) + target_link_libraries(faiss_c_avx2 PUBLIC CUDA::cudart CUDA::cublas $<$:cuvs::cuvs>) + target_link_libraries(faiss_c_avx512 PUBLIC CUDA::cudart CUDA::cublas 
$<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
+  target_link_libraries(faiss_c_avx512_spr PUBLIC CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
+  target_link_libraries(faiss_c_sve PUBLIC CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
 endif()
 
 add_executable(example_gpu_c EXCLUDE_FROM_ALL example_gpu_c.c)
diff --git a/c_api/impl/AuxIndexStructures_c.cpp b/c_api/impl/AuxIndexStructures_c.cpp
index 534d5a0769..a7d4f4d5f5 100644
--- a/c_api/impl/AuxIndexStructures_c.cpp
+++ b/c_api/impl/AuxIndexStructures_c.cpp
@@ -19,6 +19,7 @@ using faiss::DistanceComputer;
 using faiss::IDSelector;
 using faiss::IDSelectorAnd;
 using faiss::IDSelectorBatch;
+using faiss::IDSelectorBitmap;
 using faiss::IDSelectorNot;
 using faiss::IDSelectorOr;
 using faiss::IDSelectorRange;
@@ -119,6 +120,23 @@ int faiss_IDSelectorBatch_new(
     CATCH_AND_HANDLE
 }
 
+DEFINE_DESTRUCTOR(IDSelectorBitmap)
+
+DEFINE_GETTER(IDSelectorBitmap, size_t, n)
+DEFINE_GETTER(IDSelectorBitmap, const uint8_t*, bitmap)
+
+int faiss_IDSelectorBitmap_new(
+        FaissIDSelectorBitmap** p_sel,
+        size_t n,
+        const uint8_t* bitmap) {
+    try {
+        *p_sel = reinterpret_cast<FaissIDSelectorBitmap*>(
+                new IDSelectorBitmap(n, bitmap));
+        return 0;
+    }
+    CATCH_AND_HANDLE
+}
+
 int faiss_IDSelectorNot_new(
         FaissIDSelectorNot** p_sel,
         const FaissIDSelector* sel) {
diff --git a/c_api/impl/AuxIndexStructures_c.h b/c_api/impl/AuxIndexStructures_c.h
index 86b017a432..c4be6318bc 100644
--- a/c_api/impl/AuxIndexStructures_c.h
+++ b/c_api/impl/AuxIndexStructures_c.h
@@ -81,6 +81,17 @@ int faiss_IDSelectorBatch_new(
         size_t n,
         const idx_t* indices);
 
+FAISS_DECLARE_CLASS(IDSelectorBitmap)
+FAISS_DECLARE_DESTRUCTOR(IDSelectorBitmap)
+
+FAISS_DECLARE_GETTER(IDSelectorBitmap, size_t, n)
+FAISS_DECLARE_GETTER(IDSelectorBitmap, const uint8_t*, bitmap)
+
+int faiss_IDSelectorBitmap_new(
+        FaissIDSelectorBitmap** p_sel,
+        size_t n,
+        const uint8_t* bitmap);
+
 FAISS_DECLARE_CLASS(IDSelectorNot)
 int faiss_IDSelectorNot_new(
         FaissIDSelectorNot** p_sel,
diff --git a/c_api/impl/io_c.cpp b/c_api/impl/io_c.cpp
new file mode 100644
index 0000000000..58597b97fb
--- /dev/null
+++ b/c_api/impl/io_c.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// -*- c++ -*-
+
+#include "io_c.h"
+#include <faiss/impl/io.h>
+#include "../macros_impl.h"
+
+using faiss::IOReader;
+using faiss::IOWriter;
+
+struct CustomIOReader : IOReader {
+    size_t (*func)(void* ptr, size_t size, size_t nitems) = nullptr;
+
+    CustomIOReader(size_t (*func_in)(void* ptr, size_t size, size_t nitems));
+
+    size_t operator()(void* ptr, size_t size, size_t nitems) override;
+};
+
+CustomIOReader::CustomIOReader(
+        size_t (*func_in)(void* ptr, size_t size, size_t nitems))
+        : func(func_in) {}
+
+size_t CustomIOReader::operator()(void* ptr, size_t size, size_t nitems) {
+    return func(ptr, size, nitems);
+}
+
+int faiss_CustomIOReader_new(
+        FaissCustomIOReader** p_out,
+        size_t (*func_in)(void* ptr, size_t size, size_t nitems)) {
+    try {
+        *p_out = reinterpret_cast<FaissCustomIOReader*>(
+                new CustomIOReader(func_in));
+    }
+    CATCH_AND_HANDLE
+}
+
+void faiss_CustomIOReader_free(FaissCustomIOReader* obj) {
+    delete reinterpret_cast<CustomIOReader*>(obj);
+}
+
+struct CustomIOWriter : IOWriter {
+    size_t (*func)(const void* ptr, size_t size, size_t nitems) = nullptr;
+
+    CustomIOWriter(
+            size_t (*func_in)(const void* ptr, size_t size, size_t nitems));
+
+    size_t operator()(const void* ptr, size_t size, size_t nitems) override;
+};
+
+CustomIOWriter::CustomIOWriter(
+        size_t (*func_in)(const void* ptr, size_t size, size_t nitems))
+        : func(func_in) {}
+
+size_t CustomIOWriter::operator()(const void* ptr, size_t size, size_t nitems) {
+    return func(ptr, size, nitems);
+}
+
+int faiss_CustomIOWriter_new(
+        FaissCustomIOWriter** p_out,
+        size_t (*func_in)(const void* ptr, size_t size, size_t nitems)) {
+    try {
+        *p_out = reinterpret_cast<FaissCustomIOWriter*>(
+                new CustomIOWriter(func_in));
+    }
+    CATCH_AND_HANDLE
+}
+
+void faiss_CustomIOWriter_free(FaissCustomIOWriter* obj) {
+    delete reinterpret_cast<CustomIOWriter*>(obj);
+}
diff --git a/c_api/impl/io_c.h b/c_api/impl/io_c.h
new file mode 100644
index 0000000000..94a604828d
--- /dev/null
+++ b/c_api/impl/io_c.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// -*- c -*-
+
+#ifndef FAISS_IO_C_H
+#define FAISS_IO_C_H
+
+#include <stdio.h>
+#include "../faiss_c.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+FAISS_DECLARE_CLASS(IOReader)
+FAISS_DECLARE_DESTRUCTOR(IOReader)
+
+FAISS_DECLARE_CLASS(IOWriter)
+FAISS_DECLARE_DESTRUCTOR(IOWriter)
+
+/*******************************************************
+ * Custom reader + writer
+ *
+ * Reader and writer which wraps a function pointer,
+ * primarily for FFI use.
+ *******************************************************/
+
+FAISS_DECLARE_CLASS(CustomIOReader)
+FAISS_DECLARE_DESTRUCTOR(CustomIOReader)
+
+int faiss_CustomIOReader_new(
+        FaissCustomIOReader** p_out,
+        size_t (*func_in)(void* ptr, size_t size, size_t nitems));
+
+FAISS_DECLARE_CLASS(CustomIOWriter)
+FAISS_DECLARE_DESTRUCTOR(CustomIOWriter)
+
+int faiss_CustomIOWriter_new(
+        FaissCustomIOWriter** p_out,
+        size_t (*func_in)(const void* ptr, size_t size, size_t nitems));
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/c_api/index_io_c.cpp b/c_api/index_io_c.cpp
index 889f32a251..4e50cd6926 100644
--- a/c_api/index_io_c.cpp
+++ b/c_api/index_io_c.cpp
@@ -15,6 +15,8 @@
 
 using faiss::Index;
 using faiss::IndexBinary;
+using faiss::IOReader;
+using faiss::IOWriter;
 using faiss::VectorTransform;
 
 int faiss_write_index(const FaissIndex* idx, FILE* f) {
@@ -31,6 +33,19 @@ int faiss_write_index_fname(const FaissIndex* idx, const char* fname) {
     CATCH_AND_HANDLE
 }
 
+int faiss_write_index_custom(
+        const FaissIndex* idx,
+        FaissIOWriter* io_writer,
+        int io_flags) {
+    try {
+        faiss::write_index(
+                reinterpret_cast<const Index*>(idx),
+                reinterpret_cast<IOWriter*>(io_writer),
+                io_flags);
+    }
+    CATCH_AND_HANDLE
+}
+
 int faiss_read_index(FILE* f, int io_flags, FaissIndex** p_out) {
     try {
         auto out = faiss::read_index(f, io_flags);
@@ -50,6 +65,18 @@ int faiss_read_index_fname(
     CATCH_AND_HANDLE
 }
 
+int faiss_read_index_custom(
+        FaissIOReader* io_reader,
+        int io_flags,
+        FaissIndex** p_out) {
+    try {
+        auto out = faiss::read_index(
+                reinterpret_cast<IOReader*>(io_reader), io_flags);
+        *p_out = reinterpret_cast<FaissIndex*>(out);
+    }
+    CATCH_AND_HANDLE
+}
+
 int faiss_write_index_binary(const FaissIndexBinary* idx, FILE* f) {
     try {
         faiss::write_index_binary(reinterpret_cast<const IndexBinary*>(idx), f);
@@ -67,6 +94,17 @@ int faiss_write_index_binary_fname(
     CATCH_AND_HANDLE
 }
 
+int faiss_write_index_binary_custom(
+        const FaissIndexBinary* idx,
+        FaissIOWriter* io_writer) {
+    try {
+        faiss::write_index_binary(
+                reinterpret_cast<const IndexBinary*>(idx),
+                reinterpret_cast<IOWriter*>(io_writer));
+    }
+    CATCH_AND_HANDLE
+}
+
 int faiss_read_index_binary(FILE* f, int io_flags, FaissIndexBinary** p_out) {
     try {
         auto out = faiss::read_index_binary(f, io_flags);
@@ -86,6 +124,18 @@ int faiss_read_index_binary_fname(
     CATCH_AND_HANDLE
 }
 
+int faiss_read_index_binary_custom(
+        FaissIOReader* io_reader,
+        int io_flags,
+        FaissIndexBinary** p_out) {
+    try {
+        auto out = faiss::read_index_binary(
+                reinterpret_cast<IOReader*>(io_reader), io_flags);
+        *p_out = reinterpret_cast<FaissIndexBinary*>(out);
+    }
+    CATCH_AND_HANDLE
+}
+
 int faiss_read_VectorTransform_fname(
         const char* fname,
         FaissVectorTransform** p_out) {
diff --git a/c_api/index_io_c.h b/c_api/index_io_c.h
index fd4da615e5..8e390dc920 100644
--- a/c_api/index_io_c.h
+++ b/c_api/index_io_c.h
@@ -16,6 +16,7 @@
 #include "Index_c.h"
 #include "VectorTransform_c.h"
 #include "faiss_c.h"
+#include "impl/io_c.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -32,6 +33,13 @@ int faiss_write_index(const FaissIndex* idx, FILE* f);
  */
 int faiss_write_index_fname(const FaissIndex* idx, const char* fname);
 
+/** Write index to a custom writer.
+ */ +int faiss_write_index_custom( + const FaissIndex* idx, + FaissIOWriter* io_writer, + int io_flags); + #define FAISS_IO_FLAG_MMAP 1 #define FAISS_IO_FLAG_READ_ONLY 2 @@ -45,6 +53,13 @@ int faiss_read_index(FILE* f, int io_flags, FaissIndex** p_out); */ int faiss_read_index_fname(const char* fname, int io_flags, FaissIndex** p_out); +/** Read index from a custom reader. + */ +int faiss_read_index_custom( + FaissIOReader* io_reader, + int io_flags, + FaissIndex** p_out); + /** Write index to a file. * This is equivalent to `faiss::write_index_binary` when a file descriptor is * provided. @@ -59,6 +74,12 @@ int faiss_write_index_binary_fname( const FaissIndexBinary* idx, const char* fname); +/** Write binary index to a custom writer. + */ +int faiss_write_index_binary_custom( + const FaissIndexBinary* idx, + FaissIOWriter* io_writer); + /** Read index from a file. * This is equivalent to `faiss:read_index_binary` when a file descriptor is * given. @@ -73,6 +94,13 @@ int faiss_read_index_binary_fname( int io_flags, FaissIndexBinary** p_out); +/** Read binary index from a custom reader. + */ +int faiss_read_index_binary_custom( + FaissIOReader* io_reader, + int io_flags, + FaissIndexBinary** p_out); + /** Read vector transform from a file. * This is equivalent to `faiss:read_VectorTransform` when a file path is given. */ diff --git a/cmake/thirdparty/fetch_rapids.cmake b/cmake/thirdparty/fetch_rapids.cmake index 2ed6a78cf5..ba0dac02c2 100644 --- a/cmake/thirdparty/fetch_rapids.cmake +++ b/cmake/thirdparty/fetch_rapids.cmake @@ -15,7 +15,7 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. 
# ============================================================================= -set(RAPIDS_VERSION "24.12") +set(RAPIDS_VERSION "25.04") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/conda/faiss-gpu-cuvs/build-lib.sh b/conda/faiss-gpu-cuvs/build-lib.sh index 37f0381809..3fd46428dd 100644 --- a/conda/faiss-gpu-cuvs/build-lib.sh +++ b/conda/faiss-gpu-cuvs/build-lib.sh @@ -10,6 +10,7 @@ set -e # Build libfaiss.so/libfaiss_avx2.so/libfaiss_avx512.so cmake -B _build \ -DBUILD_SHARED_LIBS=ON \ + -DFAISS_ENABLE_C_API=ON \ -DBUILD_TESTING=OFF \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ @@ -20,7 +21,7 @@ cmake -B _build \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_BUILD_TYPE=Release . -make -C _build -j$(nproc) faiss faiss_avx2 faiss_avx512 +make -C _build -j$(nproc) faiss faiss_avx2 faiss_avx512 faiss_c faiss_c_avx2 faiss_c_avx512 cmake --install _build --prefix $PREFIX cmake --install _build --prefix _libfaiss_stage/ diff --git a/conda/faiss-gpu-cuvs/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml index a8edf41e58..c4de80bc38 100644 --- a/conda/faiss-gpu-cuvs/meta.yaml +++ b/conda/faiss-gpu-cuvs/meta.yaml @@ -50,7 +50,7 @@ outputs: - {{ compiler('cxx') }} =12.4 - sysroot_linux-64 =2.17 # [linux64] - llvm-openmp # [osx] - - cmake >=3.26.4 + - cmake >=3.30.4 - make =4.2 # [not win] - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - mkl =2023 # [x86_64] @@ -65,17 +65,18 @@ outputs: host: - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - mkl =2023 # [x86_64] - - openblas =0.3 # [not x86_64] - - libcuvs =24.12 + - openblas =0.3.29 # [not x86_64] + - libcuvs =25.04 - cuda-version {{ cuda_constraints }} run: - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - mkl =2023 # [x86_64] - - openblas =0.3 # [not x86_64] + - openblas =0.3.29 # [not x86_64] - cuda-cudart {{ cuda_constraints }} - libcublas {{ libcublas_constraints }} - - libcuvs =24.12 + - libcuvs =25.04 - 
cuda-version {{ cuda_constraints }} + - libnvjitlink test: requires: - conda-build @@ -100,14 +101,16 @@ outputs: - cmake >=3.26.4 - make =4.2 # [not win] - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - - mkl =2023 # [x86_64] + - mkl =2023.0 # [x86_64] - cuda-toolkit {{ cudatoolkit }} host: + - mkl =2023.0 # [x86_64] - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - python {{ python }} - numpy >=1.19,<2 - {{ pin_subpackage('libfaiss', exact=True) }} run: + - mkl =2023.0 # [x86_64] - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - python {{ python }} - numpy >=1.19,<2 diff --git a/conda/faiss-gpu/build-lib.sh b/conda/faiss-gpu/build-lib.sh index befad80547..71c77e3ca1 100755 --- a/conda/faiss-gpu/build-lib.sh +++ b/conda/faiss-gpu/build-lib.sh @@ -16,6 +16,7 @@ fi # Build libfaiss.so/libfaiss_avx2.so/libfaiss_avx512.so cmake -B _build \ -DBUILD_SHARED_LIBS=ON \ + -DFAISS_ENABLE_C_API=ON \ -DBUILD_TESTING=OFF \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ @@ -26,7 +27,7 @@ cmake -B _build \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_BUILD_TYPE=Release . 
-make -C _build -j$(nproc) faiss faiss_avx2 faiss_avx512 +make -C _build -j$(nproc) faiss faiss_avx2 faiss_avx512 faiss_c faiss_c_avx2 faiss_c_avx512 cmake --install _build --prefix $PREFIX cmake --install _build --prefix _libfaiss_stage/ diff --git a/conda/faiss-gpu/meta.yaml b/conda/faiss-gpu/meta.yaml index f15c9556d9..5d5b52c09e 100644 --- a/conda/faiss-gpu/meta.yaml +++ b/conda/faiss-gpu/meta.yaml @@ -57,10 +57,10 @@ outputs: - gcc_linux-64 =11.2 # [cudatoolkit == '11.4.4'] host: - mkl =2023.0 # [x86_64] - - openblas =0.3 # [not x86_64] + - openblas =0.3.29 # [not x86_64] run: - mkl =2023.0 # [x86_64] - - openblas =0.3 # [not x86_64] + - openblas =0.3.29 # [not x86_64] - cuda-cudart {{ cuda_constraints }} - libcublas {{ libcublas_constraints }} test: @@ -89,12 +89,15 @@ outputs: - make =4.4 # [osx and arm64] - _openmp_mutex =4.5=2_kmp_llvm # [x86_64 and not win] - cuda-toolkit {{ cudatoolkit }} + - mkl-devel =2023.0 # [x86_64] host: + - mkl =2023.0 # [x86_64] - python {{ python }} - numpy >=1.19,<2 - _openmp_mutex =4.5=2_kmp_llvm # [x86_64 and not win] - {{ pin_subpackage('libfaiss', exact=True) }} run: + - mkl =2023.0 # [x86_64] - python {{ python }} - numpy >=1.19,<2 - packaging diff --git a/conda/faiss/build-lib-arm64.sh b/conda/faiss/build-lib-arm64.sh index fbc261515c..e08da7d10b 100755 --- a/conda/faiss/build-lib-arm64.sh +++ b/conda/faiss/build-lib-arm64.sh @@ -10,13 +10,15 @@ set -e # Build libfaiss.so cmake -B _build \ -DBUILD_SHARED_LIBS=ON \ + -DFAISS_ENABLE_C_API=ON \ -DBUILD_TESTING=OFF \ + -DFAISS_OPT_LEVEL=sve \ -DFAISS_ENABLE_GPU=OFF \ -DFAISS_ENABLE_PYTHON=OFF \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_BUILD_TYPE=Release . 
-make -C _build -j$(nproc) faiss +make -C _build -j$(nproc) faiss faiss_sve faiss_c faiss_c_sve cmake --install _build --prefix $PREFIX cmake --install _build --prefix _libfaiss_stage/ diff --git a/conda/faiss/build-lib-osx.sh b/conda/faiss/build-lib-osx.sh index ad099b46e3..3de5f650a1 100755 --- a/conda/faiss/build-lib-osx.sh +++ b/conda/faiss/build-lib-osx.sh @@ -10,6 +10,7 @@ set -e # Build libfaiss.so/libfaiss_avx2.so/libfaiss_avx512.so cmake -B _build \ -DBUILD_SHARED_LIBS=ON \ + -DFAISS_ENABLE_C_API=ON \ -DBUILD_TESTING=OFF \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=OFF \ @@ -21,7 +22,7 @@ cmake -B _build \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_BUILD_TYPE=Release . -make -C _build -j$(nproc) faiss faiss_avx2 faiss_avx512 +make -C _build -j$(nproc) faiss faiss_avx2 faiss_avx512 faiss_c faiss_c_avx2 faiss_c_avx512 cmake --install _build --prefix $PREFIX cmake --install _build --prefix _libfaiss_stage/ diff --git a/conda/faiss/build-lib.sh b/conda/faiss/build-lib.sh index 8c986d5e68..2db92e890d 100755 --- a/conda/faiss/build-lib.sh +++ b/conda/faiss/build-lib.sh @@ -10,6 +10,7 @@ set -e # Build libfaiss.so/libfaiss_avx2.so/libfaiss_avx512.so cmake -B _build \ -DBUILD_SHARED_LIBS=ON \ + -DFAISS_ENABLE_C_API=ON \ -DBUILD_TESTING=OFF \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=OFF \ @@ -18,7 +19,7 @@ cmake -B _build \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_BUILD_TYPE=Release . 
-make -C _build -j$(nproc) faiss faiss_avx2 faiss_avx512 +make -C _build -j$(nproc) faiss faiss_avx2 faiss_avx512 faiss_c faiss_c_avx2 faiss_c_avx512 cmake --install _build --prefix $PREFIX cmake --install _build --prefix _libfaiss_stage/ diff --git a/conda/faiss/meta.yaml b/conda/faiss/meta.yaml index 947b136179..81f2cc6501 100644 --- a/conda/faiss/meta.yaml +++ b/conda/faiss/meta.yaml @@ -45,43 +45,37 @@ outputs: - make =4.4 # [osx and arm64] {% if PY_VER == '3.9' or PY_VER == '3.10' or PY_VER == '3.11' %} - mkl-devel =2023.0 # [x86_64] - - liblief =0.12.3 # [not win] - python_abi <3.12 {% elif PY_VER == '3.12' %} - mkl-devel >=2023.2.0 # [x86_64 and not win] - mkl-devel =2023.1.0 # [x86_64 and win] - - liblief =0.15.1 # [not win] - python_abi =3.12 {% endif %} host: - python {{ python }} {% if PY_VER == '3.9' or PY_VER == '3.10' or PY_VER == '3.11' %} - mkl =2023.0 # [x86_64] - - liblief =0.12.3 # [not win] - python_abi <3.12 {% elif PY_VER == '3.12' %} - mkl >=2023.2.0 # [x86_64 and not win] - mkl =2023.1.0 # [x86_64 and win] - - liblief =0.15.1 # [not win] - python_abi =3.12 {% endif %} - - openblas =0.3 # [not x86_64] + - openblas =0.3.29 # [not x86_64] run: - python {{ python }} {% if PY_VER == '3.9' or PY_VER == '3.10' or PY_VER == '3.11' %} - mkl =2023.0 # [x86_64] - - liblief =0.12.3 # [not win] - python_abi <3.12 {% elif PY_VER == '3.12' %} - mkl >=2023.2.0 # [x86_64 and not win] - mkl =2023.1.0 # [x86_64 and win] - - liblief =0.15.1 # [not win] - python_abi =3.12 {% endif %} - - openblas =0.3 # [not x86_64] + - openblas =0.3.29 # [not x86_64] test: requires: - - conda-build + - conda-build =25.1.2 commands: - test -f $PREFIX/lib/libfaiss$SHLIB_EXT # [not win] - test -f $PREFIX/lib/libfaiss_avx2$SHLIB_EXT # [x86_64 and not win] diff --git a/demos/demo_residual_quantizer.cpp b/demos/demo_residual_quantizer.cpp index cf9c0cdf85..2f88be38f1 100644 --- a/demos/demo_residual_quantizer.cpp +++ b/demos/demo_residual_quantizer.cpp @@ -103,7 +103,7 @@ int main() 
{ index.is_trained = true; // override vectors - index.codes = raw_codes; + index.codes = faiss::MaybeOwnedVector(raw_codes); index.ntotal = nb; tic(); diff --git a/faiss/AutoTune.cpp b/faiss/AutoTune.cpp index 35aa6d54af..438559dc3f 100644 --- a/faiss/AutoTune.cpp +++ b/faiss/AutoTune.cpp @@ -15,7 +15,6 @@ #include #include -#include #include #include @@ -315,9 +314,6 @@ bool ParameterSpace::combination_ge(size_t c1, size_t c2) const { return true; } -#define DC(classname) \ - const classname* ix = dynamic_cast(index) - static void init_pq_ParameterRange( const ProductQuantizer& pq, ParameterRange& pr) { @@ -341,6 +337,10 @@ ParameterRange& ParameterSpace::add_range(const std::string& name) { return parameter_ranges.back(); } +// Do not use this macro if ix will be unused +#define DC(classname) \ + const classname* ix = dynamic_cast(index) + /// initialize with reasonable parameters for this type of index void ParameterSpace::initialize(const Index* index) { if (DC(IndexPreTransform)) { @@ -396,7 +396,7 @@ void ParameterSpace::initialize(const Index* index) { std::numeric_limits::infinity()); } } - if (DC(IndexIVFPQR)) { + if (dynamic_cast(index)) { ParameterRange& pr = add_range("k_factor"); for (int i = 0; i <= 6; i++) { pr.values.push_back(1 << i); @@ -412,9 +412,6 @@ void ParameterSpace::initialize(const Index* index) { #undef DC -// non-const version -#define DC(classname) classname* ix = dynamic_cast(index) - /// set a combination of parameters on an index void ParameterSpace::set_index_parameters(Index* index, size_t cno) const { for (int i = 0; i < parameter_ranges.size(); i++) { @@ -444,6 +441,10 @@ void ParameterSpace::set_index_parameters( } } +// non-const version +// Do not use this macro if ix will be unused +#define DC(classname) classname* ix = dynamic_cast(index) + void ParameterSpace::set_index_parameter( Index* index, const std::string& name, @@ -576,6 +577,8 @@ void ParameterSpace::set_index_parameter( name.c_str()); } +#undef DC + void 
ParameterSpace::display() const { printf("ParameterSpace, %zd parameters, %zd combinations:\n", parameter_ranges.size(), diff --git a/faiss/CMakeLists.txt b/faiss/CMakeLists.txt index 32b45c204d..93bc932a4d 100644 --- a/faiss/CMakeLists.txt +++ b/faiss/CMakeLists.txt @@ -28,6 +28,7 @@ set(FAISS_SRC IndexIVFAdditiveQuantizerFastScan.cpp IndexIVFPQFastScan.cpp IndexIVFPQR.cpp + IndexIVFRaBitQ.cpp IndexIVFSpectralHash.cpp IndexLSH.cpp IndexNNDescent.cpp @@ -39,6 +40,7 @@ set(FAISS_SRC IndexIVFIndependentQuantizer.cpp IndexPQFastScan.cpp IndexPreTransform.cpp + IndexRaBitQ.cpp IndexRefine.cpp IndexReplicas.cpp IndexRowwiseMinMax.cpp @@ -61,6 +63,7 @@ set(FAISS_SRC impl/PolysemousTraining.cpp impl/ProductQuantizer.cpp impl/AdditiveQuantizer.cpp + impl/RaBitQuantizer.cpp impl/ResidualQuantizer.cpp impl/LocalSearchQuantizer.cpp impl/ProductAdditiveQuantizer.cpp @@ -70,12 +73,12 @@ set(FAISS_SRC impl/io.cpp impl/kmeans1d.cpp impl/lattice_Zn.cpp + impl/mapped_io.cpp impl/pq4_fast_scan.cpp impl/pq4_fast_scan_search_1.cpp impl/pq4_fast_scan_search_qbs.cpp impl/residual_quantizer_encode_steps.cpp - impl/io.cpp - impl/lattice_Zn.cpp + impl/zerocopy_io.cpp impl/NNDescent.cpp invlists/BlockInvertedLists.cpp invlists/DirectMap.cpp @@ -124,6 +127,7 @@ set(FAISS_HEADERS IndexIVFAdditiveQuantizerFastScan.h IndexIVFPQFastScan.h IndexIVFPQR.h + IndexIVFRaBitQ.h IndexIVFSpectralHash.h IndexLSH.h IndexNeuralNetCodec.h @@ -137,6 +141,7 @@ set(FAISS_HEADERS IndexPreTransform.h IndexRefine.h IndexReplicas.h + IndexRaBitQ.h IndexRowwiseMinMax.h IndexScalarQuantizer.h IndexShards.h @@ -160,12 +165,14 @@ set(FAISS_HEADERS impl/LocalSearchQuantizer.h impl/ProductAdditiveQuantizer.h impl/LookupTableScaler.h + impl/maybe_owned_vector.h impl/NNDescent.h impl/NSG.h impl/PolysemousTraining.h impl/ProductQuantizer-inl.h impl/ProductQuantizer.h impl/Quantizer.h + impl/RaBitQuantizer.h impl/ResidualQuantizer.h impl/ResultHandler.h impl/ScalarQuantizer.h @@ -357,6 +364,10 @@ if(WIN32) 
target_compile_definitions(faiss_sve PRIVATE FAISS_MAIN_LIB) endif() +if(WIN32) + set_target_properties(faiss PROPERTIES LINK_FLAGS "-Wl,--export-all-symbols") +endif() + string(FIND "${CMAKE_CXX_FLAGS}" "FINTEGER" finteger_idx) if (${finteger_idx} EQUAL -1) target_compile_definitions(faiss PRIVATE FINTEGER=int) @@ -439,7 +450,7 @@ if(FAISS_OPT_LEVEL STREQUAL "avx512") ) endif() if(FAISS_OPT_LEVEL STREQUAL "avx512_spr") - install(TARGETS faiss_avx2 faiss_avx512 faiss_avx512_spr + install(TARGETS faiss_avx2 faiss_avx512_spr EXPORT faiss-targets ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/faiss/Clustering.cpp b/faiss/Clustering.cpp index 5a4ad15468..47bd03d797 100644 --- a/faiss/Clustering.cpp +++ b/faiss/Clustering.cpp @@ -34,22 +34,6 @@ Clustering::Clustering(int d, int k) : d(d), k(k) {} Clustering::Clustering(int d, int k, const ClusteringParameters& cp) : ClusteringParameters(cp), d(d), k(k) {} -static double imbalance_factor(int n, int k, int64_t* assign) { - std::vector hist(k, 0); - for (int i = 0; i < n; i++) - hist[assign[i]]++; - - double tot = 0, uf = 0; - - for (int i = 0; i < k; i++) { - tot += hist[i]; - uf += hist[i] * (double)hist[i]; - } - uf = uf * k / (tot * tot); - - return uf; -} - void Clustering::post_process_centroids() { if (spherical) { fvec_renorm_L2(d, k, centroids.data()); diff --git a/faiss/IVFlib.cpp b/faiss/IVFlib.cpp index f6bed2a4c4..bbc022b71b 100644 --- a/faiss/IVFlib.cpp +++ b/faiss/IVFlib.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -16,7 +17,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -198,12 +201,32 @@ static void shift_and_add( memcpy(dst.data() + insert_point, src.data(), src.size() * sizeof(T)); } +template +static void shift_and_add( + MaybeOwnedVector& dst, + size_t remove, + const MaybeOwnedVector& src) { + if (remove > 0) + memmove(dst.data(), + dst.data() + remove, + (dst.size() - remove) * 
sizeof(T)); + size_t insert_point = dst.size() - remove; + dst.resize(insert_point + src.size()); + memcpy(dst.data() + insert_point, src.data(), src.size() * sizeof(T)); +} + template static void remove_from_begin(std::vector& v, size_t remove) { if (remove > 0) v.erase(v.begin(), v.begin() + remove); } +template +static void remove_from_begin(MaybeOwnedVector& v, size_t remove) { + if (remove > 0) + v.erase(v.begin(), v.begin() + remove); +} + void SlidingIndexWindow::step(const Index* sub_index, bool remove_oldest) { FAISS_THROW_IF_NOT_MSG( !remove_oldest || n_slice > 0, @@ -519,5 +542,195 @@ void ivf_residual_add_from_flat_codes( index->ntotal += nb; } +int64_t DefaultShardingFunction::operator()(int64_t i, int64_t shard_count) { + return i % shard_count; +} + +void handle_ivf( + faiss::IndexIVF* index, + int64_t shard_count, + const std::string& filename_template, + ShardingFunction* sharding_function, + bool generate_ids) { + std::vector sharded_indexes(shard_count); + auto clone = static_cast(faiss::clone_index(index)); + clone->quantizer->reset(); + for (int64_t i = 0; i < shard_count; i++) { + sharded_indexes[i] = + static_cast(faiss::clone_index(clone)); + if (generate_ids) { + // Assume the quantizer does not natively support add_with_ids. + sharded_indexes[i]->quantizer = + new IndexIDMap2(sharded_indexes[i]->quantizer); + } + } + + // assign centroids to each sharded Index based on sharding_function, and + // add them to the quantizer of each sharded index + std::vector> sharded_centroids(shard_count); + std::vector> xids(shard_count); + for (int64_t i = 0; i < index->quantizer->ntotal; i++) { + int64_t shard_id = (*sharding_function)(i, shard_count); + // Since the quantizer does not natively support add_with_ids, we simply + // generate them. 
+ xids[shard_id].push_back(i); + float* reconstructed = new float[index->quantizer->d]; + index->quantizer->reconstruct(i, reconstructed); + sharded_centroids[shard_id].insert( + sharded_centroids[shard_id].end(), + &reconstructed[0], + &reconstructed[index->quantizer->d]); + delete[] reconstructed; + } + for (int64_t i = 0; i < shard_count; i++) { + if (generate_ids) { + sharded_indexes[i]->quantizer->add_with_ids( + sharded_centroids[i].size() / index->quantizer->d, + sharded_centroids[i].data(), + xids[i].data()); + } else { + sharded_indexes[i]->quantizer->add( + sharded_centroids[i].size() / index->quantizer->d, + sharded_centroids[i].data()); + } + } + + for (int64_t i = 0; i < shard_count; i++) { + char fname[256]; + snprintf(fname, 256, filename_template.c_str(), i); + faiss::write_index(sharded_indexes[i], fname); + } + + for (int64_t i = 0; i < shard_count; i++) { + delete sharded_indexes[i]; + } +} + +void handle_binary_ivf( + faiss::IndexBinaryIVF* index, + int64_t shard_count, + const std::string& filename_template, + ShardingFunction* sharding_function, + bool generate_ids) { + std::vector sharded_indexes(shard_count); + + auto clone = static_cast( + faiss::clone_binary_index(index)); + clone->quantizer->reset(); + + for (int64_t i = 0; i < shard_count; i++) { + sharded_indexes[i] = static_cast( + faiss::clone_binary_index(clone)); + if (generate_ids) { + // Assume the quantizer does not natively support add_with_ids. 
+ sharded_indexes[i]->quantizer = + new IndexBinaryIDMap2(sharded_indexes[i]->quantizer); + } + } + + // assign centroids to each sharded Index based on sharding_function, and + // add them to the quantizer of each sharded index + int64_t reconstruction_size = index->quantizer->d / 8; + std::vector> sharded_centroids(shard_count); + std::vector> xids(shard_count); + for (int64_t i = 0; i < index->quantizer->ntotal; i++) { + int64_t shard_id = (*sharding_function)(i, shard_count); + // Since the quantizer does not natively support add_with_ids, we simply + // generate them. + xids[shard_id].push_back(i); + uint8_t* reconstructed = new uint8_t[reconstruction_size]; + index->quantizer->reconstruct(i, reconstructed); + sharded_centroids[shard_id].insert( + sharded_centroids[shard_id].end(), + &reconstructed[0], + &reconstructed[reconstruction_size]); + delete[] reconstructed; + } + for (int64_t i = 0; i < shard_count; i++) { + if (generate_ids) { + sharded_indexes[i]->quantizer->add_with_ids( + sharded_centroids[i].size() / reconstruction_size, + sharded_centroids[i].data(), + xids[i].data()); + } else { + sharded_indexes[i]->quantizer->add( + sharded_centroids[i].size() / reconstruction_size, + sharded_centroids[i].data()); + } + } + + for (int64_t i = 0; i < shard_count; i++) { + char fname[256]; + snprintf(fname, 256, filename_template.c_str(), i); + faiss::write_index_binary(sharded_indexes[i], fname); + } + + for (int64_t i = 0; i < shard_count; i++) { + delete sharded_indexes[i]; + } +} + +template +void sharding_helper( + IndexType* index, + int64_t shard_count, + const std::string& filename_template, + ShardingFunction* sharding_function, + bool generate_ids) { + FAISS_THROW_IF_MSG(index->quantizer->ntotal == 0, "No centroids to shard."); + FAISS_THROW_IF_MSG( + filename_template.find("%d") == std::string::npos, + "Invalid filename_template. 
Must contain format specifier for shard count."); + + DefaultShardingFunction default_sharding_function; + if (sharding_function == nullptr) { + sharding_function = &default_sharding_function; + } + + if (typeid(IndexType) == typeid(faiss::IndexIVF)) { + handle_ivf( + dynamic_cast(index), + shard_count, + filename_template, + sharding_function, + generate_ids); + } else if (typeid(IndexType) == typeid(faiss::IndexBinaryIVF)) { + handle_binary_ivf( + dynamic_cast(index), + shard_count, + filename_template, + sharding_function, + generate_ids); + } +} + +void shard_ivf_index_centroids( + faiss::IndexIVF* index, + int64_t shard_count, + const std::string& filename_template, + ShardingFunction* sharding_function, + bool generate_ids) { + sharding_helper( + index, + shard_count, + filename_template, + sharding_function, + generate_ids); +} + +void shard_binary_ivf_index_centroids( + faiss::IndexBinaryIVF* index, + int64_t shard_count, + const std::string& filename_template, + ShardingFunction* sharding_function, + bool generate_ids) { + sharding_helper( + index, + shard_count, + filename_template, + sharding_function, + generate_ids); +} + } // namespace ivflib } // namespace faiss diff --git a/faiss/IVFlib.h b/faiss/IVFlib.h index 6f6a590c72..8a6dd3f630 100644 --- a/faiss/IVFlib.h +++ b/faiss/IVFlib.h @@ -14,6 +14,7 @@ * IndexIVFs embedded within an IndexPreTransform. 
*/ +#include #include @@ -167,6 +168,47 @@ void ivf_residual_add_from_flat_codes( const uint8_t* codes, int64_t code_size = -1); +struct ShardingFunction { + virtual int64_t operator()(int64_t i, int64_t shard_count) = 0; + virtual ~ShardingFunction() = default; + ShardingFunction() {} + ShardingFunction(const ShardingFunction&) = default; + ShardingFunction(ShardingFunction&&) = default; + ShardingFunction& operator=(const ShardingFunction&) = default; + ShardingFunction& operator=(ShardingFunction&&) = default; +}; +struct DefaultShardingFunction : ShardingFunction { + int64_t operator()(int64_t i, int64_t shard_count) override; +}; + +/** + * Shards an IVF index's centroids by the given sharding function, and writes + * each shard to the path given by filename_template. The centroids must already + * be added to the index quantizer. + * + * @param index The IVF index containing centroids to shard. + * @param shard_count Number of shards. + * @param filename_template Template for shard filenames. + * @param sharding_function The function to shard by. The default is ith vector + * mod shard_count. + * @param generate_ids Generates ids using IndexIDMap2. If true, ids will + * match the default ids in the unsharded index. + * Exactly shard_count shard files are written to disk.
+ */ +void shard_ivf_index_centroids( + IndexIVF* index, + int64_t shard_count = 20, + const std::string& filename_template = "shard.%d.index", + ShardingFunction* sharding_function = nullptr, + bool generate_ids = false); + +void shard_binary_ivf_index_centroids( + faiss::IndexBinaryIVF* index, + int64_t shard_count = 20, + const std::string& filename_template = "shard.%d.index", + ShardingFunction* sharding_function = nullptr, + bool generate_ids = false); + } // namespace ivflib } // namespace faiss diff --git a/faiss/Index.h b/faiss/Index.h index 544086f9ad..2474f08be4 100644 --- a/faiss/Index.h +++ b/faiss/Index.h @@ -17,7 +17,7 @@ #include #define FAISS_VERSION_MAJOR 1 -#define FAISS_VERSION_MINOR 10 +#define FAISS_VERSION_MINOR 11 #define FAISS_VERSION_PATCH 0 // Macro to combine the version components into a single string diff --git a/faiss/IndexBinaryFlat.cpp b/faiss/IndexBinaryFlat.cpp index f6e2e218c0..bbb51d7c93 100644 --- a/faiss/IndexBinaryFlat.cpp +++ b/faiss/IndexBinaryFlat.cpp @@ -37,8 +37,8 @@ void IndexBinaryFlat::search( int32_t* distances, idx_t* labels, const SearchParameters* params) const { - FAISS_THROW_IF_NOT_MSG( - !params, "search params not supported for this index"); + // Extract IDSelector from params if present + const IDSelector* sel = params ? 
params->sel : nullptr; FAISS_THROW_IF_NOT(k > 0); const idx_t block_size = query_batch_size; @@ -60,7 +60,8 @@ void IndexBinaryFlat::search( ntotal, code_size, /* ordered = */ true, - approx_topk_mode); + approx_topk_mode, + sel); } else { hammings_knn_mc( x + s * code_size, @@ -70,7 +71,8 @@ void IndexBinaryFlat::search( k, code_size, distances + s * k, - labels + s * k); + labels + s * k, + sel); } } } @@ -107,9 +109,9 @@ void IndexBinaryFlat::range_search( int radius, RangeSearchResult* result, const SearchParameters* params) const { - FAISS_THROW_IF_NOT_MSG( - !params, "search params not supported for this index"); - hamming_range_search(x, xb.data(), n, ntotal, radius, code_size, result); + const IDSelector* sel = params ? params->sel : nullptr; + hamming_range_search( + x, xb.data(), n, ntotal, radius, code_size, result, sel); } } // namespace faiss diff --git a/faiss/IndexBinaryFlat.h b/faiss/IndexBinaryFlat.h index f6188322ad..0ce43f3e9d 100644 --- a/faiss/IndexBinaryFlat.h +++ b/faiss/IndexBinaryFlat.h @@ -14,6 +14,7 @@ #include +#include #include namespace faiss { @@ -21,7 +22,7 @@ namespace faiss { /** Index that stores the full vectors and performs exhaustive search. */ struct IndexBinaryFlat : IndexBinary { /// database vectors, size ntotal * d / 8 - std::vector xb; + MaybeOwnedVector xb; /** Select between using a heap or counting to select the k smallest values * when scanning inverted lists. 
diff --git a/faiss/IndexFlatCodes.cpp b/faiss/IndexFlatCodes.cpp index 61cc995ce9..47854cccf2 100644 --- a/faiss/IndexFlatCodes.cpp +++ b/faiss/IndexFlatCodes.cpp @@ -112,7 +112,7 @@ CodePacker* IndexFlatCodes::get_CodePacker() const { } void IndexFlatCodes::permute_entries(const idx_t* perm) { - std::vector new_codes(codes.size()); + MaybeOwnedVector new_codes(codes.size()); for (idx_t i = 0; i < ntotal; i++) { memcpy(new_codes.data() + i * code_size, diff --git a/faiss/IndexFlatCodes.h b/faiss/IndexFlatCodes.h index 8e5be6c4a5..5ca420b94a 100644 --- a/faiss/IndexFlatCodes.h +++ b/faiss/IndexFlatCodes.h @@ -7,9 +7,11 @@ #pragma once +#include + #include #include -#include +#include namespace faiss { @@ -21,7 +23,7 @@ struct IndexFlatCodes : Index { size_t code_size; /// encoded dataset, size ntotal * code_size - std::vector codes; + MaybeOwnedVector codes; IndexFlatCodes(); diff --git a/faiss/IndexHNSW.cpp b/faiss/IndexHNSW.cpp index a65d68dd35..331df9d025 100644 --- a/faiss/IndexHNSW.cpp +++ b/faiss/IndexHNSW.cpp @@ -8,9 +8,7 @@ #include #include -#include #include -#include #include #include #include @@ -124,7 +122,7 @@ void hnsw_add_vertices( int i1 = n; for (int pt_level = hist.size() - 1; - pt_level >= !index_hnsw.init_level0; + pt_level >= int(!index_hnsw.init_level0); pt_level--) { int i0 = i1 - hist[pt_level]; @@ -212,7 +210,9 @@ IndexHNSW::IndexHNSW(int d, int M, MetricType metric) : Index(d, metric), hnsw(M) {} IndexHNSW::IndexHNSW(Index* storage, int M) - : Index(storage->d, storage->metric_type), hnsw(M), storage(storage) {} + : Index(storage->d, storage->metric_type), hnsw(M), storage(storage) { + metric_arg = storage->metric_arg; +} IndexHNSW::~IndexHNSW() { if (own_fields) { @@ -237,19 +237,19 @@ void hnsw_search( idx_t n, const float* x, BlockResultHandler& bres, - const SearchParameters* params_in) { + const SearchParameters* params) { FAISS_THROW_IF_NOT_MSG( index->storage, "No storage index, please use IndexHNSWFlat (or variants) " "instead of 
IndexHNSW directly"); - const SearchParametersHNSW* params = nullptr; const HNSW& hnsw = index->hnsw; int efSearch = hnsw.efSearch; - if (params_in) { - params = dynamic_cast(params_in); - FAISS_THROW_IF_NOT_MSG(params, "params type invalid"); - efSearch = params->efSearch; + if (params) { + if (const SearchParametersHNSW* hnsw_params = + dynamic_cast(params)) { + efSearch = hnsw_params->efSearch; + } } size_t n1 = 0, n2 = 0, ndis = 0, nhops = 0; @@ -294,13 +294,13 @@ void IndexHNSW::search( idx_t k, float* distances, idx_t* labels, - const SearchParameters* params_in) const { + const SearchParameters* params) const { FAISS_THROW_IF_NOT(k > 0); using RH = HeapBlockResultHandler; RH bres(n, distances, labels, k); - hnsw_search(this, n, x, bres, params_in); + hnsw_search(this, n, x, bres, params); if (is_similarity_metric(this->metric_type)) { // we need to revert the negated distances @@ -408,17 +408,10 @@ void IndexHNSW::search_level_0( idx_t* labels, int nprobe, int search_type, - const SearchParameters* params_in) const { + const SearchParameters* params) const { FAISS_THROW_IF_NOT(k > 0); FAISS_THROW_IF_NOT(nprobe > 0); - const SearchParametersHNSW* params = nullptr; - - if (params_in) { - params = dynamic_cast(params_in); - FAISS_THROW_IF_NOT_MSG(params, "params type invalid"); - } - storage_idx_t ntotal = hnsw.levels.size(); using RH = HeapBlockResultHandler; diff --git a/faiss/IndexHNSW.h b/faiss/IndexHNSW.h index c796d7e18a..2d983b3c16 100644 --- a/faiss/IndexHNSW.h +++ b/faiss/IndexHNSW.h @@ -138,7 +138,7 @@ struct IndexHNSWPQ : IndexHNSW { void train(idx_t n, const float* x) override; }; -/** SQ index topped with with a HNSW structure to access elements +/** SQ index topped with a HNSW structure to access elements * more efficiently. 
*/ struct IndexHNSWSQ : IndexHNSW { diff --git a/faiss/IndexIVF.cpp b/faiss/IndexIVF.cpp index ece7d0409a..4c98613a3f 100644 --- a/faiss/IndexIVF.cpp +++ b/faiss/IndexIVF.cpp @@ -456,7 +456,7 @@ void IndexIVF::search_preassigned( #pragma omp parallel if (do_parallel) reduction(+ : nlistv, ndis, nheap) num_threads(num_omp_threads) { std::unique_ptr scanner( - get_InvertedListScanner(store_pairs, sel)); + get_InvertedListScanner(store_pairs, sel, params)); /***************************************************** * Depending on parallel_mode, there are two possible ways @@ -797,7 +797,7 @@ void IndexIVF::range_search_preassigned( { RangeSearchPartialResult pres(result); std::unique_ptr scanner( - get_InvertedListScanner(store_pairs, sel)); + get_InvertedListScanner(store_pairs, sel, params)); FAISS_THROW_IF_NOT(scanner.get()); all_pres[omp_get_thread_num()] = &pres; @@ -913,7 +913,8 @@ void IndexIVF::range_search_preassigned( InvertedListScanner* IndexIVF::get_InvertedListScanner( bool /*store_pairs*/, - const IDSelector* /* sel */) const { + const IDSelector* /* sel */, + const IVFSearchParameters* /* params */) const { FAISS_THROW_MSG("get_InvertedListScanner not implemented"); } @@ -1297,6 +1298,14 @@ size_t InvertedListScanner::scan_codes( if (!keep_max) { for (size_t j = 0; j < list_size; j++) { + if (sel != nullptr) { + int64_t id = store_pairs ? lo_build(list_no, j) : ids[j]; + if (!sel->is_member(id)) { + codes += code_size; + continue; + } + } + float dis = distance_to_code(codes); if (dis < simi[0]) { int64_t id = store_pairs ? lo_build(list_no, j) : ids[j]; @@ -1307,6 +1316,14 @@ size_t InvertedListScanner::scan_codes( } } else { for (size_t j = 0; j < list_size; j++) { + if (sel != nullptr) { + int64_t id = store_pairs ? lo_build(list_no, j) : ids[j]; + if (!sel->is_member(id)) { + codes += code_size; + continue; + } + } + float dis = distance_to_code(codes); if (dis > simi[0]) { int64_t id = store_pairs ? 
lo_build(list_no, j) : ids[j]; diff --git a/faiss/IndexIVF.h b/faiss/IndexIVF.h index ebee506f22..5e1748e23b 100644 --- a/faiss/IndexIVF.h +++ b/faiss/IndexIVF.h @@ -312,11 +312,14 @@ struct IndexIVF : Index, IndexIVFInterface { /** Get a scanner for this index (store_pairs means ignore labels) * - * The default search implementation uses this to compute the distances + * The default search implementation uses this to compute the distances. + * Use sel instead of params->sel, because sel is initialized with + * params->sel, but may get overridden by IndexIVF's internal logic. */ virtual InvertedListScanner* get_InvertedListScanner( bool store_pairs = false, - const IDSelector* sel = nullptr) const; + const IDSelector* sel = nullptr, + const IVFSearchParameters* params = nullptr) const; /** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2 */ diff --git a/faiss/IndexIVFAdditiveQuantizer.cpp b/faiss/IndexIVFAdditiveQuantizer.cpp index 154fe4ab66..afc6e92805 100644 --- a/faiss/IndexIVFAdditiveQuantizer.cpp +++ b/faiss/IndexIVFAdditiveQuantizer.cpp @@ -253,7 +253,8 @@ struct AQInvertedListScannerLUT : AQInvertedListScanner { InvertedListScanner* IndexIVFAdditiveQuantizer::get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const { + const IDSelector* sel, + const IVFSearchParameters*) const { FAISS_THROW_IF_NOT(!sel); if (metric_type == METRIC_INNER_PRODUCT) { if (aq->search_type == AdditiveQuantizer::ST_decompress) { diff --git a/faiss/IndexIVFAdditiveQuantizer.h b/faiss/IndexIVFAdditiveQuantizer.h index dfb22d1110..c999a3f79a 100644 --- a/faiss/IndexIVFAdditiveQuantizer.h +++ b/faiss/IndexIVFAdditiveQuantizer.h @@ -52,7 +52,8 @@ struct IndexIVFAdditiveQuantizer : IndexIVF { InvertedListScanner* get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const override; + const IDSelector* sel, + const IVFSearchParameters* params) const override; void sa_decode(idx_t n, const uint8_t* codes, float* x) const override; diff 
--git a/faiss/IndexIVFFlat.cpp b/faiss/IndexIVFFlat.cpp index eb7b074558..661bae0a9a 100644 --- a/faiss/IndexIVFFlat.cpp +++ b/faiss/IndexIVFFlat.cpp @@ -224,7 +224,8 @@ InvertedListScanner* get_InvertedListScanner1( InvertedListScanner* IndexIVFFlat::get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const { + const IDSelector* sel, + const IVFSearchParameters*) const { if (sel) { return get_InvertedListScanner1(this, store_pairs, sel); } else { diff --git a/faiss/IndexIVFFlat.h b/faiss/IndexIVFFlat.h index 919bca2b25..c298b7b7d2 100644 --- a/faiss/IndexIVFFlat.h +++ b/faiss/IndexIVFFlat.h @@ -44,7 +44,8 @@ struct IndexIVFFlat : IndexIVF { InvertedListScanner* get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const override; + const IDSelector* sel, + const IVFSearchParameters* params) const override; void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons) const override; diff --git a/faiss/IndexIVFPQ.cpp b/faiss/IndexIVFPQ.cpp index 7b8ad7a545..f270efa688 100644 --- a/faiss/IndexIVFPQ.cpp +++ b/faiss/IndexIVFPQ.cpp @@ -1350,7 +1350,8 @@ InvertedListScanner* get_InvertedListScanner2( InvertedListScanner* IndexIVFPQ::get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const { + const IDSelector* sel, + const IVFSearchParameters*) const { if (sel) { return get_InvertedListScanner2(*this, store_pairs, sel); } else { @@ -1425,7 +1426,7 @@ void IndexIVFPQ::compute_distance_to_codes_for_list( float* dist_table) const { std::unique_ptr scanner( - get_InvertedListScanner(true, nullptr)); + get_InvertedListScanner(true, nullptr, nullptr)); if (dist_table) { @@ -1477,7 +1478,7 @@ void IndexIVFPQ::compute_distance_table( float* dist_table) const { std::unique_ptr scanner( - get_InvertedListScanner(true, nullptr)); + get_InvertedListScanner(true, nullptr, nullptr)); scanner->set_query(x); diff --git a/faiss/IndexIVFPQ.h b/faiss/IndexIVFPQ.h index b13c43b116..95d73a2a54 100644 --- a/faiss/IndexIVFPQ.h +++ 
b/faiss/IndexIVFPQ.h @@ -134,7 +134,8 @@ struct IndexIVFPQ : IndexIVF { InvertedListScanner* get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const override; + const IDSelector* sel, + const IVFSearchParameters* params) const override; /// build precomputed table void precompute_table(); diff --git a/faiss/IndexIVFRaBitQ.cpp b/faiss/IndexIVFRaBitQ.cpp new file mode 100644 index 0000000000..f4e61baf34 --- /dev/null +++ b/faiss/IndexIVFRaBitQ.cpp @@ -0,0 +1,277 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include + +#include +#include +#include +#include + +#include +#include + +namespace faiss { + +IndexIVFRaBitQ::IndexIVFRaBitQ( + Index* quantizer, + const size_t d, + const size_t nlist, + MetricType metric) + : IndexIVF(quantizer, d, nlist, 0, metric), rabitq(d, metric) { + code_size = rabitq.code_size; + invlists->code_size = code_size; + is_trained = false; + + by_residual = true; +} + +IndexIVFRaBitQ::IndexIVFRaBitQ() { + by_residual = true; +} + +void IndexIVFRaBitQ::train_encoder( + idx_t n, + const float* x, + const idx_t* assign) { + rabitq.train(n, x); +} + +void IndexIVFRaBitQ::encode_vectors( + idx_t n, + const float* x, + const idx_t* list_nos, + uint8_t* codes, + bool include_listnos) const { + size_t coarse_size = include_listnos ? 
coarse_code_size() : 0; + memset(codes, 0, (code_size + coarse_size) * n); + +#pragma omp parallel if (n > 1000) + { + std::vector centroid(d); + +#pragma omp for + for (idx_t i = 0; i < n; i++) { + int64_t list_no = list_nos[i]; + if (list_no >= 0) { + const float* xi = x + i * d; + uint8_t* code = codes + i * (code_size + coarse_size); + + // both by_residual and !by_residual lead to the same code + quantizer->reconstruct(list_no, centroid.data()); + rabitq.compute_codes_core( + xi, code + coarse_size, 1, centroid.data()); + + if (coarse_size) { + encode_listno(list_no, code); + } + } + } + } +} + +void IndexIVFRaBitQ::add_core( + idx_t n, + const float* x, + const idx_t* xids, + const idx_t* precomputed_idx, + void* inverted_list_context) { + FAISS_THROW_IF_NOT(is_trained); + + DirectMapAdd dm_add(direct_map, n, xids); + +#pragma omp parallel + { + std::vector one_code(code_size); + std::vector centroid(d); + + int nt = omp_get_num_threads(); + int rank = omp_get_thread_num(); + + // each thread takes care of a subset of lists + for (size_t i = 0; i < n; i++) { + int64_t list_no = precomputed_idx[i]; + if (list_no >= 0 && list_no % nt == rank) { + int64_t id = xids ? 
xids[i] : ntotal + i; + + const float* xi = x + i * d; + + // both by_residual and !by_residual lead to the same code + quantizer->reconstruct(list_no, centroid.data()); + rabitq.compute_codes_core( + xi, one_code.data(), 1, centroid.data()); + + size_t ofs = invlists->add_entry( + list_no, id, one_code.data(), inverted_list_context); + + dm_add.add(i, list_no, ofs); + + } else if (rank == 0 && list_no == -1) { + dm_add.add(i, -1, 0); + } + } + } + + ntotal += n; +} + +struct RaBitInvertedListScanner : InvertedListScanner { + const IndexIVFRaBitQ& ivf_rabitq; + + std::vector reconstructed_centroid; + std::vector query_vector; + + std::unique_ptr dc; + + uint8_t qb = 0; + + RaBitInvertedListScanner( + const IndexIVFRaBitQ& ivf_rabitq_in, + bool store_pairs = false, + const IDSelector* sel = nullptr, + uint8_t qb_in = 0) + : InvertedListScanner(store_pairs, sel), + ivf_rabitq{ivf_rabitq_in}, + qb{qb_in} { + keep_max = is_similarity_metric(ivf_rabitq.metric_type); + code_size = ivf_rabitq.code_size; + } + + /// from now on we handle this query. + void set_query(const float* query_vector_in) override { + query_vector.assign(query_vector_in, query_vector_in + ivf_rabitq.d); + + internal_try_setup_dc(); + } + + /// following codes come from this inverted list + void set_list(idx_t list_no, float coarse_dis) override { + this->list_no = list_no; + + reconstructed_centroid.resize(ivf_rabitq.d); + ivf_rabitq.quantizer->reconstruct( + list_no, reconstructed_centroid.data()); + + internal_try_setup_dc(); + } + + /// compute a single query-to-code distance + float distance_to_code(const uint8_t* code) const override { + return dc->distance_to_code(code); + } + + void internal_try_setup_dc() { + if (!query_vector.empty() && !reconstructed_centroid.empty()) { + // both query_vector and centroid are available! 
+ // set up DistanceComputer + dc.reset(ivf_rabitq.rabitq.get_distance_computer( + qb, reconstructed_centroid.data())); + + dc->set_query(query_vector.data()); + } + } +}; + +InvertedListScanner* IndexIVFRaBitQ::get_InvertedListScanner( + bool store_pairs, + const IDSelector* sel, + const IVFSearchParameters* search_params_in) const { + uint8_t used_qb = qb; + if (auto params = dynamic_cast( + search_params_in)) { + used_qb = params->qb; + } + + return new RaBitInvertedListScanner(*this, store_pairs, sel, used_qb); +} + +void IndexIVFRaBitQ::reconstruct_from_offset( + int64_t list_no, + int64_t offset, + float* recons) const { + const uint8_t* code = invlists->get_single_code(list_no, offset); + + std::vector centroid(d); + quantizer->reconstruct(list_no, centroid.data()); + + rabitq.decode_core(code, recons, 1, centroid.data()); +} + +void IndexIVFRaBitQ::sa_decode(idx_t n, const uint8_t* codes, float* x) const { + size_t coarse_size = coarse_code_size(); + +#pragma omp parallel + { + std::vector centroid(d); + +#pragma omp for + for (idx_t i = 0; i < n; i++) { + const uint8_t* code = codes + i * (code_size + coarse_size); + int64_t list_no = decode_listno(code); + float* xi = x + i * d; + + quantizer->reconstruct(list_no, centroid.data()); + rabitq.decode_core(code + coarse_size, xi, 1, centroid.data()); + } + } +} + +struct IVFRaBitDistanceComputer : DistanceComputer { + const float* q = nullptr; + const IndexIVFRaBitQ* parent = nullptr; + + void set_query(const float* x) override; + + float operator()(idx_t i) override; + + float symmetric_dis(idx_t i, idx_t j) override; +}; + +void IVFRaBitDistanceComputer::set_query(const float* x) { + q = x; +} + +float IVFRaBitDistanceComputer::operator()(idx_t i) { + // find the appropriate list + idx_t lo = parent->direct_map.get(i); + uint64_t list_no = lo_listno(lo); + uint64_t offset = lo_offset(lo); + + const uint8_t* code = parent->invlists->get_single_code(list_no, offset); + + // ok, we know the appropriate cluster 
that we need + std::vector centroid(parent->d); + parent->quantizer->reconstruct(list_no, centroid.data()); + + // compute the distance + float distance = 0; + + std::unique_ptr dc( + parent->rabitq.get_distance_computer(parent->qb, centroid.data())); + dc->set_query(q); + distance = dc->distance_to_code(code); + + // deallocate + parent->invlists->release_codes(list_no, code); + + // done + return distance; +} + +float IVFRaBitDistanceComputer::symmetric_dis(idx_t i, idx_t j) { + FAISS_THROW_MSG("Not implemented"); +} + +DistanceComputer* IndexIVFRaBitQ::get_distance_computer() const { + IVFRaBitDistanceComputer* dc = new IVFRaBitDistanceComputer; + dc->parent = this; + return dc; +} + +} // namespace faiss diff --git a/faiss/IndexIVFRaBitQ.h b/faiss/IndexIVFRaBitQ.h new file mode 100644 index 0000000000..ca42dfc39d --- /dev/null +++ b/faiss/IndexIVFRaBitQ.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +#include +#include + +#include + +namespace faiss { + +struct IVFRaBitQSearchParameters : IVFSearchParameters { + uint8_t qb = 0; +}; + +// * by_residual is true, just by design +struct IndexIVFRaBitQ : IndexIVF { + RaBitQuantizer rabitq; + + // the default number of bits to quantize a query with. + // use '0' to disable quantization and use raw fp32 values. 
+ uint8_t qb = 0; + + IndexIVFRaBitQ( + Index* quantizer, + const size_t d, + const size_t nlist, + MetricType metric = METRIC_L2); + + IndexIVFRaBitQ(); + + void train_encoder(idx_t n, const float* x, const idx_t* assign) override; + + void encode_vectors( + idx_t n, + const float* x, + const idx_t* list_nos, + uint8_t* codes, + bool include_listnos = false) const override; + + void add_core( + idx_t n, + const float* x, + const idx_t* xids, + const idx_t* precomputed_idx, + void* inverted_list_context = nullptr) override; + + InvertedListScanner* get_InvertedListScanner( + bool store_pairs, + const IDSelector* sel, + const IVFSearchParameters* params) const override; + + void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons) + const override; + + void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override; + + // unfortunately + DistanceComputer* get_distance_computer() const override; +}; + +} // namespace faiss diff --git a/faiss/IndexIVFSpectralHash.cpp b/faiss/IndexIVFSpectralHash.cpp index c0a6c0e914..a4f23256a5 100644 --- a/faiss/IndexIVFSpectralHash.cpp +++ b/faiss/IndexIVFSpectralHash.cpp @@ -301,7 +301,8 @@ struct BuildScanner { InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const { + const IDSelector* sel, + const IVFSearchParameters*) const { FAISS_THROW_IF_NOT(!sel); BuildScanner bs; return dispatch_HammingComputer(code_size, bs, this, store_pairs); diff --git a/faiss/IndexIVFSpectralHash.h b/faiss/IndexIVFSpectralHash.h index 9f11105c11..77541bc6fa 100644 --- a/faiss/IndexIVFSpectralHash.h +++ b/faiss/IndexIVFSpectralHash.h @@ -71,7 +71,8 @@ struct IndexIVFSpectralHash : IndexIVF { InvertedListScanner* get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const override; + const IDSelector* sel, + const IVFSearchParameters* params) const override; /** replace the vector transform for an empty (and possibly untrained) index */ diff --git 
a/faiss/IndexRaBitQ.cpp b/faiss/IndexRaBitQ.cpp new file mode 100644 index 0000000000..c4025c4ce3 --- /dev/null +++ b/faiss/IndexRaBitQ.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include + +namespace faiss { + +IndexRaBitQ::IndexRaBitQ() = default; + +IndexRaBitQ::IndexRaBitQ(idx_t d, MetricType metric) + : IndexFlatCodes(0, d, metric), rabitq(d, metric) { + code_size = rabitq.code_size; + + is_trained = false; +} + +void IndexRaBitQ::train(idx_t n, const float* x) { + // compute a centroid + std::vector centroid(d, 0); + for (size_t i = 0; i < n; i++) { + for (size_t j = 0; j < d; j++) { + centroid[j] += x[i * d + j]; + } + } + + if (n != 0) { + for (size_t j = 0; j < d; j++) { + centroid[j] /= (float)n; + } + } + + center = std::move(centroid); + + // + rabitq.train(n, x); + is_trained = true; +} + +void IndexRaBitQ::sa_encode(idx_t n, const float* x, uint8_t* bytes) const { + FAISS_THROW_IF_NOT(is_trained); + rabitq.compute_codes_core(x, bytes, n, center.data()); +} + +void IndexRaBitQ::sa_decode(idx_t n, const uint8_t* bytes, float* x) const { + FAISS_THROW_IF_NOT(is_trained); + rabitq.decode_core(bytes, x, n, center.data()); +} + +FlatCodesDistanceComputer* IndexRaBitQ::get_FlatCodesDistanceComputer() const { + FlatCodesDistanceComputer* dc = + rabitq.get_distance_computer(qb, center.data()); + dc->code_size = rabitq.code_size; + dc->codes = codes.data(); + return dc; +} + +FlatCodesDistanceComputer* IndexRaBitQ::get_quantized_distance_computer( + const uint8_t qb) const { + FlatCodesDistanceComputer* dc = + rabitq.get_distance_computer(qb, center.data()); + dc->code_size = rabitq.code_size; + dc->codes = codes.data(); + return dc; +} + +namespace { + +struct Run_search_with_dc_res { + using T = void; + + uint8_t qb = 0; + + template + void f(BlockResultHandler& 
res, const IndexRaBitQ* index, const float* xq) { + size_t ntotal = index->ntotal; + using SingleResultHandler = + typename BlockResultHandler::SingleResultHandler; + const int d = index->d; + +#pragma omp parallel // if (res.nq > 100) + { + std::unique_ptr dc( + index->get_quantized_distance_computer(qb)); + SingleResultHandler resi(res); +#pragma omp for + for (int64_t q = 0; q < res.nq; q++) { + resi.begin(q); + dc->set_query(xq + d * q); + for (size_t i = 0; i < ntotal; i++) { + if (res.is_in_selection(i)) { + float dis = (*dc)(i); + resi.add_result(dis, i); + } + } + resi.end(); + } + } + } +}; + +} // namespace + +void IndexRaBitQ::search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels, + const SearchParameters* params_in) const { + uint8_t used_qb = qb; + if (auto params = dynamic_cast(params_in)) { + used_qb = params->qb; + } + + const IDSelector* sel = (params_in != nullptr) ? params_in->sel : nullptr; + Run_search_with_dc_res r; + r.qb = used_qb; + + dispatch_knn_ResultHandler( + n, distances, labels, k, metric_type, sel, r, this, x); +} + +void IndexRaBitQ::range_search( + idx_t n, + const float* x, + float radius, + RangeSearchResult* result, + const SearchParameters* params_in) const { + uint8_t used_qb = qb; + if (auto params = dynamic_cast(params_in)) { + used_qb = params->qb; + } + + const IDSelector* sel = (params_in != nullptr) ? params_in->sel : nullptr; + Run_search_with_dc_res r; + r.qb = used_qb; + + dispatch_range_ResultHandler(result, radius, metric_type, sel, r, this, x); +} + +} // namespace faiss diff --git a/faiss/IndexRaBitQ.h b/faiss/IndexRaBitQ.h new file mode 100644 index 0000000000..8d2cb47219 --- /dev/null +++ b/faiss/IndexRaBitQ.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace faiss { + +struct RaBitQSearchParameters : SearchParameters { + uint8_t qb = 0; +}; + +struct IndexRaBitQ : IndexFlatCodes { + RaBitQuantizer rabitq; + + // center of all points + std::vector center; + + // the default number of bits to quantize a query with. + // use '0' to disable quantization and use raw fp32 values. + uint8_t qb = 0; + + IndexRaBitQ(); + + IndexRaBitQ(idx_t d, MetricType metric = METRIC_L2); + + void train(idx_t n, const float* x) override; + + void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override; + void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override; + + // returns a quantized-to-qb bits DC if qb > 0 + // returns a default fp32-based DC if qb == 0 + FlatCodesDistanceComputer* get_FlatCodesDistanceComputer() const override; + + // returns a quantized-to-qb bits DC if qb_in > 0 + // returns a default fp32-based DC if qb_in == 0 + FlatCodesDistanceComputer* get_quantized_distance_computer( + const uint8_t qb_in) const; + + // Don't rely on sa_decode(), bcz it is good for IP, but not for L2. + // As a result, use get_FlatCodesDistanceComputer() for the search. 
+ void search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels, + const SearchParameters* params = nullptr) const override; + + void range_search( + idx_t n, + const float* x, + float radius, + RangeSearchResult* result, + const SearchParameters* params = nullptr) const override; +}; + +} // namespace faiss diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp index 8c013d0287..d465ce30f5 100644 --- a/faiss/IndexScalarQuantizer.cpp +++ b/faiss/IndexScalarQuantizer.cpp @@ -258,7 +258,8 @@ void IndexIVFScalarQuantizer::add_core( InvertedListScanner* IndexIVFScalarQuantizer::get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const { + const IDSelector* sel, + const IVFSearchParameters*) const { return sq.select_InvertedListScanner( metric_type, quantizer, store_pairs, sel, by_residual); } diff --git a/faiss/IndexScalarQuantizer.h b/faiss/IndexScalarQuantizer.h index fe73536f6a..4617c1b0ce 100644 --- a/faiss/IndexScalarQuantizer.h +++ b/faiss/IndexScalarQuantizer.h @@ -96,7 +96,8 @@ struct IndexIVFScalarQuantizer : IndexIVF { InvertedListScanner* get_InvertedListScanner( bool store_pairs, - const IDSelector* sel) const override; + const IDSelector* sel, + const IVFSearchParameters* params) const override; void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons) const override; diff --git a/faiss/clone_index.cpp b/faiss/clone_index.cpp index 7174cd6ae0..5a1e5cfad2 100644 --- a/faiss/clone_index.cpp +++ b/faiss/clone_index.cpp @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -107,6 +109,11 @@ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) { return nullptr; } +IndexBinaryIVF* clone_IndexBinaryIVF(const IndexBinaryIVF* ivf) { + TRYCLONE(IndexBinaryIVF, ivf) + return nullptr; +} + IndexRefine* clone_IndexRefine(const IndexRefine* ir) { TRYCLONE(IndexRefineFlat, ir) TRYCLONE(IndexRefine, ir) { @@ -131,6 +138,11 @@ IndexHNSW* 
clone_IndexHNSW(const IndexHNSW* ihnsw) { } } +IndexBinaryHNSW* clone_IndexBinaryHNSW(const IndexBinaryHNSW* ihnsw) { + TRYCLONE(IndexBinaryHNSW, ihnsw) + return nullptr; +} + IndexNNDescent* clone_IndexNNDescent(const IndexNNDescent* innd) { TRYCLONE(IndexNNDescentFlat, innd) TRYCLONE(IndexNNDescent, innd) { @@ -323,9 +335,10 @@ Index* Cloner::clone_Index(const Index* index) { IndexNSG* res = clone_IndexNSG(insg); // copy the dynamic allocated graph - auto& new_graph = res->nsg.final_graph; - auto& old_graph = insg->nsg.final_graph; - new_graph = std::make_shared>(*old_graph); + if (auto& old_graph = insg->nsg.final_graph) { + auto& new_graph = res->nsg.final_graph; + new_graph = std::make_shared>(*old_graph); + } res->own_fields = true; res->storage = clone_Index(insg->storage); @@ -385,6 +398,28 @@ Quantizer* clone_Quantizer(const Quantizer* quant) { IndexBinary* clone_binary_index(const IndexBinary* index) { if (auto ii = dynamic_cast(index)) { return new IndexBinaryFlat(*ii); + } else if ( + const IndexBinaryIVF* ivf = + dynamic_cast(index)) { + IndexBinaryIVF* res = clone_IndexBinaryIVF(ivf); + if (ivf->invlists == nullptr) { + res->invlists = nullptr; + } else { + res->invlists = clone_InvertedLists(ivf->invlists); + res->own_invlists = true; + } + + res->own_fields = true; + res->quantizer = clone_binary_index(ivf->quantizer); + + return res; + } else if ( + const IndexBinaryHNSW* ihnsw = + dynamic_cast(index)) { + IndexBinaryHNSW* res = clone_IndexBinaryHNSW(ihnsw); + res->own_fields = true; + res->storage = clone_binary_index(ihnsw->storage); + return res; } else { FAISS_THROW_MSG("cannot clone this type of index"); } diff --git a/faiss/cppcontrib/factory_tools.cpp b/faiss/cppcontrib/factory_tools.cpp index d1f283b8ff..46ffada3e8 100644 --- a/faiss/cppcontrib/factory_tools.cpp +++ b/faiss/cppcontrib/factory_tools.cpp @@ -8,8 +8,22 @@ // -*- c++ -*- #include + #include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include + namespace faiss { namespace { @@ -122,6 +136,11 @@ std::string reverse_index_factory(const faiss::Index* index) { const faiss::IndexHNSW* hnsw_index = dynamic_cast(index)) { return "HNSW" + std::to_string(get_hnsw_M(hnsw_index)); + } else if ( + const faiss::IndexNSG* nsg_index = + dynamic_cast(index)) { + return "NSG" + std::to_string(nsg_index->nsg.R) + "," + + reverse_index_factory(nsg_index->storage); } else if ( const faiss::IndexRefine* refine_index = dynamic_cast(index)) { diff --git a/faiss/cppcontrib/factory_tools.h b/faiss/cppcontrib/factory_tools.h index f83a6db4ad..20b9237254 100644 --- a/faiss/cppcontrib/factory_tools.h +++ b/faiss/cppcontrib/factory_tools.h @@ -9,20 +9,13 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include namespace faiss { +struct Index; +struct IndexBinary; + std::string reverse_index_factory(const faiss::Index* index); std::string reverse_index_factory(const faiss::IndexBinary* index); diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt index 04d28907d1..0051f047f4 100644 --- a/faiss/gpu/CMakeLists.txt +++ b/faiss/gpu/CMakeLists.txt @@ -258,12 +258,12 @@ if(FAISS_ENABLE_CUVS) utils/CuvsUtils.cu) endif() -add_library(faiss_gpu STATIC ${FAISS_GPU_SRC}) -set_target_properties(faiss_gpu PROPERTIES +add_library(faiss_gpu_objs OBJECT ${FAISS_GPU_SRC}) +set_target_properties(faiss_gpu_objs PROPERTIES POSITION_INDEPENDENT_CODE ON WINDOWS_EXPORT_ALL_SYMBOLS ON ) -target_include_directories(faiss_gpu PUBLIC +target_include_directories(faiss_gpu_objs PUBLIC $) if(FAISS_ENABLE_CUVS) @@ -298,7 +298,7 @@ if(FAISS_ENABLE_CUVS) utils/CuvsUtils.cu TARGET_DIRECTORY faiss PROPERTIES COMPILE_OPTIONS "-fvisibility=hidden") - target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_CUVS=1) + target_compile_definitions(faiss_gpu_objs PUBLIC USE_NVIDIA_CUVS=1) endif() if (FAISS_ENABLE_ROCM) @@ -308,11 +308,13 @@ endif() 
# Export FAISS_GPU_HEADERS variable to parent scope. set(FAISS_GPU_HEADERS ${FAISS_GPU_HEADERS} PARENT_SCOPE) -target_link_libraries(faiss PRIVATE "$") -target_link_libraries(faiss_avx2 PRIVATE "$") -target_link_libraries(faiss_avx512 PRIVATE "$") -target_link_libraries(faiss_avx512_spr PRIVATE "$") -target_link_libraries(faiss_sve PRIVATE "$") +target_link_libraries(faiss PRIVATE faiss_gpu_objs) +target_link_libraries(faiss_avx2 PRIVATE faiss_gpu_objs) +target_link_libraries(faiss_avx512 PRIVATE faiss_gpu_objs) +target_link_libraries(faiss_avx512_spr PRIVATE faiss_gpu_objs) +target_link_libraries(faiss_sve PRIVATE faiss_gpu_objs) + +install(TARGETS faiss_gpu_objs EXPORT faiss-targets) foreach(header ${FAISS_GPU_HEADERS}) get_filename_component(dir ${header} DIRECTORY ) @@ -322,8 +324,8 @@ foreach(header ${FAISS_GPU_HEADERS}) endforeach() if (FAISS_ENABLE_ROCM) - target_link_libraries(faiss_gpu PRIVATE hip::host roc::hipblas) - target_compile_options(faiss_gpu PRIVATE) + target_link_libraries(faiss_gpu_objs PRIVATE hip::host roc::hipblas) + target_compile_options(faiss_gpu_objs PRIVATE) else() # Prepares a host linker script and enables host linker to support # very large device object files. 
@@ -338,12 +340,12 @@ else() } ]=] ) - target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") + target_link_options(faiss_gpu_objs PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") find_package(CUDAToolkit REQUIRED) - target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$:cuvs::cuvs> $<$:OpenMP::OpenMP_CXX>) - target_compile_options(faiss_gpu PRIVATE + target_link_libraries(faiss_gpu_objs PRIVATE CUDA::cudart CUDA::cublas $<$:cuvs::cuvs> $<$:OpenMP::OpenMP_CXX>) + target_compile_options(faiss_gpu_objs PRIVATE $<$:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr $<$:-Xcompiler=${OpenMP_CXX_FLAGS}>>) diff --git a/faiss/gpu/GpuAutoTune.cpp b/faiss/gpu/GpuAutoTune.cpp index fed0132d79..7c1ccfc61d 100644 --- a/faiss/gpu/GpuAutoTune.cpp +++ b/faiss/gpu/GpuAutoTune.cpp @@ -28,7 +28,8 @@ using namespace ::faiss; * Parameters to auto-tune on GpuIndex'es **********************************************************/ -#define DC(classname) auto ix = dynamic_cast(index) +#define DC(classname) \ + [[maybe_unused]] auto ix = dynamic_cast(index) void GpuParameterSpace::initialize(const Index* index) { if (DC(IndexPreTransform)) { diff --git a/faiss/gpu/GpuIcmEncoder.cu b/faiss/gpu/GpuIcmEncoder.cu index 999dd998a0..84202aeb7a 100644 --- a/faiss/gpu/GpuIcmEncoder.cu +++ b/faiss/gpu/GpuIcmEncoder.cu @@ -96,7 +96,7 @@ void GpuIcmEncoder::encode( auto fn = [=](int idx, IcmEncoderImpl* encoder) { size_t i0 = idx * base_shard_size + std::min(size_t(idx), n % nshards); size_t ni = base_shard_size; - if (ni < n % nshards) { + if (idx < n % nshards) { ++ni; } if (ni <= 0) { // only if n < nshards diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index fe0c82b8aa..42a6092ddd 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -103,6 +103,10 @@ void GpuIndexCagra::train(idx_t n, const float* x) { this->ntotal = n; } +void GpuIndexCagra::add(idx_t n, const float* x) { + train(n, x); +} + bool 
GpuIndexCagra::addImplRequiresIDs_() const { return false; }; diff --git a/faiss/gpu/GpuIndexCagra.h b/faiss/gpu/GpuIndexCagra.h index d6fae29b58..a0cac805a7 100644 --- a/faiss/gpu/GpuIndexCagra.h +++ b/faiss/gpu/GpuIndexCagra.h @@ -173,8 +173,8 @@ struct GpuIndexCagraConfig : public GpuIndexConfig { /// Number of Iterations to run if building with NN_DESCENT size_t nn_descent_niter = 20; - IVFPQBuildCagraConfig* ivf_pq_params = nullptr; - IVFPQSearchCagraConfig* ivf_pq_search_params = nullptr; + std::shared_ptr ivf_pq_params{nullptr}; + std::shared_ptr ivf_pq_search_params{nullptr}; float refine_rate = 2.0f; bool store_dataset = true; }; @@ -245,7 +245,17 @@ struct GpuIndexCagra : public GpuIndex { faiss::MetricType metric = faiss::METRIC_L2, GpuIndexCagraConfig config = GpuIndexCagraConfig()); - /// Trains CAGRA based on the given vector data + /// Trains CAGRA based on the given vector data and add them along with ids. + /// NB: The use of the add function here is to build the CAGRA graph on + /// the base dataset. Use this function when you want to add vectors with + /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107 + void add(idx_t n, const float* x) override; + + /// Trains CAGRA based on the given vector data. + /// NB: The use of the train function here is to build the CAGRA graph on + /// the base dataset and is currently the only function to add the full set + /// of vectors (without IDs) to the index. There is no external quantizer to + /// be trained here. 
void train(idx_t n, const float* x) override; /// Initialize ourselves from the given CPU index; will overwrite diff --git a/faiss/gpu/GpuIndexIVFPQ.cu b/faiss/gpu/GpuIndexIVFPQ.cu index da0e5ac8f3..f6e314e9b6 100644 --- a/faiss/gpu/GpuIndexIVFPQ.cu +++ b/faiss/gpu/GpuIndexIVFPQ.cu @@ -566,17 +566,13 @@ void GpuIndexIVFPQ::verifyPQSettings_() const { "Bits per code must be 8 (passed %d)", bitsPerCode_); } - } - - // The number of bytes per encoded vector must be one we support - FAISS_THROW_IF_NOT_FMT( - ivfpqConfig_.interleavedLayout || - IVFPQ::isSupportedPQCodeLength(subQuantizers_), - "Number of bytes per encoded vector / sub-quantizers (%d) " - "is not supported", - subQuantizers_); - - if (!should_use_cuvs(config_)) { + // The number of bytes per encoded vector must be one we support + FAISS_THROW_IF_NOT_FMT( + ivfpqConfig_.interleavedLayout || + IVFPQ::isSupportedPQCodeLength(subQuantizers_), + "Number of bytes per encoded vector / sub-quantizers (%d) " + "is not supported", + subQuantizers_); // Sub-quantizers must evenly divide dimensions available FAISS_THROW_IF_NOT_FMT( this->d % subQuantizers_ == 0, diff --git a/faiss/gpu/StandardGpuResources.cpp b/faiss/gpu/StandardGpuResources.cpp index 39ee38efa9..649b7cb5cf 100644 --- a/faiss/gpu/StandardGpuResources.cpp +++ b/faiss/gpu/StandardGpuResources.cpp @@ -411,7 +411,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) { raftHandles_.emplace(std::make_pair(device, defaultStream)); #endif - cudaStream_t asyncCopyStream = 0; + cudaStream_t asyncCopyStream = nullptr; CUDA_VERIFY( cudaStreamCreateWithFlags(&asyncCopyStream, cudaStreamNonBlocking)); diff --git a/faiss/gpu/impl/CuvsCagra.cu b/faiss/gpu/impl/CuvsCagra.cu index 82e3007d59..f60e1e3ab5 100644 --- a/faiss/gpu/impl/CuvsCagra.cu +++ b/faiss/gpu/impl/CuvsCagra.cu @@ -69,6 +69,7 @@ CuvsCagra::CuvsCagra( index_params_.intermediate_graph_degree = intermediate_graph_degree; index_params_.graph_degree = graph_degree; + 
index_params_.attach_dataset_on_build = store_dataset; if (!ivf_pq_search_params_) { ivf_pq_search_params_ = @@ -243,6 +244,7 @@ void CuvsCagra::search( storage_, n_, dim_); cuvs_index->update_dataset(raft_handle, dataset); } + store_dataset_ = true; } auto queries_view = raft::make_device_matrix_view( diff --git a/faiss/gpu/impl/CuvsCagra.cuh b/faiss/gpu/impl/CuvsCagra.cuh index c466aceec4..8e458d8be2 100644 --- a/faiss/gpu/impl/CuvsCagra.cuh +++ b/faiss/gpu/impl/CuvsCagra.cuh @@ -118,8 +118,11 @@ class CuvsCagra { const int dim_; /// Controls the underlying cuVS index if it should store the dataset in - /// device memory - bool store_dataset_; + /// device memory. Default set to true for enabling search capabilities on + /// the index. + /// NB: This is also required to be set to true for deserializing + /// an IndexHNSWCagra object. + bool store_dataset_ = true; /// Metric type of the index faiss::MetricType metric_; diff --git a/faiss/gpu/impl/DistanceUtils.cuh b/faiss/gpu/impl/DistanceUtils.cuh index fd894ae3bd..4ea899c8ec 100644 --- a/faiss/gpu/impl/DistanceUtils.cuh +++ b/faiss/gpu/impl/DistanceUtils.cuh @@ -303,7 +303,7 @@ __global__ void incrementIndex( int k, idx_t increment) { for (idx_t i = blockIdx.y; i < indices.getSize(0); i += gridDim.y) { - for (int j = threadIdx.x; j < k; j += blockDim.x) { + for (auto j = threadIdx.x; j < k; j += blockDim.x) { indices[i][idx_t(blockIdx.x) * k + j] += blockIdx.x * increment; } } diff --git a/faiss/gpu/impl/GpuScalarQuantizer.cuh b/faiss/gpu/impl/GpuScalarQuantizer.cuh index c2d781419d..186ecac1c2 100644 --- a/faiss/gpu/impl/GpuScalarQuantizer.cuh +++ b/faiss/gpu/impl/GpuScalarQuantizer.cuh @@ -377,7 +377,7 @@ struct Codec { smemVmin = smem; smemVdiff = smem + dim; - for (int i = threadIdx.x; i < dim; i += blockDim.x) { + for (auto i = threadIdx.x; i < dim; i += blockDim.x) { // We are performing vmin + vdiff * (v + 0.5) / (2^bits - 1) // This can be simplified to vmin' + vdiff' * v where: // vdiff' = vdiff / 
(2^bits - 1) @@ -587,7 +587,7 @@ struct Codec { smemVmin = smem; smemVdiff = smem + dim; - for (int i = threadIdx.x; i < dim; i += blockDim.x) { + for (auto i = threadIdx.x; i < dim; i += blockDim.x) { // We are performing vmin + vdiff * (v + 0.5) / (2^bits - 1) // This can be simplified to vmin' + vdiff' * v where: // vdiff' = vdiff / (2^bits - 1) @@ -753,7 +753,7 @@ struct Codec { smemVmin = smem; smemVdiff = smem + dim; - for (int i = threadIdx.x; i < dim; i += blockDim.x) { + for (auto i = threadIdx.x; i < dim; i += blockDim.x) { // We are performing vmin + vdiff * (v + 0.5) / (2^bits - 1) // This can be simplified to vmin' + vdiff' * v where: // vdiff' = vdiff / (2^bits - 1) diff --git a/faiss/gpu/impl/IVFAppend.cu b/faiss/gpu/impl/IVFAppend.cu index ba5cedf3c7..dd1c9073b5 100644 --- a/faiss/gpu/impl/IVFAppend.cu +++ b/faiss/gpu/impl/IVFAppend.cu @@ -368,9 +368,9 @@ __global__ void ivfInterleavedAppend( // The set of addresses for each of the lists void** listData) { // FIXME: some issue with getLaneId() and CUDA 10.1 and P4 GPUs? - int laneId = threadIdx.x % kWarpSize; - int warpId = threadIdx.x / kWarpSize; - int warpsPerBlock = blockDim.x / kWarpSize; + auto laneId = threadIdx.x % kWarpSize; + auto warpId = threadIdx.x / kWarpSize; + auto warpsPerBlock = blockDim.x / kWarpSize; // Each block is dedicated to a separate list idx_t listId = uniqueLists[blockIdx.x]; diff --git a/faiss/gpu/impl/IVFFlatScan.cu b/faiss/gpu/impl/IVFFlatScan.cu index 457d0afeb6..5c6307b032 100644 --- a/faiss/gpu/impl/IVFFlatScan.cu +++ b/faiss/gpu/impl/IVFFlatScan.cu @@ -65,9 +65,9 @@ struct IVFFlatScan { int limit = utils::divDown(dim, Codec::kDimPerIter); // Each warp handles a separate chunk of vectors - int warpId = threadIdx.x / kWarpSize; + auto warpId = threadIdx.x / kWarpSize; // FIXME: why does getLaneId() not work when we write out below!?!?! 
- int laneId = threadIdx.x % kWarpSize; // getLaneId(); + auto laneId = threadIdx.x % kWarpSize; // getLaneId(); // Divide the set of vectors among the warps idx_t vecsPerWarp = utils::divUp(numVecs, kIVFFlatScanWarps); diff --git a/faiss/gpu/impl/IVFInterleaved.cu b/faiss/gpu/impl/IVFInterleaved.cu index e5b13f3aa8..fc99a49163 100644 --- a/faiss/gpu/impl/IVFInterleaved.cu +++ b/faiss/gpu/impl/IVFInterleaved.cu @@ -27,7 +27,7 @@ __global__ void ivfInterleavedScan2( Tensor distanceOut, Tensor indicesOut) { if constexpr ((NumWarpQ == 1 && NumThreadQ == 1) || NumWarpQ >= kWarpSize) { - int queryId = blockIdx.x; + auto queryId = blockIdx.x; constexpr int kNumWarps = ThreadsPerBlock / kWarpSize; @@ -99,7 +99,7 @@ __global__ void ivfInterleavedScan2( // Merge all final results heap.reduce(); - for (int i = threadIdx.x; i < k; i += blockDim.x) { + for (auto i = threadIdx.x; i < k; i += blockDim.x) { // Re-adjust the value we are selecting based on the sorting order distanceOut[queryId][i] = smemK[i] * adj; auto packedIndex = smemV[i]; diff --git a/faiss/gpu/impl/IVFInterleaved.cuh b/faiss/gpu/impl/IVFInterleaved.cuh index f1da8342d4..1b7fbbe7a1 100644 --- a/faiss/gpu/impl/IVFInterleaved.cuh +++ b/faiss/gpu/impl/IVFInterleaved.cuh @@ -56,7 +56,7 @@ __global__ void ivfInterleavedScan( for (idx_t queryId = blockIdx.y; queryId < queries.getSize(0); queryId += gridDim.y) { - int probeId = blockIdx.x; + auto probeId = blockIdx.x; idx_t listId = listIds[queryId][probeId]; // Safety guard in case NaNs in input cause no list ID to be @@ -69,8 +69,8 @@ __global__ void ivfInterleavedScan( int dim = queries.getSize(1); // FIXME: some issue with getLaneId() and CUDA 10.1 and P4 GPUs? 
- int laneId = threadIdx.x % kWarpSize; - int warpId = threadIdx.x / kWarpSize; + auto laneId = threadIdx.x % kWarpSize; + auto warpId = threadIdx.x / kWarpSize; using EncodeT = typename Codec::EncodeT; @@ -215,7 +215,7 @@ __global__ void ivfInterleavedScan( auto distanceOutBase = distanceOut[queryId][probeId].data(); auto indicesOutBase = indicesOut[queryId][probeId].data(); - for (int i = threadIdx.x; i < k; i += blockDim.x) { + for (auto i = threadIdx.x; i < k; i += blockDim.x) { distanceOutBase[i] = smemK[i]; indicesOutBase[i] = smemV[i]; } diff --git a/faiss/gpu/impl/IVFUtilsSelect1.cu b/faiss/gpu/impl/IVFUtilsSelect1.cu index 3cb88bd9c7..c4f65bab8f 100644 --- a/faiss/gpu/impl/IVFUtilsSelect1.cu +++ b/faiss/gpu/impl/IVFUtilsSelect1.cu @@ -90,7 +90,7 @@ __global__ void pass1SelectLists( // Write out the final k-selected values; they should be all // together - for (int i = threadIdx.x; i < k; i += blockDim.x) { + for (auto i = threadIdx.x; i < k; i += blockDim.x) { heapDistances[queryId][sliceId][i] = smemK[i]; heapIndices[queryId][sliceId][i] = idx_t(smemV[i]); } diff --git a/faiss/gpu/impl/IVFUtilsSelect2.cu b/faiss/gpu/impl/IVFUtilsSelect2.cu index 3a94101bdb..2dbf3c0f00 100644 --- a/faiss/gpu/impl/IVFUtilsSelect2.cu +++ b/faiss/gpu/impl/IVFUtilsSelect2.cu @@ -100,7 +100,7 @@ __global__ void pass2SelectLists( // Merge all final results heap.reduce(); - for (int i = threadIdx.x; i < k; i += blockDim.x) { + for (auto i = threadIdx.x; i < k; i += blockDim.x) { outDistances[queryId][i] = smemK[i]; // `v` is the index in `heapIndices` diff --git a/faiss/gpu/impl/IcmEncoder.cu b/faiss/gpu/impl/IcmEncoder.cu index 20ff36e2d2..b86e390f85 100644 --- a/faiss/gpu/impl/IcmEncoder.cu +++ b/faiss/gpu/impl/IcmEncoder.cu @@ -46,8 +46,8 @@ __global__ void runIcmEncodeStep( int m) { using KVPair = Pair; - int id = blockIdx.x; // each block takes care of one vector - int code = threadIdx.x; // each thread takes care of one possible code + auto id = blockIdx.x; // each block 
takes care of one vector + auto code = threadIdx.x; // each thread takes care of one possible code // compute the objective value by look-up tables KVPair obj(0.0f, code); @@ -94,8 +94,8 @@ __global__ void runEvaluation( int M, int K, int dims) { - int id = blockIdx.x; // each block takes care of one vector - int d = threadIdx.x; // each thread takes care of one dimension + auto id = blockIdx.x; // each block takes care of one vector + auto d = threadIdx.x; // each thread takes care of one dimension float acc = 0.0f; #pragma unroll @@ -136,7 +136,7 @@ __global__ void runCodesPerturbation( int K, int nperts) { // each thread takes care of one vector - int id = blockIdx.x * blockDim.x + threadIdx.x; + auto id = blockIdx.x * blockDim.x + threadIdx.x; if (id >= n) { return; @@ -173,7 +173,7 @@ __global__ void runCodesSelection( int n, int M) { // each thread takes care of one vector - int id = blockIdx.x * blockDim.x + threadIdx.x; + auto id = blockIdx.x * blockDim.x + threadIdx.x; if (id >= n || objs[id] >= bestObjs[id]) { return; @@ -195,8 +195,8 @@ __global__ void runCodesSelection( * @param K number of codewords in a codebook */ __global__ void runNormAddition(float* uterm, const float* norm, int K) { - int id = blockIdx.x; - int code = threadIdx.x; + auto id = blockIdx.x; + auto code = threadIdx.x; uterm[id * K + code] += norm[code]; } diff --git a/faiss/gpu/impl/L2Norm.cu b/faiss/gpu/impl/L2Norm.cu index e76a0831ff..0e65015e44 100644 --- a/faiss/gpu/impl/L2Norm.cu +++ b/faiss/gpu/impl/L2Norm.cu @@ -40,7 +40,7 @@ __global__ void l2NormRowMajor( // these are fine to be int (just based on block dimensions) int numWarps = utils::divUp(blockDim.x, kWarpSize); int laneId = getLaneId(); - int warpId = threadIdx.x / kWarpSize; + auto warpId = threadIdx.x / kWarpSize; bool lastRowTile = (blockIdx.x == (gridDim.x - 1)); idx_t rowStart = idx_t(blockIdx.x) * RowTileSize; diff --git a/faiss/gpu/test/CMakeLists.txt b/faiss/gpu/test/CMakeLists.txt index c549af3947..6b86695976 
100644 --- a/faiss/gpu/test/CMakeLists.txt +++ b/faiss/gpu/test/CMakeLists.txt @@ -43,6 +43,7 @@ faiss_gpu_test(TestGpuIndexFlat.cpp) faiss_gpu_test(TestGpuIndexIVFFlat.cpp) faiss_gpu_test(TestGpuIndexBinaryFlat.cpp) faiss_gpu_test(TestGpuMemoryException.cpp) +faiss_gpu_test(TestGpuIcmEncoder.cpp) faiss_gpu_test(TestGpuIndexIVFPQ.cpp) faiss_gpu_test(TestGpuIndexIVFScalarQuantizer.cpp) faiss_gpu_test(TestGpuResidualQuantizer.cpp) diff --git a/faiss/gpu/test/TestGpuIcmEncoder.cpp b/faiss/gpu/test/TestGpuIcmEncoder.cpp new file mode 100644 index 0000000000..0c793d4f58 --- /dev/null +++ b/faiss/gpu/test/TestGpuIcmEncoder.cpp @@ -0,0 +1,112 @@ +#include +#include +#include +#include + +#include +#include +#include + +using faiss::LocalSearchQuantizer; +using faiss::gpu::GpuIcmEncoder; +using faiss::gpu::GpuResourcesProvider; +using faiss::gpu::StandardGpuResources; + +struct ShardingTestParams { + size_t n; + size_t nshards; +}; + +class GpuIcmEncoderShardingTest + : public ::testing::TestWithParam { + protected: + void SetUp() override { + params = GetParam(); + + lsq.M = 4; + lsq.K = 16; + lsq.d = 32; + + std::uniform_real_distribution dist(-1.0f, 1.0f); + lsq.codebooks.resize(lsq.M * lsq.K * lsq.d); + for (auto& v : lsq.codebooks) { + v = dist(gen); + } + + x.resize(params.n * lsq.d); + codes.resize(params.n * lsq.M); + + for (auto& v : x) { + v = dist(gen); + } + + std::uniform_int_distribution codeDist(0, lsq.K - 1); + for (auto& c : codes) { + c = codeDist(gen); + } + } + + LocalSearchQuantizer lsq; + std::vector x; + std::vector codes; + std::mt19937 gen; + ShardingTestParams params; + static constexpr size_t ils_iters = 4; +}; + +TEST_P(GpuIcmEncoderShardingTest, DataShardingCorrectness) { + std::vector resources(params.nshards); + std::vector provs; + std::vector devices; + + for (size_t i = 0; i < params.nshards; ++i) { + resources[i].noTempMemory(); + provs.push_back(&resources[i]); + devices.push_back(0); // use GPU 0 for testing all shards + } + + 
GpuIcmEncoder encoder(&lsq, provs, devices); + encoder.set_binary_term(); + + gen.seed(42); + EXPECT_NO_THROW( + encoder.encode(codes.data(), x.data(), gen, params.n, ils_iters)); + + for (auto c : codes) { + EXPECT_GE(c, 0); + EXPECT_LT(c, lsq.K); + } +} + +std::vector GetShardingTestCases() { + return { + {1, 8}, + + {5, 4}, + + {10, 2}, + {10, 3}, + {10, 5}, + {10, 8}, + + {20, 8}, + }; +} + +INSTANTIATE_TEST_SUITE_P( + MultiGpuShardingTests, + GpuIcmEncoderShardingTest, + ::testing::ValuesIn(GetShardingTestCases()), + [](const ::testing::TestParamInfo& info) { + return "n" + std::to_string(info.param.n) + "_shards" + + std::to_string(info.param.nshards); + }); + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + + // just run with a fixed test seed + faiss::gpu::setTestSeed(100); + + return RUN_ALL_TESTS(); +} diff --git a/faiss/gpu/utils/DeviceUtils.cu b/faiss/gpu/utils/DeviceUtils.cu index 15036d39fb..123c8e05e8 100644 --- a/faiss/gpu/utils/DeviceUtils.cu +++ b/faiss/gpu/utils/DeviceUtils.cu @@ -30,7 +30,7 @@ void setCurrentDevice(int device) { int getNumDevices() { int numDev = -1; cudaError_t err = cudaGetDeviceCount(&numDev); - if (cudaErrorNoDevice == err) { + if (cudaErrorNoDevice == err || cudaErrorInsufficientDriver == err) { numDev = 0; } else { CUDA_VERIFY(err); diff --git a/faiss/impl/HNSW.cpp b/faiss/impl/HNSW.cpp index 10bd813435..2dad791cec 100644 --- a/faiss/impl/HNSW.cpp +++ b/faiss/impl/HNSW.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -32,6 +31,7 @@ namespace faiss { **************************************************************/ int HNSW::nb_neighbors(int layer_no) const { + FAISS_THROW_IF_NOT(layer_no + 1 < cum_nneighbor_per_level.size()); return cum_nneighbor_per_level[layer_no + 1] - cum_nneighbor_per_level[layer_no]; } @@ -590,15 +590,22 @@ int search_from_candidates( HNSWStats& stats, int level, int nres_in, - const SearchParametersHNSW* params) { + const SearchParameters* params) { int 
nres = nres_in; int ndis = 0; // can be overridden by search params - bool do_dis_check = params ? params->check_relative_distance - : hnsw.check_relative_distance; - int efSearch = params ? params->efSearch : hnsw.efSearch; - const IDSelector* sel = params ? params->sel : nullptr; + bool do_dis_check = hnsw.check_relative_distance; + int efSearch = hnsw.efSearch; + const IDSelector* sel = nullptr; + if (params) { + if (const SearchParametersHNSW* hnsw_params = + dynamic_cast(params)) { + do_dis_check = hnsw_params->check_relative_distance; + efSearch = hnsw_params->efSearch; + } + sel = params->sel; + } C::T threshold = res.threshold; for (int i = 0; i < candidates.size(); i++) { @@ -920,15 +927,22 @@ HNSWStats HNSW::search( DistanceComputer& qdis, ResultHandler& res, VisitedTable& vt, - const SearchParametersHNSW* params) const { + const SearchParameters* params) const { HNSWStats stats; if (entry_point == -1) { return stats; } int k = extract_k_from_ResultHandler(res); - bool bounded_queue = - params ? params->bounded_queue : this->search_bounded_queue; + bool bounded_queue = this->search_bounded_queue; + int efSearch = this->efSearch; + if (params) { + if (const SearchParametersHNSW* hnsw_params = + dynamic_cast(params)) { + bounded_queue = hnsw_params->bounded_queue; + efSearch = hnsw_params->efSearch; + } + } // greedy search on upper levels storage_idx_t nearest = entry_point; @@ -940,7 +954,7 @@ HNSWStats HNSW::search( stats.combine(local_stats); } - int ef = std::max(params ? params->efSearch : efSearch, k); + int ef = std::max(efSearch, k); if (bounded_queue) { // this is the most common branch MinimaxHeap candidates(ef); @@ -980,9 +994,17 @@ void HNSW::search_level_0( int search_type, HNSWStats& search_stats, VisitedTable& vt, - const SearchParametersHNSW* params) const { + const SearchParameters* params) const { const HNSW& hnsw = *this; - auto efSearch = params ? 
params->efSearch : hnsw.efSearch; + + auto efSearch = hnsw.efSearch; + if (params) { + if (const SearchParametersHNSW* hnsw_params = + dynamic_cast(params)) { + efSearch = hnsw_params->efSearch; + } + } + int k = extract_k_from_ResultHandler(res); if (search_type == 1) { @@ -1062,7 +1084,7 @@ void HNSW::permute_entries(const idx_t* map) { // swap everyone std::swap(levels, new_levels); std::swap(offsets, new_offsets); - std::swap(neighbors, new_neighbors); + neighbors = std::move(new_neighbors); } /************************************************************** diff --git a/faiss/impl/HNSW.h b/faiss/impl/HNSW.h index f80fefc2e7..c736588229 100644 --- a/faiss/impl/HNSW.h +++ b/faiss/impl/HNSW.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -122,7 +123,7 @@ struct HNSW { /// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i /// for all levels. this is where all storage goes. - std::vector neighbors; + MaybeOwnedVector neighbors; /// entry point in the search structure (one of the points with maximum /// level @@ -201,7 +202,7 @@ struct HNSW { DistanceComputer& qdis, ResultHandler& res, VisitedTable& vt, - const SearchParametersHNSW* params = nullptr) const; + const SearchParameters* params = nullptr) const; /// search only in level 0 from a given vertex void search_level_0( @@ -213,7 +214,7 @@ struct HNSW { int search_type, HNSWStats& search_stats, VisitedTable& vt, - const SearchParametersHNSW* params = nullptr) const; + const SearchParameters* params = nullptr) const; void reset(); @@ -265,7 +266,7 @@ int search_from_candidates( HNSWStats& stats, int level, int nres_in = 0, - const SearchParametersHNSW* params = nullptr); + const SearchParameters* params = nullptr); HNSWStats greedy_update_nearest( const HNSW& hnsw, diff --git a/faiss/impl/NNDescent.cpp b/faiss/impl/NNDescent.cpp index 3d707be067..9701142ddd 100644 --- a/faiss/impl/NNDescent.cpp +++ b/faiss/impl/NNDescent.cpp @@ -400,7 +400,7 @@ void 
NNDescent::build(DistanceComputer& qdis, const int n, bool verbose) { init_graph(qdis); nndescent(qdis, verbose); - final_graph.resize(ntotal * K); + final_graph.resize(uint64_t(ntotal) * K); // Store the neighbor link structure into final_graph // Clear the old graph diff --git a/faiss/impl/RaBitQuantizer.cpp b/faiss/impl/RaBitQuantizer.cpp new file mode 100644 index 0000000000..8261a9a86c --- /dev/null +++ b/faiss/impl/RaBitQuantizer.cpp @@ -0,0 +1,519 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace faiss { + +struct FactorsData { + // ||or - c||^2 - ((metric==IP) ? ||or||^2 : 0) + float or_minus_c_l2sqr = 0; + float dp_multiplier = 0; +}; + +struct QueryFactorsData { + float c1 = 0; + float c2 = 0; + float c34 = 0; + + float qr_to_c_L2sqr = 0; + float qr_norm_L2sqr = 0; +}; + +static size_t get_code_size(const size_t d) { + return (d + 7) / 8 + sizeof(FactorsData); +} + +RaBitQuantizer::RaBitQuantizer(size_t d, MetricType metric) + : Quantizer(d, get_code_size(d)), metric_type{metric} {} + +void RaBitQuantizer::train(size_t n, const float* x) { + // does nothing +} + +void RaBitQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n) + const { + compute_codes_core(x, codes, n, centroid); +} + +void RaBitQuantizer::compute_codes_core( + const float* x, + uint8_t* codes, + size_t n, + const float* centroid_in) const { + FAISS_ASSERT(codes != nullptr); + FAISS_ASSERT(x != nullptr); + FAISS_ASSERT( + (metric_type == MetricType::METRIC_L2 || + metric_type == MetricType::METRIC_INNER_PRODUCT)); + + if (n == 0) { + return; + } + + // compute some helper constants + const float inv_d_sqrt = (d == 0) ? 
1.0f : (1.0f / std::sqrt((float)d)); + + // compute codes +#pragma omp parallel for if (n > 1000) + for (int64_t i = 0; i < n; i++) { + // ||or - c||^2 + float norm_L2sqr = 0; + // ||or||^2, which is equal to ||P(or)||^2 and ||P^(-1)(or)||^2 + float or_L2sqr = 0; + // dot product + float dp_oO = 0; + + // the code + uint8_t* code = codes + i * code_size; + FactorsData* fac = reinterpret_cast(code + (d + 7) / 8); + + // cleanup it + if (code != nullptr) { + memset(code, 0, code_size); + } + + for (size_t j = 0; j < d; j++) { + const float or_minus_c = x[i * d + j] - + ((centroid_in == nullptr) ? 0 : centroid_in[j]); + norm_L2sqr += or_minus_c * or_minus_c; + or_L2sqr += x[i * d + j] * x[i * d + j]; + + const bool xb = (or_minus_c > 0); + + dp_oO += xb ? or_minus_c : (-or_minus_c); + + // store the output data + if (code != nullptr) { + if (xb) { + // enable a particular bit + code[j / 8] |= (1 << (j % 8)); + } + } + } + + // compute factors + + // compute the inverse norm + const float inv_norm_L2 = + (std::abs(norm_L2sqr) < std::numeric_limits::epsilon()) + ? 1.0f + : (1.0f / std::sqrt(norm_L2sqr)); + dp_oO *= inv_norm_L2; + dp_oO *= inv_d_sqrt; + + const float inv_dp_oO = + (std::abs(dp_oO) < std::numeric_limits::epsilon()) + ? 1.0f + : (1.0f / dp_oO); + + fac->or_minus_c_l2sqr = norm_L2sqr; + if (metric_type == MetricType::METRIC_INNER_PRODUCT) { + fac->or_minus_c_l2sqr -= or_L2sqr; + } + + fac->dp_multiplier = inv_dp_oO * std::sqrt(norm_L2sqr); + } +} + +void RaBitQuantizer::decode(const uint8_t* codes, float* x, size_t n) const { + decode_core(codes, x, n, centroid); +} + +void RaBitQuantizer::decode_core( + const uint8_t* codes, + float* x, + size_t n, + const float* centroid_in) const { + FAISS_ASSERT(codes != nullptr); + FAISS_ASSERT(x != nullptr); + + const float inv_d_sqrt = (d == 0) ? 
1.0f : (1.0f / std::sqrt((float)d)); + +#pragma omp parallel for if (n > 1000) + for (int64_t i = 0; i < n; i++) { + const uint8_t* code = codes + i * code_size; + + // split the code into parts + const uint8_t* binary_data = code; + const FactorsData* fac = + reinterpret_cast(code + (d + 7) / 8); + + // + for (size_t j = 0; j < d; j++) { + // extract i-th bit + const uint8_t masker = (1 << (j % 8)); + const float bit = ((binary_data[j / 8] & masker) == masker) ? 1 : 0; + + // compute the output code + x[i * d + j] = (bit - 0.5f) * fac->dp_multiplier * 2 * inv_d_sqrt + + ((centroid_in == nullptr) ? 0 : centroid_in[j]); + } + } +} + +struct RaBitDistanceComputer : FlatCodesDistanceComputer { + // dimensionality + size_t d = 0; + // a centroid to use + const float* centroid = nullptr; + + // the metric + MetricType metric_type = MetricType::METRIC_L2; + + RaBitDistanceComputer(); + + float symmetric_dis(idx_t i, idx_t j) override; +}; + +RaBitDistanceComputer::RaBitDistanceComputer() = default; + +float RaBitDistanceComputer::symmetric_dis(idx_t i, idx_t j) { + FAISS_THROW_MSG("Not implemented"); +} + +struct RaBitDistanceComputerNotQ : RaBitDistanceComputer { + // the rotated query (qr - c) + std::vector rotated_q; + // some additional numbers for the query + QueryFactorsData query_fac; + + RaBitDistanceComputerNotQ(); + + float distance_to_code(const uint8_t* code) override; + + void set_query(const float* x) override; +}; + +RaBitDistanceComputerNotQ::RaBitDistanceComputerNotQ() = default; + +float RaBitDistanceComputerNotQ::distance_to_code(const uint8_t* code) { + FAISS_ASSERT(code != nullptr); + FAISS_ASSERT( + (metric_type == MetricType::METRIC_L2 || + metric_type == MetricType::METRIC_INNER_PRODUCT)); + + // split the code into parts + const uint8_t* binary_data = code; + const FactorsData* fac = + reinterpret_cast(code + (d + 7) / 8); + + // this is the baseline code + // + // compute using floats + float dot_qo = 0; + // It was a willful decision (after the 
discussion) to not to pre-cache + // the sum of all bits, just in order to reduce the overhead per vector. + uint64_t sum_q = 0; + for (size_t i = 0; i < d; i++) { + // extract i-th bit + const uint8_t masker = (1 << (i % 8)); + const bool b_bit = ((binary_data[i / 8] & masker) == masker); + + // accumulate dp + dot_qo += (b_bit) ? rotated_q[i] : 0; + // accumulate sum-of-bits + sum_q += (b_bit) ? 1 : 0; + } + + float final_dot = 0; + // dot-product itself + final_dot += query_fac.c1 * dot_qo; + // normalizer coefficients + final_dot += query_fac.c2 * sum_q; + // normalizer coefficients + final_dot -= query_fac.c34; + + // this is ||or - c||^2 - (IP ? ||or||^2 : 0) + const float or_c_l2sqr = fac->or_minus_c_l2sqr; + + // pre_dist = ||or - c||^2 + ||qr - c||^2 - + // 2 * ||or - c|| * ||qr - c|| * - (IP ? ||or||^2 : 0) + const float pre_dist = or_c_l2sqr + query_fac.qr_to_c_L2sqr - + 2 * fac->dp_multiplier * final_dot; + + if (metric_type == MetricType::METRIC_L2) { + // ||or - q||^ 2 + return pre_dist; + } else { + // metric == MetricType::METRIC_INNER_PRODUCT + + // this is ||q||^2 + const float query_norm_sqr = query_fac.qr_norm_L2sqr; + + // 2 * (or, q) = (||or - q||^2 - ||q||^2 - ||or||^2) + return -0.5f * (pre_dist - query_norm_sqr); + } +} + +void RaBitDistanceComputerNotQ::set_query(const float* x) { + FAISS_ASSERT(x != nullptr); + FAISS_ASSERT( + (metric_type == MetricType::METRIC_L2 || + metric_type == MetricType::METRIC_INNER_PRODUCT)); + + // compute the distance from the query to the centroid + if (centroid != nullptr) { + query_fac.qr_to_c_L2sqr = fvec_L2sqr(x, centroid, d); + } else { + query_fac.qr_to_c_L2sqr = fvec_norm_L2sqr(x, d); + } + + // subtract c, obtain P^(-1)(qr - c) + rotated_q.resize(d); + for (size_t i = 0; i < d; i++) { + rotated_q[i] = x[i] - ((centroid == nullptr) ? 0 : centroid[i]); + } + + // compute some numbers + const float inv_d = (d == 0) ? 
1.0f : (1.0f / std::sqrt((float)d)); + + // do not quantize the query + float sum_q = 0; + for (size_t i = 0; i < d; i++) { + sum_q += rotated_q[i]; + } + + query_fac.c1 = 2 * inv_d; + query_fac.c2 = 0; + query_fac.c34 = sum_q * inv_d; + + if (metric_type == MetricType::METRIC_INNER_PRODUCT) { + // precompute if needed + query_fac.qr_norm_L2sqr = fvec_norm_L2sqr(x, d); + } +} + +// +struct RaBitDistanceComputerQ : RaBitDistanceComputer { + // the rotated and quantized query (qr - c) + std::vector rotated_qq; + // we're using the proposed relayout-ed scheme from 3.3 that allows + // using popcounts for computing the distance. + std::vector rearranged_rotated_qq; + // some additional numbers for the query + QueryFactorsData query_fac; + + // the number of bits for SQ quantization of the query (qb > 0) + uint8_t qb = 8; + // the smallest value divisible by 8 that is not smaller than dim + size_t popcount_aligned_dim = 0; + + RaBitDistanceComputerQ(); + + float distance_to_code(const uint8_t* code) override; + + void set_query(const float* x) override; +}; + +RaBitDistanceComputerQ::RaBitDistanceComputerQ() = default; + +float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) { + FAISS_ASSERT(code != nullptr); + FAISS_ASSERT( + (metric_type == MetricType::METRIC_L2 || + metric_type == MetricType::METRIC_INNER_PRODUCT)); + + // split the code into parts + const uint8_t* binary_data = code; + const FactorsData* fac = + reinterpret_cast(code + (d + 7) / 8); + + // // this is the baseline code + // // + // // compute using integers + // size_t dot_qo = 0; + // for (size_t i = 0; i < d; i++) { + // // extract i-th bit + // const uint8_t masker = (1 << (i % 8)); + // const uint8_t bit = ((binary_data[i / 8] & masker) == masker) ? 
1 : + // 0; + // + // // accumulate dp + // dot_qo += bit * rotated_qq[i]; + // } + + // this is the scheme for popcount + const size_t di_8b = (d + 7) / 8; + const size_t di_64b = (di_8b / 8) * 8; + + uint64_t dot_qo = 0; + for (size_t j = 0; j < qb; j++) { + const uint8_t* query_j = rearranged_rotated_qq.data() + j * di_8b; + + // process 64-bit popcounts + uint64_t count_dot = 0; + for (size_t i = 0; i < di_64b; i += 8) { + const auto qv = *(const uint64_t*)(query_j + i); + const auto yv = *(const uint64_t*)(binary_data + i); + count_dot += __builtin_popcountll(qv & yv); + } + + // process leftovers + for (size_t i = di_64b; i < di_8b; i++) { + const auto qv = *(query_j + i); + const auto yv = *(binary_data + i); + count_dot += __builtin_popcount(qv & yv); + } + + dot_qo += (count_dot << j); + } + + // It was a willful decision (after the discussion) to not to pre-cache + // the sum of all bits, just in order to reduce the overhead per vector. + uint64_t sum_q = 0; + { + // process 64-bit popcounts + for (size_t i = 0; i < di_64b; i += 8) { + const auto yv = *(const uint64_t*)(binary_data + i); + sum_q += __builtin_popcountll(yv); + } + + // process leftovers + for (size_t i = di_64b; i < di_8b; i++) { + const auto yv = *(binary_data + i); + sum_q += __builtin_popcount(yv); + } + } + + float final_dot = 0; + // dot-product itself + final_dot += query_fac.c1 * dot_qo; + // normalizer coefficients + final_dot += query_fac.c2 * sum_q; + // normalizer coefficients + final_dot -= query_fac.c34; + + // this is ||or - c||^2 - (IP ? ||or||^2 : 0) + const float or_c_l2sqr = fac->or_minus_c_l2sqr; + + // pre_dist = ||or - c||^2 + ||qr - c||^2 - + // 2 * ||or - c|| * ||qr - c|| * - (IP ? 
||or||^2 : 0) + const float pre_dist = or_c_l2sqr + query_fac.qr_to_c_L2sqr - + 2 * fac->dp_multiplier * final_dot; + + if (metric_type == MetricType::METRIC_L2) { + // ||or - q||^ 2 + return pre_dist; + } else { + // metric == MetricType::METRIC_INNER_PRODUCT + + // this is ||q||^2 + const float query_norm_sqr = query_fac.qr_norm_L2sqr; + + // 2 * (or, q) = (||or - q||^2 - ||q||^2 - ||or||^2) + return -0.5f * (pre_dist - query_norm_sqr); + } +} + +void RaBitDistanceComputerQ::set_query(const float* x) { + FAISS_ASSERT(x != nullptr); + FAISS_ASSERT( + (metric_type == MetricType::METRIC_L2 || + metric_type == MetricType::METRIC_INNER_PRODUCT)); + + // compute the distance from the query to the centroid + if (centroid != nullptr) { + query_fac.qr_to_c_L2sqr = fvec_L2sqr(x, centroid, d); + } else { + query_fac.qr_to_c_L2sqr = fvec_norm_L2sqr(x, d); + } + + // allocate space + rotated_qq.resize(d); + + // rotate the query + std::vector rotated_q(d); + for (size_t i = 0; i < d; i++) { + rotated_q[i] = x[i] - ((centroid == nullptr) ? 0 : centroid[i]); + } + + // compute some numbers + const float inv_d = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d)); + + // quantize the query. 
compute min and max + float v_min = std::numeric_limits::max(); + float v_max = std::numeric_limits::lowest(); + for (size_t i = 0; i < d; i++) { + const float v_q = rotated_q[i]; + v_min = std::min(v_min, v_q); + v_max = std::max(v_max, v_q); + } + + const float pow_2_qb = 1 << qb; + + const float delta = (v_max - v_min) / (pow_2_qb - 1); + const float inv_delta = 1.0f / delta; + + size_t sum_qq = 0; + for (int32_t i = 0; i < d; i++) { + const float v_q = rotated_q[i]; + + // a default non-randomized SQ + const int v_qq = std::round((v_q - v_min) * inv_delta); + + rotated_qq[i] = std::min(255, std::max(0, v_qq)); + sum_qq += v_qq; + } + + // rearrange the query vector + popcount_aligned_dim = ((d + 7) / 8) * 8; + size_t offset = (d + 7) / 8; + + rearranged_rotated_qq.resize(offset * qb); + std::fill(rearranged_rotated_qq.begin(), rearranged_rotated_qq.end(), 0); + + for (size_t idim = 0; idim < d; idim++) { + for (size_t iv = 0; iv < qb; iv++) { + const bool bit = ((rotated_qq[idim] & (1 << iv)) != 0); + rearranged_rotated_qq[iv * offset + idim / 8] |= + bit ? 
(1 << (idim % 8)) : 0; + } + } + + query_fac.c1 = 2 * delta * inv_d; + query_fac.c2 = 2 * v_min * inv_d; + query_fac.c34 = inv_d * (delta * sum_qq + d * v_min); + + if (metric_type == MetricType::METRIC_INNER_PRODUCT) { + // precompute if needed + query_fac.qr_norm_L2sqr = fvec_norm_L2sqr(x, d); + } +} + +FlatCodesDistanceComputer* RaBitQuantizer::get_distance_computer( + uint8_t qb, + const float* centroid_in) const { + if (qb == 0) { + auto dc = std::make_unique(); + dc->metric_type = metric_type; + dc->d = d; + dc->centroid = centroid_in; + + return dc.release(); + } else { + auto dc = std::make_unique(); + dc->metric_type = metric_type; + dc->d = d; + dc->centroid = centroid_in; + dc->qb = qb; + + return dc.release(); + } +} + +} // namespace faiss diff --git a/faiss/impl/RaBitQuantizer.h b/faiss/impl/RaBitQuantizer.h new file mode 100644 index 0000000000..01115838a4 --- /dev/null +++ b/faiss/impl/RaBitQuantizer.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +#include +#include +#include + +namespace faiss { + +// the reference implementation of the https://arxiv.org/pdf/2405.12497 +// Jianyang Gao, Cheng Long, "RaBitQ: Quantizing High-Dimensional Vectors +// with a Theoretical Error Bound for Approximate Nearest Neighbor Search". +// +// It is assumed that the Random Matrix Rotation is performed externally. +struct RaBitQuantizer : Quantizer { + // all RaBitQ operations are provided against a centroid, which needs + // to be provided Externally (!). Nullptr value implies that the centroid + // consists of zero values. + // This is the default value that can be customized using XYZ_core() calls. + // Such a customization is needed for IVF calls. + // + // This particular pointer will NOT be serialized. 
+ float* centroid = nullptr; + + // RaBitQ codes computations are independent from a metric. But it is needed + // to store some additional fp32 constants together with a quantized code. + // A decision was made to make this quantizer as space efficient as + // possible. Thus, a quantizer has to introduce a metric. + MetricType metric_type = MetricType::METRIC_L2; + + RaBitQuantizer(size_t d = 0, MetricType metric = MetricType::METRIC_L2); + + void train(size_t n, const float* x) override; + + // every vector is expected to take (d + 7) / 8 + sizeof(FactorsData) bytes, + void compute_codes(const float* x, uint8_t* codes, size_t n) const override; + + void compute_codes_core( + const float* x, + uint8_t* codes, + size_t n, + const float* centroid_in) const; + + // The decode output is Heavily geared towards maintaining the IP, not L2. + // This means that the reconstructed codes maybe less accurate than one may + // expect, if one computes an L2 distance between a reconstructed code and + // the corresponding original vector. + // But value of the dot product between a query and the original vector + // might be very close to the value of the dot product between a query and + // the reconstructed code. + // Basically, it seems to be related to the distributions of values, not + // values. + void decode(const uint8_t* codes, float* x, size_t n) const override; + + void decode_core( + const uint8_t* codes, + float* x, + size_t n, + const float* centroid_in) const; + + // returns the distance computer. 
+ // specify qb = 0 to get an DC that does not quantize a query + // specify qb > 0 to have SQ qb-bits query + FlatCodesDistanceComputer* get_distance_computer( + uint8_t qb, + const float* centroid_in = nullptr) const; +}; + +} // namespace faiss diff --git a/faiss/impl/ResultHandler.h b/faiss/impl/ResultHandler.h index 104b34db3b..c5bc3ca76e 100644 --- a/faiss/impl/ResultHandler.h +++ b/faiss/impl/ResultHandler.h @@ -535,7 +535,7 @@ struct RangeSearchBlockResultHandler : BlockResultHandler { try { // finalize the partial result pres.finalize(); - } catch (const faiss::FaissException& e) { + } catch ([[maybe_unused]] const faiss::FaissException& e) { // Do nothing if allocation fails in finalizing partial results. #ifndef NDEBUG std::cerr << e.what() << std::endl; @@ -599,7 +599,7 @@ struct RangeSearchBlockResultHandler : BlockResultHandler { if (partial_results.size() > 0) { RangeSearchPartialResult::merge(partial_results); } - } catch (const faiss::FaissException& e) { + } catch ([[maybe_unused]] const faiss::FaissException& e) { // Do nothing if allocation fails in merge. 
#ifndef NDEBUG std::cerr << e.what() << std::endl; diff --git a/faiss/impl/code_distance/code_distance-sve.h b/faiss/impl/code_distance/code_distance-sve.h index 713b7d8099..82f7746be6 100644 --- a/faiss/impl/code_distance/code_distance-sve.h +++ b/faiss/impl/code_distance/code_distance-sve.h @@ -14,6 +14,7 @@ #include #include +#include #include namespace faiss { @@ -48,7 +49,7 @@ static inline void distance_codes_kernel( partialSum = svadd_f32_m(pg, partialSum, collected); } -static float distance_single_code_sve_for_small_m( +static inline float distance_single_code_sve_for_small_m( // the product quantizer const size_t M, // precomputed distances, layout (M, ksub) @@ -196,7 +197,7 @@ distance_four_codes_sve( result3); } -static void distance_four_codes_sve_for_small_m( +static inline void distance_four_codes_sve_for_small_m( // the product quantizer const size_t M, // precomputed distances, layout (M, ksub) @@ -217,8 +218,6 @@ static void distance_four_codes_sve_for_small_m( const auto offsets_0 = svindex_u32(0, static_cast(ksub)); - const auto quad_lanes = svcntw(); - // loop const auto pg = svwhilelt_b32_u64(0, M); diff --git a/faiss/impl/index_read.cpp b/faiss/impl/index_read.cpp index 4c1bc27c28..44c0c1e731 100644 --- a/faiss/impl/index_read.cpp +++ b/faiss/impl/index_read.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -53,8 +56,141 @@ #include #include +// mmap-ing and viewing facilities +#include + +#include +#include + namespace faiss { +/************************************************************* + * Mmap-ing and viewing facilities + **************************************************************/ + +// This is a baseline functionality for reading mmapped and zerocopied vector. +// * if `beforeknown_size` is defined, then a size of the vector won't be read. 
+// * if `size_multiplier` is defined, then a size will be multiplied by it. +// * returns true is the case was handled; ownerwise, false +template +bool read_vector_base( + VectorT& target, + IOReader* f, + const std::optional beforeknown_size, + const std::optional size_multiplier) { + // check if the use case is right + if constexpr (is_maybe_owned_vector_v) { + // is it a mmap-enabled reader? + MappedFileIOReader* mf = dynamic_cast(f); + if (mf != nullptr) { + // read the size or use a known one + size_t size = 0; + if (beforeknown_size.has_value()) { + size = beforeknown_size.value(); + } else { + READANDCHECK(&size, 1); + } + + // perform the size multiplication + size *= size_multiplier.value_or(1); + + // ok, mmap and check + char* address = nullptr; + const size_t nread = mf->mmap( + (void**)&address, + sizeof(typename VectorT::value_type), + size); + + FAISS_THROW_IF_NOT_FMT( + nread == (size), + "read error in %s: %zd != %zd (%s)", + f->name.c_str(), + nread, + size, + strerror(errno)); + + VectorT mmapped_view = + VectorT::create_view(address, nread, mf->mmap_owner); + target = std::move(mmapped_view); + + return true; + } + + // is it a zero-copy reader? 
+ ZeroCopyIOReader* zr = dynamic_cast(f); + if (zr != nullptr) { + // read the size or use a known one + size_t size = 0; + if (beforeknown_size.has_value()) { + size = beforeknown_size.value(); + } else { + READANDCHECK(&size, 1); + } + + // perform the size multiplication + size *= size_multiplier.value_or(1); + + // create a view + char* address = nullptr; + size_t nread = zr->get_data_view( + (void**)&address, + sizeof(typename VectorT::value_type), + size); + + FAISS_THROW_IF_NOT_FMT( + nread == (size), + "read error in %s: %zd != %zd (%s)", + f->name.c_str(), + nread, + size_t(size), + strerror(errno)); + + VectorT view = VectorT::create_view(address, nread, nullptr); + target = std::move(view); + + return true; + } + } + + return false; +} + +// a replacement for READANDCHECK for reading data into std::vector +template +void read_vector_with_known_size(VectorT& target, IOReader* f, size_t size) { + // size is known beforehand, no size multiplication + if (read_vector_base(target, f, size, std::nullopt)) { + return; + } + + // the default case + READANDCHECK(target.data(), size); +} + +// a replacement for READVECTOR +template +void read_vector(VectorT& target, IOReader* f) { + // size is not known beforehand, no size multiplication + if (read_vector_base(target, f, std::nullopt, std::nullopt)) { + return; + } + + // the default case + READVECTOR(target); +} + +// a replacement for READXBVECTOR +template +void read_xb_vector(VectorT& target, IOReader* f) { + // size is not known beforehand, nultiply the size 4x + if (read_vector_base(target, f, std::nullopt, 4)) { + return; + } + + // the default case + READXBVECTOR(target); +} + /************************************************************* * Read **************************************************************/ @@ -206,8 +342,9 @@ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) { if (n > 0) { ails->ids[i].resize(n); ails->codes[i].resize(n * ails->code_size); - READANDCHECK(ails->codes[i].data(), 
n * ails->code_size); - READANDCHECK(ails->ids[i].data(), n); + read_vector_with_known_size( + ails->codes[i], f, n * ails->code_size); + read_vector_with_known_size(ails->ids[i], f, n); } } return ails; @@ -276,7 +413,7 @@ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) { aq->search_type == AdditiveQuantizer::ST_norm_cqint4 || aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 || aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) { - READXBVECTOR(aq->qnorm.codes); + read_xb_vector(aq->qnorm.codes, f); aq->qnorm.ntotal = aq->qnorm.codes.size() / 4; aq->qnorm.update_permutation(); } @@ -366,7 +503,7 @@ static void read_HNSW(HNSW* hnsw, IOReader* f) { READVECTOR(hnsw->cum_nneighbor_per_level); READVECTOR(hnsw->levels); READVECTOR(hnsw->offsets); - READVECTOR(hnsw->neighbors); + read_vector(hnsw->neighbors, f); READ1(hnsw->entry_point); READ1(hnsw->max_level); @@ -440,6 +577,13 @@ ProductQuantizer* read_ProductQuantizer(IOReader* reader) { return pq; } +static void read_RaBitQuantizer(RaBitQuantizer* rabitq, IOReader* f) { + // don't care about rabitq->centroid + READ1(rabitq->d); + READ1(rabitq->code_size); + READ1(rabitq->metric_type); +} + void read_direct_map(DirectMap* dm, IOReader* f) { char maintain_direct_map; READ1(maintain_direct_map); @@ -479,7 +623,12 @@ ArrayInvertedLists* set_array_invlist( std::vector>& ids) { ArrayInvertedLists* ail = new ArrayInvertedLists(ivf->nlist, ivf->code_size); - std::swap(ail->ids, ids); + + ail->ids.resize(ids.size()); + for (size_t i = 0; i < ids.size(); i++) { + ail->ids[i] = MaybeOwnedVector(std::move(ids[i])); + } + ivf->invlists = ail; ivf->own_invlists = true; return ail; @@ -547,7 +696,7 @@ Index* read_index(IOReader* f, int io_flags) { read_index_header(idxf, f); idxf->code_size = idxf->d * sizeof(float); - READXBVECTOR(idxf->codes); + read_xb_vector(idxf->codes, f); FAISS_THROW_IF_NOT( idxf->codes.size() == idxf->ntotal * idxf->code_size); // leak! 
@@ -578,7 +727,7 @@ Index* read_index(IOReader* f, int io_flags) { idxl->rrot = *rrot; delete rrot; } - READVECTOR(idxl->codes); + read_vector(idxl->codes, f); FAISS_THROW_IF_NOT( idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits); FAISS_THROW_IF_NOT( @@ -591,7 +740,7 @@ Index* read_index(IOReader* f, int io_flags) { read_index_header(idxp, f); read_ProductQuantizer(&idxp->pq, f); idxp->code_size = idxp->pq.code_size; - READVECTOR(idxp->codes); + read_vector(idxp->codes, f); if (h == fourcc("IxPo") || h == fourcc("IxPq")) { READ1(idxp->search_type); READ1(idxp->encode_signs); @@ -613,28 +762,28 @@ Index* read_index(IOReader* f, int io_flags) { read_ResidualQuantizer(&idxr->rq, f, io_flags); } READ1(idxr->code_size); - READVECTOR(idxr->codes); + read_vector(idxr->codes, f); idx = idxr; } else if (h == fourcc("IxLS")) { auto idxr = new IndexLocalSearchQuantizer(); read_index_header(idxr, f); read_LocalSearchQuantizer(&idxr->lsq, f); READ1(idxr->code_size); - READVECTOR(idxr->codes); + read_vector(idxr->codes, f); idx = idxr; } else if (h == fourcc("IxPR")) { auto idxpr = new IndexProductResidualQuantizer(); read_index_header(idxpr, f); read_ProductResidualQuantizer(&idxpr->prq, f, io_flags); READ1(idxpr->code_size); - READVECTOR(idxpr->codes); + read_vector(idxpr->codes, f); idx = idxpr; } else if (h == fourcc("IxPL")) { auto idxpl = new IndexProductLocalSearchQuantizer(); read_index_header(idxpl, f); read_ProductLocalSearchQuantizer(&idxpl->plsq, f); READ1(idxpl->code_size); - READVECTOR(idxpl->codes); + read_vector(idxpl->codes, f); idx = idxpl; } else if (h == fourcc("ImRQ")) { ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer(); @@ -791,7 +940,7 @@ Index* read_index(IOReader* f, int io_flags) { IndexScalarQuantizer* idxs = new IndexScalarQuantizer(); read_index_header(idxs, f); read_ScalarQuantizer(&idxs->sq, f); - READVECTOR(idxs->codes); + read_vector(idxs->codes, f); idxs->code_size = idxs->sq.code_size; idx = idxs; } else if (h == 
fourcc("IxLa")) { @@ -949,7 +1098,7 @@ Index* read_index(IOReader* f, int io_flags) { READ1(idxp->code_size_1); READ1(idxp->code_size_2); READ1(idxp->code_size); - READVECTOR(idxp->codes); + read_vector(idxp->codes, f); idx = idxp; } else if ( h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") || @@ -1062,6 +1211,24 @@ Index* read_index(IOReader* f, int io_flags) { imm->own_fields = true; idx = imm; + } else if (h == fourcc("Ixrq")) { + IndexRaBitQ* idxq = new IndexRaBitQ(); + read_index_header(idxq, f); + read_RaBitQuantizer(&idxq->rabitq, f); + READVECTOR(idxq->codes); + READVECTOR(idxq->center); + READ1(idxq->qb); + idxq->code_size = idxq->rabitq.code_size; + idx = idxq; + } else if (h == fourcc("Iwrq")) { + IndexIVFRaBitQ* ivrq = new IndexIVFRaBitQ(); + read_ivf_header(ivrq, f); + read_RaBitQuantizer(&ivrq->rabitq, f); + READ1(ivrq->code_size); + READ1(ivrq->by_residual); + READ1(ivrq->qb); + read_InvertedLists(ivrq, f, io_flags); + idx = ivrq; } else { FAISS_THROW_FMT( "Index type 0x%08x (\"%s\") not recognized", @@ -1073,14 +1240,28 @@ Index* read_index(IOReader* f, int io_flags) { } Index* read_index(FILE* f, int io_flags) { - FileIOReader reader(f); - return read_index(&reader, io_flags); + if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) { + // enable mmap-supporting IOReader + auto owner = std::make_shared(f); + MappedFileIOReader reader(owner); + return read_index(&reader, io_flags); + } else { + FileIOReader reader(f); + return read_index(&reader, io_flags); + } } Index* read_index(const char* fname, int io_flags) { - FileIOReader reader(fname); - Index* idx = read_index(&reader, io_flags); - return idx; + if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) { + // enable mmap-supporting IOReader + auto owner = std::make_shared(fname); + MappedFileIOReader reader(owner); + return read_index(&reader, io_flags); + } else { + FileIOReader reader(fname); + Index* idx = read_index(&reader, io_flags); + return idx; + } } 
VectorTransform* read_VectorTransform(const char* fname) { @@ -1183,7 +1364,7 @@ IndexBinary* read_index_binary(IOReader* f, int io_flags) { if (h == fourcc("IBxF")) { IndexBinaryFlat* idxf = new IndexBinaryFlat(); read_index_binary_header(idxf, f); - READVECTOR(idxf->xb); + read_vector(idxf->xb, f); FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->code_size); // leak! idx = idxf; @@ -1251,14 +1432,28 @@ IndexBinary* read_index_binary(IOReader* f, int io_flags) { } IndexBinary* read_index_binary(FILE* f, int io_flags) { - FileIOReader reader(f); - return read_index_binary(&reader, io_flags); + if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) { + // enable mmap-supporting IOReader + auto owner = std::make_shared(f); + MappedFileIOReader reader(owner); + return read_index_binary(&reader, io_flags); + } else { + FileIOReader reader(f); + return read_index_binary(&reader, io_flags); + } } IndexBinary* read_index_binary(const char* fname, int io_flags) { - FileIOReader reader(fname); - IndexBinary* idx = read_index_binary(&reader, io_flags); - return idx; + if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) { + // enable mmap-supporting IOReader + auto owner = std::make_shared(fname); + MappedFileIOReader reader(owner); + return read_index_binary(&reader, io_flags); + } else { + FileIOReader reader(fname); + IndexBinary* idx = read_index_binary(&reader, io_flags); + return idx; + } } } // namespace faiss diff --git a/faiss/impl/index_write.cpp b/faiss/impl/index_write.cpp index 0118ef4711..5b65454fe3 100644 --- a/faiss/impl/index_write.cpp +++ b/faiss/impl/index_write.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -364,6 +366,13 @@ static void write_NNDescent(const NNDescent* nnd, IOWriter* f) { WRITEVECTOR(nnd->final_graph); } +static void write_RaBitQuantizer(const RaBitQuantizer* rabitq, IOWriter* f) { + // don't care about 
rabitq->centroid + WRITE1(rabitq->d); + WRITE1(rabitq->code_size); + WRITE1(rabitq->metric_type); +} + static void write_direct_map(const DirectMap* dm, IOWriter* f) { char maintain_direct_map = (char)dm->type; // for backwards compatibility with bool @@ -850,6 +859,26 @@ void write_index(const Index* idx, IOWriter* f, int io_flags) { WRITE1(h); write_index_header(imm_2, f); write_index(imm_2->index, f); + } else if ( + const IndexRaBitQ* idxq = dynamic_cast(idx)) { + uint32_t h = fourcc("Ixrq"); + WRITE1(h); + write_index_header(idx, f); + write_RaBitQuantizer(&idxq->rabitq, f); + WRITEVECTOR(idxq->codes); + WRITEVECTOR(idxq->center); + WRITE1(idxq->qb); + } else if ( + const IndexIVFRaBitQ* ivrq = + dynamic_cast(idx)) { + uint32_t h = fourcc("Iwrq"); + WRITE1(h); + write_ivf_header(ivrq, f); + write_RaBitQuantizer(&ivrq->rabitq, f); + WRITE1(ivrq->code_size); + WRITE1(ivrq->by_residual); + WRITE1(ivrq->qb); + write_InvertedLists(ivrq->invlists, f); } else { FAISS_THROW_MSG("don't know how to serialize this type of index"); } diff --git a/faiss/impl/io.h b/faiss/impl/io.h index 9e28d64e9d..a2def099b5 100644 --- a/faiss/impl/io.h +++ b/faiss/impl/io.h @@ -16,12 +16,12 @@ #pragma once +#include +#include #include #include #include -#include - namespace faiss { struct IOReader { diff --git a/faiss/impl/io_macros.h b/faiss/impl/io_macros.h index c874ccf35c..5449ba1cc0 100644 --- a/faiss/impl/io_macros.h +++ b/faiss/impl/io_macros.h @@ -7,6 +7,8 @@ #pragma once +#include + /************************************************************* * I/O macros * diff --git a/faiss/impl/mapped_io.cpp b/faiss/impl/mapped_io.cpp new file mode 100644 index 0000000000..32486a9e6d --- /dev/null +++ b/faiss/impl/mapped_io.cpp @@ -0,0 +1,313 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +#ifdef __linux__ + +#include +#include +#include +#include +#include + +#elif defined(_WIN32) + +#include // @manual +#include // @manual + +#endif + +#include + +#include +#include + +namespace faiss { + +#ifdef __linux__ + +struct MmappedFileMappingOwner::PImpl { + void* ptr = nullptr; + size_t ptr_size = 0; + + PImpl(const std::string& filename) { + auto f = std::unique_ptr( + fopen(filename.c_str(), "r"), &fclose); + FAISS_THROW_IF_NOT_FMT( + f.get(), + "could not open %s for reading: %s", + filename.c_str(), + strerror(errno)); + + // get the size + struct stat s; + int status = fstat(fileno(f.get()), &s); + FAISS_THROW_IF_NOT_FMT( + status >= 0, "fstat() failed: %s", strerror(errno)); + + const size_t filesize = s.st_size; + + void* address = mmap( + nullptr, filesize, PROT_READ, MAP_SHARED, fileno(f.get()), 0); + FAISS_THROW_IF_NOT_FMT( + address != nullptr, "could not mmap(): %s", strerror(errno)); + + // btw, fd can be closed here + + madvise(address, filesize, MADV_RANDOM); + + // save it + ptr = address; + ptr_size = filesize; + } + + PImpl(FILE* f) { + // get the size + struct stat s; + int status = fstat(fileno(f), &s); + FAISS_THROW_IF_NOT_FMT( + status >= 0, "fstat() failed: %s", strerror(errno)); + + const size_t filesize = s.st_size; + + void* address = + mmap(nullptr, filesize, PROT_READ, MAP_SHARED, fileno(f), 0); + FAISS_THROW_IF_NOT_FMT( + address != nullptr, "could not mmap(): %s", strerror(errno)); + + // btw, fd can be closed here + + madvise(address, filesize, MADV_RANDOM); + + // save it + ptr = address; + ptr_size = filesize; + } + + ~PImpl() { + // todo: check for an error + munmap(ptr, ptr_size); + } +}; + +#elif defined(_WIN32) + +struct MmappedFileMappingOwner::PImpl { + void* ptr = nullptr; + size_t ptr_size = 0; + HANDLE mapping_handle = INVALID_HANDLE_VALUE; + + PImpl(const std::string& filename) { + HANDLE file_handle = CreateFile( + filename.c_str(), + GENERIC_READ, + FILE_SHARE_READ, + nullptr, + 
OPEN_EXISTING, + 0, + nullptr); + if (file_handle == INVALID_HANDLE_VALUE) { + const auto error = GetLastError(); + FAISS_THROW_FMT( + "could not open the file, %s (error %d)", + filename.c_str(), + error); + } + + // get the size of the file + LARGE_INTEGER len_li; + if (GetFileSizeEx(file_handle, &len_li) == 0) { + const auto error = GetLastError(); + + CloseHandle(file_handle); + + FAISS_THROW_FMT( + "could not get the file size, %s (error %d)", + filename.c_str(), + error); + } + + // create a mapping + mapping_handle = CreateFileMapping( + file_handle, nullptr, PAGE_READONLY, 0, 0, nullptr); + if (mapping_handle == 0) { + const auto error = GetLastError(); + + CloseHandle(file_handle); + + FAISS_THROW_FMT( + "could not create a file mapping, %s (error %d)", + filename.c_str(), + error); + } + CloseHandle(file_handle); + + char* data = + (char*)MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, 0); + if (data == nullptr) { + const auto error = GetLastError(); + + CloseHandle(mapping_handle); + mapping_handle = INVALID_HANDLE_VALUE; + + FAISS_THROW_FMT( + "could not get map the file, %s (error %d)", + filename.c_str(), + error); + } + + ptr = data; + ptr_size = len_li.QuadPart; + } + + PImpl(FILE* f) { + // obtain a HANDLE from a FILE + const int fd = _fileno(f); + if (fd == -1) { + // no good + FAISS_THROW_FMT("could not get a HANDLE"); + } + + HANDLE file_handle = (HANDLE)_get_osfhandle(fd); + if (file_handle == INVALID_HANDLE_VALUE) { + FAISS_THROW_FMT("could not get an OS HANDLE"); + } + + // get the size of the file + LARGE_INTEGER len_li; + if (GetFileSizeEx(file_handle, &len_li) == 0) { + const auto error = GetLastError(); + FAISS_THROW_FMT("could not get the file size (error %d)", error); + } + + // create a mapping + mapping_handle = CreateFileMapping( + file_handle, nullptr, PAGE_READONLY, 0, 0, nullptr); + if (mapping_handle == 0) { + const auto error = GetLastError(); + FAISS_THROW_FMT( + "could not create a file mapping, (error %d)", error); + } + + 
// the handle is provided externally, so this is not our business + // to close file_handle. + + char* data = + (char*)MapViewOfFile(mapping_handle, FILE_MAP_READ, 0, 0, 0); + if (data == nullptr) { + const auto error = GetLastError(); + + CloseHandle(mapping_handle); + mapping_handle = INVALID_HANDLE_VALUE; + + FAISS_THROW_FMT("could not get map the file, (error %d)", error); + } + + ptr = data; + ptr_size = len_li.QuadPart; + } + + ~PImpl() { + if (mapping_handle != INVALID_HANDLE_VALUE) { + UnmapViewOfFile(ptr); + CloseHandle(mapping_handle); + + mapping_handle = INVALID_HANDLE_VALUE; + ptr = nullptr; + } + } +}; + +#else + +struct MmappedFileMappingOwner::PImpl { + void* ptr = nullptr; + size_t ptr_size = 0; + + PImpl(const std::string& filename) { + FAISS_THROW_MSG("Not implemented"); + } + + PImpl(FILE* f) { + FAISS_THROW_MSG("Not implemented"); + } +}; + +#endif + +MmappedFileMappingOwner::MmappedFileMappingOwner(const std::string& filename) { + p_impl = std::make_unique(filename); +} + +MmappedFileMappingOwner::MmappedFileMappingOwner(FILE* f) { + p_impl = std::make_unique(f); +} + +MmappedFileMappingOwner::~MmappedFileMappingOwner() = default; + +// +void* MmappedFileMappingOwner::data() const { + return p_impl->ptr; +} + +size_t MmappedFileMappingOwner::size() const { + return p_impl->ptr_size; +} + +MappedFileIOReader::MappedFileIOReader( + const std::shared_ptr& owner) + : mmap_owner(owner) {} + +// this operation performs a copy +size_t MappedFileIOReader::operator()(void* ptr, size_t size, size_t nitems) { + if (size * nitems == 0) { + return 0; + } + + char* ptr_c = nullptr; + + const size_t actual_nitems = this->mmap((void**)&ptr_c, size, nitems); + if (actual_nitems > 0) { + memcpy(ptr, ptr_c, size * actual_nitems); + } + + return actual_nitems; +} + +// this operation returns a mmapped address, owned by mmap_owner +size_t MappedFileIOReader::mmap(void** ptr, size_t size, size_t nitems) { + if (size == 0) { + return nitems; + } + + size_t 
actual_size = size * nitems; + if (pos + size * nitems > mmap_owner->size()) { + actual_size = mmap_owner->size() - pos; + } + + size_t actual_nitems = (actual_size + size - 1) / size; + if (actual_nitems == 0) { + return 0; + } + + // get an address + *ptr = (void*)(reinterpret_cast(mmap_owner->data()) + pos); + + // alter pos + pos += size * actual_nitems; + + return actual_nitems; +} + +int MappedFileIOReader::filedescriptor() { + // todo + return -1; +} + +} // namespace faiss diff --git a/faiss/impl/mapped_io.h b/faiss/impl/mapped_io.h new file mode 100644 index 0000000000..0e32df23d8 --- /dev/null +++ b/faiss/impl/mapped_io.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +#include +#include + +namespace faiss { + +// holds a memory-mapped region over a file +struct MmappedFileMappingOwner : public MaybeOwnedVectorOwner { + MmappedFileMappingOwner(const std::string& filename); + MmappedFileMappingOwner(FILE* f); + ~MmappedFileMappingOwner(); + + void* data() const; + size_t size() const; + + struct PImpl; + std::unique_ptr p_impl; +}; + +// A deserializer that supports memory-mapped files. +// All de-allocations should happen as soon as the index gets destroyed, +// after all underlying the MaybeOwnerVector objects are destroyed. 
+struct MappedFileIOReader : IOReader { + std::shared_ptr mmap_owner; + + size_t pos = 0; + + MappedFileIOReader(const std::shared_ptr& owner); + + // perform a copy + size_t operator()(void* ptr, size_t size, size_t nitems) override; + // perform a quasi-read that returns a mmapped address, owned by mmap_owner, + // and updates the position + size_t mmap(void** ptr, size_t size, size_t nitems); + + int filedescriptor() override; +}; + +} // namespace faiss diff --git a/faiss/impl/maybe_owned_vector.h b/faiss/impl/maybe_owned_vector.h new file mode 100644 index 0000000000..4b6770dac8 --- /dev/null +++ b/faiss/impl/maybe_owned_vector.h @@ -0,0 +1,316 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace faiss { + +// An interface for an owner of a MaybeOwnedVector. +struct MaybeOwnedVectorOwner { + virtual ~MaybeOwnedVectorOwner() = default; +}; + +// a container that either works as std::vector that owns its own memory, +// or as a view of a memory buffer, with a known size +template +struct MaybeOwnedVector { + using value_type = T; + using self_type = MaybeOwnedVector; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + using size_type = typename std::vector::size_type; + + bool is_owned = true; + + // this one is used if is_owned == true + std::vector owned_data; + + // these three are used if is_owned == false + T* view_data = nullptr; + // the number of T elements + size_t view_size = 0; + // who owns the data. + // This field can be nullptr, and it is present ONLY in order + // to avoid possible tricky memory / resource leaks. 
+ std::shared_ptr owner; + + // points either to view_data, or to owned.data() + T* c_ptr = nullptr; + // uses either view_size, or owned.size(); + size_t c_size = 0; + + MaybeOwnedVector() = default; + MaybeOwnedVector(const size_t initial_size) { + is_owned = true; + + owned_data.resize(initial_size); + c_ptr = owned_data.data(); + c_size = owned_data.size(); + } + + explicit MaybeOwnedVector(const std::vector& vec) + : faiss::MaybeOwnedVector(vec.size()) { + if (vec.size() > 0) { + memcpy(owned_data.data(), vec.data(), sizeof(T) * vec.size()); + } + } + + MaybeOwnedVector(const MaybeOwnedVector& other) { + is_owned = other.is_owned; + owned_data = other.owned_data; + + view_data = other.view_data; + view_size = other.view_size; + owner = other.owner; + + if (is_owned) { + c_ptr = owned_data.data(); + c_size = owned_data.size(); + } else { + c_ptr = view_data; + c_size = view_size; + } + } + + MaybeOwnedVector(MaybeOwnedVector&& other) { + is_owned = other.is_owned; + owned_data = std::move(other.owned_data); + + view_data = other.view_data; + view_size = other.view_size; + owner = std::move(other.owner); + other.owner = nullptr; + + if (is_owned) { + c_ptr = owned_data.data(); + c_size = owned_data.size(); + } else { + c_ptr = view_data; + c_size = view_size; + } + } + + MaybeOwnedVector& operator=(const MaybeOwnedVector& other) { + if (this == &other) { + return *this; + } + + // create a copy + MaybeOwnedVector cloned(other); + // swap + swap(*this, cloned); + + return *this; + } + + MaybeOwnedVector& operator=(MaybeOwnedVector&& other) { + if (this == &other) { + return *this; + } + + // moved + MaybeOwnedVector moved(std::move(other)); + // swap + swap(*this, moved); + + return *this; + } + + MaybeOwnedVector(std::vector&& other) { + is_owned = true; + + owned_data = std::move(other); + c_ptr = owned_data.data(); + c_size = owned_data.size(); + } + + static MaybeOwnedVector create_view( + void* address, + const size_t n_elements, + const std::shared_ptr& 
owner) { + MaybeOwnedVector vec; + vec.is_owned = false; + vec.view_data = reinterpret_cast(address); + vec.view_size = n_elements; + vec.owner = owner; + + vec.c_ptr = vec.view_data; + vec.c_size = vec.view_size; + + return vec; + } + + const T* data() const { + return c_ptr; + } + + T* data() { + return c_ptr; + } + + size_t size() const { + return c_size; + } + + size_t byte_size() const { + return c_size * sizeof(T); + } + + T& operator[](const size_t idx) { + return c_ptr[idx]; + } + + const T& operator[](const size_t idx) const { + return c_ptr[idx]; + } + + T& at(size_type pos) { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + return owned_data.at(pos); + } + + const T& at(size_type pos) const { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + return owned_data.at(pos); + } + + iterator begin() { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + return owned_data.begin(); + } + + const_iterator begin() const { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + return owned_data.begin(); + } + + iterator end() { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + return owned_data.end(); + } + + const_iterator end() const { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + return owned_data.end(); + } + + iterator erase(const_iterator begin, const_iterator end) { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + auto result = owned_data.erase(begin, end); + c_ptr = owned_data.data(); + c_size = owned_data.size(); + + return result; + } + + template + iterator insert(const_iterator pos, InputIt first, InputIt last) { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + auto result = 
owned_data.insert(pos, first, last); + c_ptr = owned_data.data(); + c_size = owned_data.size(); + + return result; + } + + void clear() { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + owned_data.clear(); + c_ptr = owned_data.data(); + c_size = owned_data.size(); + } + + void resize(const size_t new_size) { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + owned_data.resize(new_size); + c_ptr = owned_data.data(); + c_size = owned_data.size(); + } + + void resize(const size_t new_size, const value_type v) { + FAISS_ASSERT_MSG( + is_owned, + "This operation cannot be performed on a viewed vector"); + + owned_data.resize(new_size, v); + c_ptr = owned_data.data(); + c_size = owned_data.size(); + } + + friend void swap(self_type& a, self_type& b) { + std::swap(a.is_owned, b.is_owned); + std::swap(a.owned_data, b.owned_data); + std::swap(a.view_data, b.view_data); + std::swap(a.view_size, b.view_size); + std::swap(a.owner, b.owner); + std::swap(a.c_ptr, b.c_ptr); + std::swap(a.c_size, b.c_size); + } +}; + +template +struct is_maybe_owned_vector : std::false_type {}; + +template +struct is_maybe_owned_vector> : std::true_type {}; + +template +inline constexpr bool is_maybe_owned_vector_v = is_maybe_owned_vector::value; + +template +bool operator==( + const MaybeOwnedVector& lhs, + const MaybeOwnedVector& rhs) { + return lhs.size() == rhs.size() && + !memcmp(lhs.data(), rhs.data(), lhs.byte_size()); +} + +template +bool operator!=( + const MaybeOwnedVector& lhs, + const MaybeOwnedVector& rhs) { + return !(lhs == rhs); +} + +} // namespace faiss diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h index 5fc632eb2d..4b03fbd00a 100644 --- a/faiss/impl/platform_macros.h +++ b/faiss/impl/platform_macros.h @@ -12,7 +12,7 @@ #include #include -#ifdef _MSC_VER +#ifdef _WIN32 /******************************************************* * Windows specific macros @@ -24,11 
+24,11 @@ #define FAISS_API __declspec(dllimport) #endif // FAISS_MAIN_LIB -#ifdef _MSC_VER #define strtok_r strtok_s -#endif // _MSC_VER +#ifdef _MSC_VER #define __PRETTY_FUNCTION__ __FUNCSIG__ +#endif // _MSC_VER #define posix_memalign(p, a, s) \ (((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno) @@ -38,6 +38,7 @@ #define ALIGNED(x) __declspec(align(x)) // redefine the GCC intrinsics with Windows equivalents +#ifdef _MSC_VER #include #include @@ -76,6 +77,7 @@ inline int __builtin_clzll(uint64_t x) { #define __builtin_popcount __popcnt #define __builtin_popcountl __popcnt64 +#define __builtin_popcountll __popcnt64 #ifndef __clang__ #define __m128i_u __m128i @@ -102,6 +104,8 @@ inline int __builtin_clzll(uint64_t x) { #define __F16C__ 1 #endif +#endif // _MSC_VER + #define FAISS_ALWAYS_INLINE __forceinline #else diff --git a/faiss/impl/simd_result_handlers.h b/faiss/impl/simd_result_handlers.h index e12277a690..baa640d865 100644 --- a/faiss/impl/simd_result_handlers.h +++ b/faiss/impl/simd_result_handlers.h @@ -576,7 +576,7 @@ struct RangeHandler : ResultHandlerCompare { normalizers = norms; for (int q = 0; q < nq; ++q) { thresholds[q] = - normalizers[2 * q] * (radius - normalizers[2 * q + 1]); + int(normalizers[2 * q] * (radius - normalizers[2 * q + 1])); } } diff --git a/faiss/impl/zerocopy_io.cpp b/faiss/impl/zerocopy_io.cpp new file mode 100644 index 0000000000..2d37f6a8cc --- /dev/null +++ b/faiss/impl/zerocopy_io.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +namespace faiss { + +ZeroCopyIOReader::ZeroCopyIOReader(uint8_t* data, size_t size) + : data_(data), rp_(0), total_(size) {} + +ZeroCopyIOReader::~ZeroCopyIOReader() {} + +size_t ZeroCopyIOReader::get_data_view(void** ptr, size_t size, size_t nitems) { + if (size == 0) { + return nitems; + } + + size_t actual_size = size * nitems; + if (rp_ + size * nitems > total_) { + actual_size = total_ - rp_; + } + + size_t actual_nitems = (actual_size + size - 1) / size; + if (actual_nitems == 0) { + return 0; + } + + // get an address + *ptr = (void*)(reinterpret_cast(data_ + rp_)); + + // alter pos + rp_ += size * actual_nitems; + + return actual_nitems; +} + +void ZeroCopyIOReader::reset() { + rp_ = 0; +} + +size_t ZeroCopyIOReader::operator()(void* ptr, size_t size, size_t nitems) { + if (size * nitems == 0) { + return 0; + } + + if (rp_ >= total_) { + return 0; + } + size_t nremain = (total_ - rp_) / size; + if (nremain < nitems) { + nitems = nremain; + } + memcpy(ptr, (data_ + rp_), size * nitems); + rp_ += size * nitems; + return nitems; +} + +int ZeroCopyIOReader::filedescriptor() { + return -1; // Indicating no file descriptor available for memory buffer +} + +} // namespace faiss diff --git a/faiss/impl/zerocopy_io.h b/faiss/impl/zerocopy_io.h new file mode 100644 index 0000000000..488b5d1e80 --- /dev/null +++ b/faiss/impl/zerocopy_io.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +#include + +namespace faiss { + +// ZeroCopyIOReader just maps the data from a given pointer. 
+struct ZeroCopyIOReader : public faiss::IOReader { + uint8_t* data_; + size_t rp_ = 0; + size_t total_ = 0; + + ZeroCopyIOReader(uint8_t* data, size_t size); + ~ZeroCopyIOReader(); + + void reset(); + size_t get_data_view(void** ptr, size_t size, size_t nitems); + size_t operator()(void* ptr, size_t size, size_t nitems) override; + + int filedescriptor() override; +}; + +} // namespace faiss diff --git a/faiss/index_factory.cpp b/faiss/index_factory.cpp index 8ff4bfec7c..b4e0e9a48f 100644 --- a/faiss/index_factory.cpp +++ b/faiss/index_factory.cpp @@ -11,9 +11,6 @@ #include -#include -#include - #include #include @@ -33,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +66,7 @@ namespace { */ bool re_match(const std::string& s, const std::string& pat, std::smatch& sm) { + // @lint-ignore CLANGTIDY return std::regex_match(s, sm, std::regex(pat)); } @@ -164,7 +164,7 @@ const std::string aq_norm_pattern = const std::string paq_def_pattern = "([0-9]+)x([0-9]+)x([0-9]+)"; AdditiveQuantizer::Search_type_t aq_parse_search_type( - std::string stok, + const std::string& stok, MetricType metric) { if (stok == "") { return metric == METRIC_L2 ? 
AdditiveQuantizer::ST_decompress @@ -177,6 +177,7 @@ AdditiveQuantizer::Search_type_t aq_parse_search_type( std::vector aq_parse_nbits(std::string stok) { std::vector nbits; std::smatch sm; + // @lint-ignore CLANGTIDY while (std::regex_search(stok, sm, std::regex("[^q]([0-9]+)x([0-9]+)"))) { int M = std::stoi(sm[1].str()); int nbit = std::stoi(sm[2].str()); @@ -186,6 +187,8 @@ std::vector aq_parse_nbits(std::string stok) { return nbits; } +const std::string rabitq_pattern = "(RaBitQ)"; + /*************************************************************** * Parse VectorTransform */ @@ -436,6 +439,9 @@ IndexIVF* parse_IndexIVF( } return index_ivf; } + if (match(rabitq_pattern)) { + return new IndexIVFRaBitQ(get_q(), d, nlist, mt); + } return nullptr; } @@ -657,6 +663,11 @@ Index* parse_other_indexes( } } + // IndexRaBitQ + if (match(rabitq_pattern)) { + return new IndexRaBitQ(d, metric); + } + return nullptr; } @@ -766,7 +777,7 @@ std::unique_ptr index_factory_sub( } if (verbose) { - printf("after () normalization: %s %ld parenthesis indexes d=%d\n", + printf("after () normalization: %s %zd parenthesis indexes d=%d\n", description.c_str(), parenthesis_indexes.size(), d); diff --git a/faiss/index_io.h b/faiss/index_io.h index 191d3b9461..8a0654dc9e 100644 --- a/faiss/index_io.h +++ b/faiss/index_io.h @@ -66,6 +66,10 @@ const int IO_FLAG_PQ_SKIP_SDC_TABLE = 32; // try to memmap data (useful to load an ArrayInvertedLists as an // OnDiskInvertedLists) const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000; +// mmap that handles codes for IndexFlatCodes-derived indices and HNSW. +// this is a temporary solution, it is expected to be merged with IO_FLAG_MMAP +// after OnDiskInvertedLists get properly updated. 
+const int IO_FLAG_MMAP_IFC = 1 << 9; Index* read_index(const char* fname, int io_flags = 0); Index* read_index(FILE* f, int io_flags = 0); diff --git a/faiss/invlists/InvertedLists.cpp b/faiss/invlists/InvertedLists.cpp index ef14bcb973..899ae639cf 100644 --- a/faiss/invlists/InvertedLists.cpp +++ b/faiss/invlists/InvertedLists.cpp @@ -181,7 +181,7 @@ size_t InvertedLists::copy_subset_to( } double InvertedLists::imbalance_factor() const { - std::vector hist(nlist); + std::vector hist(nlist); for (size_t i = 0; i < nlist; i++) { hist[i] = list_size(i); @@ -330,8 +330,8 @@ void ArrayInvertedLists::update_entries( } void ArrayInvertedLists::permute_invlists(const idx_t* map) { - std::vector> new_codes(nlist); - std::vector> new_ids(nlist); + std::vector> new_codes(nlist); + std::vector> new_ids(nlist); for (size_t i = 0; i < nlist; i++) { size_t o = map[i]; diff --git a/faiss/invlists/InvertedLists.h b/faiss/invlists/InvertedLists.h index f8c72c0841..78799a23b2 100644 --- a/faiss/invlists/InvertedLists.h +++ b/faiss/invlists/InvertedLists.h @@ -15,9 +15,11 @@ * the interface. 
*/ -#include #include +#include +#include + namespace faiss { struct InvertedListsIterator { @@ -241,8 +243,8 @@ struct InvertedLists { /// simple (default) implementation as an array of inverted lists struct ArrayInvertedLists : InvertedLists { - std::vector> codes; // binary codes, size nlist - std::vector> ids; ///< Inverted lists for indexes + std::vector> codes; // binary codes, size nlist + std::vector> ids; ///< Inverted lists for indexes ArrayInvertedLists(size_t nlist, size_t code_size); diff --git a/faiss/invlists/InvertedListsIOHook.cpp b/faiss/invlists/InvertedListsIOHook.cpp index 0534a11907..13d8490a8d 100644 --- a/faiss/invlists/InvertedListsIOHook.cpp +++ b/faiss/invlists/InvertedListsIOHook.cpp @@ -13,9 +13,9 @@ #include -#ifndef _MSC_VER +#ifndef _WIN32 #include -#endif // !_MSC_VER +#endif // !_WIN32 namespace faiss { @@ -33,7 +33,7 @@ namespace { /// std::vector that deletes its contents struct IOHookTable : std::vector { IOHookTable() { -#ifndef _MSC_VER +#ifndef _WIN32 push_back(new OnDiskInvertedListsIOHook()); #endif push_back(new BlockInvertedListsIOHook()); diff --git a/faiss/python/__init__.py b/faiss/python/__init__.py index 9d956ebe71..7266da71f3 100644 --- a/faiss/python/__init__.py +++ b/faiss/python/__init__.py @@ -53,6 +53,7 @@ class_wrappers.handle_Linear(Linear) class_wrappers.handle_QINCo(QINCo) class_wrappers.handle_QINCoStep(QINCoStep) +shard_ivf_index_centroids = class_wrappers.handle_shard_ivf_index_centroids(shard_ivf_index_centroids) this_module = sys.modules[__name__] @@ -170,7 +171,7 @@ def replacement_function(*args): add_ref_in_constructor(GpuIndexIVFPQ, 1) add_ref_in_constructor(GpuIndexIVFScalarQuantizer, 1) except NameError as e: - logger.info("Failed to load GPU Faiss: %s. Will not load constructor refs for GPU indexes." % e.args[0]) + logger.info("Failed to load GPU Faiss: %s. Will not load constructor refs for GPU indexes. This is only an error if you're trying to use GPU Faiss." 
% e.args[0]) add_ref_in_constructor(IndexIVFFlat, 0) add_ref_in_constructor(IndexIVFFlatDedup, 0) diff --git a/faiss/python/array_conversions.py b/faiss/python/array_conversions.py index 0c57defe1e..b62c59e4ce 100644 --- a/faiss/python/array_conversions.py +++ b/faiss/python/array_conversions.py @@ -106,6 +106,13 @@ def vector_to_array(v): classname = v.__class__.__name__ if classname.startswith('AlignedTable'): return AlignedTable_to_array(v) + if classname.startswith('MaybeOwnedVector'): + dtype = np.dtype(vector_name_map[classname[16:]]) + a = np.empty(v.size(), dtype=dtype) + if v.size() > 0: + memcpy(swig_ptr(a), v.data(), a.nbytes) + return a + assert classname.endswith('Vector') dtype = np.dtype(vector_name_map[classname[:-6]]) a = np.empty(v.size(), dtype=dtype) @@ -122,6 +129,17 @@ def copy_array_to_vector(a, v): """ copy a numpy array to a vector """ n, = a.shape classname = v.__class__.__name__ + if classname.startswith('MaybeOwnedVector'): + assert v.is_owned, 'cannot copy to an non-owned MaybeOwnedVector' + dtype = np.dtype(vector_name_map[classname[16:]]) + assert dtype == a.dtype, ( + 'cannot copy a %s array to a %s (should be %s)' % ( + a.dtype, classname, dtype)) + v.resize(n) + if n > 0: + memcpy(v.data(), swig_ptr(a), a.nbytes) + return + assert classname.endswith('Vector') dtype = np.dtype(vector_name_map[classname[:-6]]) assert dtype == a.dtype, ( diff --git a/faiss/python/class_wrappers.py b/faiss/python/class_wrappers.py index 607fdd6d29..2491aa8914 100644 --- a/faiss/python/class_wrappers.py +++ b/faiss/python/class_wrappers.py @@ -869,7 +869,7 @@ def replacement_reconstruct_n(self, n0=0, ni=-1, x=None): self.reconstruct_n_c(n0, ni, swig_ptr(x)) return x - def replacement_search(self, x, k): + def replacement_search(self, x, k, *, params=None): x = _check_dtype_uint8(x) n, d = x.shape assert d == self.code_size @@ -878,7 +878,8 @@ def replacement_search(self, x, k): labels = np.empty((n, k), dtype=np.int64) self.search_c(n, swig_ptr(x), k, 
swig_ptr(distances), - swig_ptr(labels)) + swig_ptr(labels), + params=params) return distances, labels def replacement_search_preassigned(self, x, k, Iq, Dq): @@ -906,12 +907,12 @@ def replacement_search_preassigned(self, x, k, Iq, Dq): ) return D, I - def replacement_range_search(self, x, thresh): + def replacement_range_search(self, x, thresh, *, params=None): n, d = x.shape x = _check_dtype_uint8(x) assert d == self.code_size res = RangeSearchResult(n) - self.range_search_c(n, swig_ptr(x), thresh, res) + self.range_search_c(n, swig_ptr(x), thresh, res, params=params) # get pointers and copy them lims = rev_swig_ptr(res.lims, n + 1).copy() nd = int(lims[-1]) @@ -1037,7 +1038,7 @@ def replacement_vt_train(self, x): def handle_AutoTuneCriterion(the_class): def replacement_set_groundtruth(self, D, I): - if D: + if D is not None: assert I.shape == D.shape self.nq, self.gt_nnn = I.shape self.set_groundtruth_c( @@ -1395,3 +1396,12 @@ def from_torch(self, qinco): the_class.__init__ = replacement_init the_class.from_torch = from_torch + + +def handle_shard_ivf_index_centroids(func): + def wrapper(*args, **kwargs): + args = list(args) + if len(args) > 3 and args[3] is not None: + args[3] = faiss.PyCallbackShardingFunction(args[3]) + return func(*args, **kwargs) + return wrapper diff --git a/faiss/python/loader.py b/faiss/python/loader.py index caef9e5512..c3b7b00c19 100644 --- a/faiss/python/loader.py +++ b/faiss/python/loader.py @@ -108,7 +108,7 @@ def is_sve_supported(): loaded = False has_AVX512 = any("AVX512" in x.upper() for x in instruction_sets) -if has_AVX512: +if has_AVX512 and not loaded: try: logger.info("Loading faiss with AVX512 support.") from .swigfaiss_avx512 import * diff --git a/faiss/python/python_callbacks.cpp b/faiss/python/python_callbacks.cpp index ce36bed437..8b78bf1e43 100644 --- a/faiss/python/python_callbacks.cpp +++ b/faiss/python/python_callbacks.cpp @@ -134,3 +134,27 @@ PyCallbackIDSelector::~PyCallbackIDSelector() { PyThreadLock gil; 
Py_DECREF(callback); } + +/*********************************************************** + * Callbacks for IVF index sharding + ***********************************************************/ + +PyCallbackShardingFunction::PyCallbackShardingFunction(PyObject* callback) + : callback(callback) { + PyThreadLock gil; + Py_INCREF(callback); +} + +int64_t PyCallbackShardingFunction::operator()(int64_t i, int64_t shard_count) { + PyThreadLock gil; + PyObject* shard_id = PyObject_CallFunction(callback, "LL", i, shard_count); + if (shard_id == nullptr) { + FAISS_THROW_MSG("propagate py error"); + } + return PyLong_AsLongLong(shard_id); +} + +PyCallbackShardingFunction::~PyCallbackShardingFunction() { + PyThreadLock gil; + Py_DECREF(callback); +} diff --git a/faiss/python/python_callbacks.h b/faiss/python/python_callbacks.h index fa8ebaf53c..072e69f91f 100644 --- a/faiss/python/python_callbacks.h +++ b/faiss/python/python_callbacks.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include @@ -58,3 +59,24 @@ struct PyCallbackIDSelector : faiss::IDSelector { ~PyCallbackIDSelector() override; }; + +/*********************************************************** + * Callbacks for IVF index sharding + ***********************************************************/ + +struct PyCallbackShardingFunction : faiss::ivflib::ShardingFunction { + PyObject* callback; + + explicit PyCallbackShardingFunction(PyObject* callback); + + int64_t operator()(int64_t i, int64_t shard_count) override; + + ~PyCallbackShardingFunction() override; + + PyCallbackShardingFunction(const PyCallbackShardingFunction&) = delete; + PyCallbackShardingFunction(PyCallbackShardingFunction&&) noexcept = default; + PyCallbackShardingFunction& operator=(const PyCallbackShardingFunction&) = + default; + PyCallbackShardingFunction& operator=(PyCallbackShardingFunction&&) = + default; +}; diff --git a/faiss/python/setup.py b/faiss/python/setup.py index 23611cb370..b30cfa7813 100644 --- a/faiss/python/setup.py +++ 
b/faiss/python/setup.py @@ -105,7 +105,7 @@ """ setup( name="faiss", - version="1.10.0", + version="1.11.0", description="A library for efficient similarity search and clustering of dense vectors", long_description=long_description, url="https://github.com/facebookresearch/faiss", diff --git a/faiss/python/swigfaiss.swig b/faiss/python/swigfaiss.swig index 493e42ef0e..67d903bd92 100644 --- a/faiss/python/swigfaiss.swig +++ b/faiss/python/swigfaiss.swig @@ -32,6 +32,7 @@ #pragma SWIG nowarn=341 #pragma SWIG nowarn=512 #pragma SWIG nowarn=362 +#pragma SWIG nowarn=509 // we need explict control of these typedefs... // %include @@ -81,6 +82,11 @@ typedef uint64_t size_t; #endif +#include + +#include +#include +#include #include #include @@ -184,6 +190,10 @@ typedef uint64_t size_t; #include +#include +#include +#include + %} /******************************************************** @@ -280,6 +290,9 @@ namespace std { %template(RepeatVector) std::vector; %template(ClusteringIterationStatsVector) std::vector; %template(ParameterRangeVector) std::vector; +%template(MaybeOwnedVectorUInt8Vector) std::vector >; +%template(MaybeOwnedVectorInt32Vector) std::vector >; +%template(MaybeOwnedVectorFloat32Vector) std::vector >; #ifndef SWIGWIN %template(OnDiskOneListVector) std::vector; @@ -506,6 +519,14 @@ void gpu_sync_all_devices() %include +%include + +%ignore faiss::MmappedFileMappingOwner::p_impl; + +%include +%include +%include + %newobject *::get_FlatCodesDistanceComputer() const; %include %include @@ -633,6 +654,9 @@ struct faiss::simd16uint16 {}; %include +%include +%include +%include %ignore faiss::BufferList::Buffer; %ignore faiss::RangeSearchPartialResult::QueryResult; @@ -744,6 +768,8 @@ struct faiss::simd16uint16 {}; DOWNCAST ( IndexShardsIVF ) DOWNCAST2 ( IndexShards, IndexShardsTemplateT_faiss__Index_t ) DOWNCAST2 ( IndexReplicas, IndexReplicasTemplateT_faiss__Index_t ) + DOWNCAST ( IndexRaBitQ ) + DOWNCAST ( IndexIVFRaBitQ ) DOWNCAST ( 
IndexIVFIndependentQuantizer) DOWNCAST ( IndexIVFPQR ) DOWNCAST ( IndexIVFPQ ) @@ -992,6 +1018,10 @@ faiss::Quantizer * downcast_Quantizer (faiss::Quantizer *aq) %template(AlignedTableUint16) faiss::AlignedTable; %template(AlignedTableFloat32) faiss::AlignedTable; +%template(MaybeOwnedVectorUInt8) faiss::MaybeOwnedVector; +%template(MaybeOwnedVectorInt32) faiss::MaybeOwnedVector; +%template(MaybeOwnedVectorFloat32) faiss::MaybeOwnedVector; + // SWIG seems to have some trouble resolving function template types here, so // declare explicitly diff --git a/faiss/utils/approx_topk_hamming/approx_topk_hamming.h b/faiss/utils/approx_topk_hamming/approx_topk_hamming.h index 68d8e8c9f0..9f8d211956 100644 --- a/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +++ b/faiss/utils/approx_topk_hamming/approx_topk_hamming.h @@ -46,9 +46,11 @@ struct HeapWithBucketsForHamming32< // output distances int* const __restrict bh_val, // output indices, each being within [0, n) range - int64_t* const __restrict bh_ids) { + int64_t* const __restrict bh_ids, + // optional id selector for filtering + const IDSelector* sel = nullptr) { // forward a call to bs_addn with 1 beam - bs_addn(1, n, hc, binaryVectors, k, bh_val, bh_ids); + bs_addn(1, n, hc, binaryVectors, k, bh_val, bh_ids, sel); } static void bs_addn( @@ -66,7 +68,9 @@ struct HeapWithBucketsForHamming32< int* const __restrict bh_val, // output indices, each being within [0, n_per_beam * beam_size) // range - int64_t* const __restrict bh_ids) { + int64_t* const __restrict bh_ids, + // optional id selector for filtering + const IDSelector* sel = nullptr) { // using C = CMax; @@ -95,11 +99,22 @@ struct HeapWithBucketsForHamming32< for (uint32_t ip = 0; ip < nb; ip += NBUCKETS) { for (uint32_t j = 0; j < NBUCKETS_8; j++) { uint32_t hamming_distances[8]; + uint8_t valid_counter = 0; for (size_t j8 = 0; j8 < 8; j8++) { - hamming_distances[j8] = hc.hamming( - binary_vectors + - (j8 + j * 8 + ip + n_per_beam * beam_index) * - 
code_size); + const uint32_t idx = + j8 + j * 8 + ip + n_per_beam * beam_index; + if (!sel || sel->is_member(idx)) { + hamming_distances[j8] = hc.hamming( + binary_vectors + idx * code_size); + valid_counter++; + } else { + hamming_distances[j8] = + std::numeric_limits::max(); + } + } + + if (valid_counter == 8) { + continue; // Skip if all vectors are filtered out } // loop. Compiler should get rid of unneeded ops @@ -157,7 +172,8 @@ struct HeapWithBucketsForHamming32< const auto value = min_distances_scalar[j8]; const auto index = min_indices_scalar[j8]; - if (C::cmp2(bh_val[0], value, bh_ids[0], index)) { + if (value < std::numeric_limits::max() && + C::cmp2(bh_val[0], value, bh_ids[0], index)) { heap_replace_top( k, bh_val, bh_ids, value, index); } @@ -168,11 +184,13 @@ struct HeapWithBucketsForHamming32< // process leftovers for (uint32_t ip = nb; ip < n_per_beam; ip++) { const auto index = ip + n_per_beam * beam_index; - const auto value = - hc.hamming(binary_vectors + (index)*code_size); + if (!sel || sel->is_member(index)) { + const auto value = + hc.hamming(binary_vectors + (index)*code_size); - if (C::cmp(bh_val[0], value)) { - heap_replace_top(k, bh_val, bh_ids, value, index); + if (C::cmp(bh_val[0], value)) { + heap_replace_top(k, bh_val, bh_ids, value, index); + } } } } diff --git a/faiss/utils/hamming.cpp b/faiss/utils/hamming.cpp index 3136ef9f51..3743e82b69 100644 --- a/faiss/utils/hamming.cpp +++ b/faiss/utils/hamming.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -62,15 +63,15 @@ void hammings( const uint64_t* __restrict bs2, size_t n1, size_t n2, - size_t nwords, + size_t nbits, hamdis_t* __restrict dis) { size_t i, j; - n1 *= nwords; - n2 *= nwords; - for (i = 0; i < n1; i += nwords) { - const uint64_t* bs1_ = bs1 + i; - for (j = 0; j < n2; j += nwords) - dis[j] = hamming(bs1_, bs2 + j, nwords); + const size_t nwords = nbits / 64; + for (i = 0; i < n1; i++) { + const uint64_t* __restrict bs1_ = bs1 + i * nwords; 
+ hamdis_t* __restrict dis_ = dis + i * n2; + for (j = 0; j < n2; j++) + dis_[j] = hamming(bs1_, bs2 + j * nwords, nwords); } } @@ -171,7 +172,8 @@ void hammings_knn_hc( size_t n2, bool order = true, bool init_heap = true, - ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK) { + ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK, + const faiss::IDSelector* sel = nullptr) { size_t k = ha->k; if (init_heap) ha->heapify(); @@ -204,7 +206,7 @@ void hammings_knn_hc( NB, \ BD, \ HammingComputer>:: \ - addn(j1 - j0, hc, bs2_, k, bh_val_, bh_ids_); \ + addn(j1 - j0, hc, bs2_, k, bh_val_, bh_ids_, sel); \ break; switch (approx_topk_mode) { @@ -214,6 +216,9 @@ void hammings_knn_hc( HANDLE_APPROX(32, 2) default: { for (size_t j = j0; j < j1; j++, bs2_ += bytes_per_code) { + if (sel && !sel->is_member(j)) { + continue; + } dis = hc.hamming(bs2_); if (dis < bh_val_[0]) { faiss::maxheap_replace_top( @@ -238,7 +243,8 @@ void hammings_knn_mc( size_t nb, size_t k, int32_t* __restrict distances, - int64_t* __restrict labels) { + int64_t* __restrict labels, + const faiss::IDSelector* sel) { const int nBuckets = bytes_per_code * 8 + 1; std::vector all_counters(na * nBuckets, 0); std::unique_ptr all_ids_per_dis(new int64_t[na * nBuckets * k]); @@ -259,7 +265,9 @@ void hammings_knn_mc( #pragma omp parallel for num_threads(num_omp_threads) for (int64_t i = 0; i < na; ++i) { for (size_t j = j0; j < j1; ++j) { - cs[i].update_counter(b + j * bytes_per_code, j); + if (!sel || sel->is_member(j)) { + cs[i].update_counter(b + j * bytes_per_code, j); + } } } } @@ -291,7 +299,8 @@ void hamming_range_search( size_t nb, int radius, size_t code_size, - RangeSearchResult* res) { + RangeSearchResult* res, + const faiss::IDSelector* sel) { #pragma omp parallel num_threads(num_omp_threads) { RangeSearchPartialResult pres(res); @@ -303,9 +312,11 @@ void hamming_range_search( RangeQueryResult& qres = pres.new_result(i); for (size_t j = 0; j < nb; j++) { - int dis = 
hc.hamming(yi); - if (dis < radius) { - qres.add(dis, j); + if (!sel || sel->is_member(j)) { + int dis = hc.hamming(yi); + if (dis < radius) { + qres.add(dis, j); + } } yi += code_size; } @@ -489,10 +500,21 @@ void hammings_knn_hc( size_t nb, size_t ncodes, int order, - ApproxTopK_mode_t approx_topk_mode) { + ApproxTopK_mode_t approx_topk_mode, + const faiss::IDSelector* sel) { Run_hammings_knn_hc r; dispatch_HammingComputer( - ncodes, r, ncodes, ha, a, b, nb, order, true, approx_topk_mode); + ncodes, + r, + ncodes, + ha, + a, + b, + nb, + order, + true, + approx_topk_mode, + sel); } void hammings_knn_mc( @@ -503,10 +525,11 @@ void hammings_knn_mc( size_t k, size_t ncodes, int32_t* __restrict distances, - int64_t* __restrict labels) { + int64_t* __restrict labels, + const faiss::IDSelector* sel) { Run_hammings_knn_mc r; dispatch_HammingComputer( - ncodes, r, ncodes, a, b, na, nb, k, distances, labels); + ncodes, r, ncodes, a, b, na, nb, k, distances, labels, sel); } void hamming_range_search( @@ -516,10 +539,11 @@ void hamming_range_search( size_t nb, int radius, size_t code_size, - RangeSearchResult* result) { + RangeSearchResult* result, + const faiss::IDSelector* sel) { Run_hamming_range_search r; dispatch_HammingComputer( - code_size, r, a, b, na, nb, radius, code_size, result); + code_size, r, a, b, na, nb, radius, code_size, result, sel); } /* Count number of matches given a max threshold */ diff --git a/faiss/utils/hamming.h b/faiss/utils/hamming.h index 85f9730e5c..3f3f488bc5 100644 --- a/faiss/utils/hamming.h +++ b/faiss/utils/hamming.h @@ -27,6 +27,7 @@ #include +#include #include #include @@ -135,7 +136,8 @@ void hammings_knn_hc( size_t nb, size_t ncodes, int ordered, - ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK); + ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK, + const faiss::IDSelector* sel = nullptr); /* Legacy alias to hammings_knn_hc. 
*/ void hammings_knn( @@ -166,7 +168,8 @@ void hammings_knn_mc( size_t k, size_t ncodes, int32_t* distances, - int64_t* labels); + int64_t* labels, + const faiss::IDSelector* sel = nullptr); /** same as hammings_knn except we are doing a range search with radius */ void hamming_range_search( @@ -176,7 +179,8 @@ void hamming_range_search( size_t nb, int radius, size_t ncodes, - RangeSearchResult* result); + RangeSearchResult* result, + const faiss::IDSelector* sel = nullptr); /* Counting the number of matches or of cross-matches (without returning them) For use with function that assume pre-allocated memory */ diff --git a/faiss/utils/utils.cpp b/faiss/utils/utils.cpp index 4c9b7c92a1..653b240bc3 100644 --- a/faiss/utils/utils.cpp +++ b/faiss/utils/utils.cpp @@ -388,7 +388,7 @@ size_t ranklist_intersection_size( return count; } -double imbalance_factor(int k, const int* hist) { +double imbalance_factor(int k, const int64_t* hist) { double tot = 0, uf = 0; for (int i = 0; i < k; i++) { @@ -400,9 +400,9 @@ double imbalance_factor(int k, const int* hist) { return uf; } -double imbalance_factor(int n, int k, const int64_t* assign) { - std::vector hist(k, 0); - for (int i = 0; i < n; i++) { +double imbalance_factor(int64_t n, int k, const int64_t* assign) { + std::vector hist(k, 0); + for (int64_t i = 0; i < n; i++) { hist[assign[i]]++; } diff --git a/faiss/utils/utils.h b/faiss/utils/utils.h index 901459d1c7..7d75b3200d 100644 --- a/faiss/utils/utils.h +++ b/faiss/utils/utils.h @@ -92,10 +92,10 @@ size_t merge_result_table_with( /// a balanced assignment has a IF of 1, a completely unbalanced assignment has /// an IF = k. 
-double imbalance_factor(int n, int k, const int64_t* assign); +double imbalance_factor(int64_t n, int k, const int64_t* assign); /// same, takes a histogram as input -double imbalance_factor(int k, const int* hist); +double imbalance_factor(int k, const int64_t* hist); /// compute histogram on v int ivec_hist(size_t n, const int* v, int vmax, int* hist); diff --git a/perf_tests/bench_scalar_quantizer_distance.cpp b/perf_tests/bench_scalar_quantizer_distance.cpp index 14945c58c4..8a32d69c71 100644 --- a/perf_tests/bench_scalar_quantizer_distance.cpp +++ b/perf_tests/bench_scalar_quantizer_distance.cpp @@ -23,8 +23,8 @@ DEFINE_uint32(iterations, 20, "iterations"); static void bench_distance( benchmark::State& state, ScalarQuantizer::QuantizerType type, - int n, - int d) { + int d, + int n) { std::vector x(d * n); float_rand(x.data(), d * n, 12345); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dfab76e024..285b9090ed 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -36,6 +36,8 @@ set(FAISS_TEST_SRC test_callback.cpp test_utils.cpp test_hamming.cpp + test_mmap.cpp + test_zerocopy.cpp ) add_executable(faiss_test ${FAISS_TEST_SRC}) diff --git a/tests/test_factory.py b/tests/test_factory.py index 4196895540..2246eb8c10 100644 --- a/tests/test_factory.py +++ b/tests/test_factory.py @@ -34,7 +34,7 @@ def test_factory_1(self): except RuntimeError: pass else: - assert False, "should do a runtime error" + raise AssertionError("should do a runtime error") def test_factory_2(self): @@ -62,6 +62,14 @@ def test_factory_5(self): assert index.sa_code_size() == 64 * 4 assert index.chain.at(0).d_out == 64 + def test_factory_6(self): + index = faiss.index_factory(128, "RaBitQ") + assert index.d == 128 + assert index.metric_type == faiss.METRIC_L2 + index = faiss.index_factory(128, "IVF256,RaBitQ") + assert index.d == 128 + assert index.metric_type == faiss.METRIC_L2 + def test_factory_HNSW(self): index = faiss.index_factory(12, "HNSW32") assert 
index.storage.sa_code_size() == 12 * 4 diff --git a/tests/test_factory_tools.cpp b/tests/test_factory_tools.cpp index 2e77645e80..f5dda2ad68 100644 --- a/tests/test_factory_tools.cpp +++ b/tests/test_factory_tools.cpp @@ -24,6 +24,8 @@ TEST(TestFactoryTools, TestReverseIndexFactory) { "HNSW32", "SQ8", "SQfp16", + "NSG24,Flat", + "NSG16,SQ8", }) { std::unique_ptr index{index_factory(64, factory)}; ASSERT_TRUE(index); @@ -32,6 +34,8 @@ TEST(TestFactoryTools, TestReverseIndexFactory) { using Case = std::pair; for (auto [src, dst] : { Case{"SQ8,RFlat", "SQ8,Refine(Flat)"}, + Case{"NSG", "NSG32,Flat"}, + Case{"NSG,PQ8", "NSG32,PQ8x8"}, }) { std::unique_ptr index{index_factory(64, src)}; ASSERT_TRUE(index); diff --git a/tests/test_fast_scan_ivf.py b/tests/test_fast_scan_ivf.py index 75c9500f82..a1d6a21440 100644 --- a/tests/test_fast_scan_ivf.py +++ b/tests/test_fast_scan_ivf.py @@ -270,8 +270,9 @@ def test_equiv_pq(self): index_pq = faiss.index_factory(32, "PQ16x4np") index_pq.pq = index.pq index_pq.is_trained = True - index_pq.codes = faiss. 
downcast_InvertedLists( + codevec = faiss.downcast_InvertedLists( index.invlists).codes.at(0) + index_pq.codes = faiss.MaybeOwnedVectorUInt8(codevec) index_pq.ntotal = index.ntotal Dnew, Inew = index_pq.search(xq, 4) diff --git a/tests/test_hamming.cpp b/tests/test_hamming.cpp index 423f9736d2..e4815ae93d 100644 --- a/tests/test_hamming.cpp +++ b/tests/test_hamming.cpp @@ -17,7 +17,7 @@ template std::string print_data( std::shared_ptr> data, const size_t divider) { - std::string ret = ""; + std::string ret; for (int i = 0; i < data->size(); ++i) { if (i % divider) { ret += " "; @@ -38,8 +38,11 @@ std::stringstream get_correct_hamming_example( std::shared_ptr> a, std::shared_ptr> b, std::shared_ptr> true_ids, - std::shared_ptr> true_distances) { - assert(nb > k); + // regular Hamming (bit-level distances) + std::shared_ptr> true_bit_distances, + // generalized Hamming (byte-level distances) + std::shared_ptr> true_byte_distances) { + assert(nb >= k); // Initialization std::default_random_engine rng(123); @@ -51,11 +54,12 @@ std::stringstream get_correct_hamming_example( a->resize(na * code_size, 1); // query vectors are all 1 b->clear(); b->resize(nb * code_size, 2); // database vectors are all 2 - true_ids->clear(); true_ids->reserve(nresults); - true_distances->clear(); - true_distances->reserve(nresults); + true_bit_distances->clear(); + true_bit_distances->reserve(nresults); + true_byte_distances->clear(); + true_byte_distances->reserve(nresults); // define correct ids (must be unique) std::set correct_ids; @@ -72,21 +76,32 @@ std::stringstream get_correct_hamming_example( // assemble true id and distance at locations true_ids->push_back(id); - true_distances->push_back(code_size - nmatches); // hamming dist + true_bit_distances->push_back( + (code_size > nmatches ? code_size - nmatches : 0) * + /* per-code distance between 1 and 2 (0b01 and 0b10) */ + 2); + true_byte_distances->push_back( + (code_size > nmatches ? 
code_size - nmatches : 0)); for (size_t i = 0; i < nmatches; ++i) { - b->begin()[id * code_size + i] = 1; + b->begin()[id * code_size + i] = 1; // query byte value } } - // true_ids and true_distances only contain results for the first query - // each query is identical, so copy the first query na-1 times + // true_ids, true_bit_distances, true_byte_distances only contain results + // for the first query. + // Query vectors are identical (all 1s), so copy the first sets of k + // distances na-1 times. for (size_t i = 1; i < na; ++i) { true_ids->insert( true_ids->end(), true_ids->begin(), true_ids->begin() + k); - true_distances->insert( - true_distances->end(), - true_distances->begin(), - true_distances->begin() + k); + true_bit_distances->insert( + true_bit_distances->end(), + true_bit_distances->begin(), + true_bit_distances->begin() + k); + true_byte_distances->insert( + true_byte_distances->end(), + true_byte_distances->begin(), + true_byte_distances->begin() + k); } // assemble string for debugging @@ -98,7 +113,10 @@ std::stringstream get_correct_hamming_example( << "a: " << print_data(a, code_size) << std::endl << "b: " << print_data(b, code_size) << std::endl << "true_ids: " << print_data(true_ids, k) << std::endl - << "true_distances: " << print_data(true_distances, k) << std::endl; + << "true_bit_distances: " << print_data(true_bit_distances, k) + << std::endl + << "true_byte_distances: " << print_data(true_byte_distances, k) + << std::endl; return ret; } @@ -261,14 +279,23 @@ TEST(TestHamming, test_hamming_knn) { auto a = std::make_shared>(); auto b = std::make_shared>(); auto true_ids = std::make_shared>(); - auto true_distances = std::make_shared>(); + auto true_bit_distances = std::make_shared>(); + auto true_byte_distances = std::make_shared>(); // 8, 16, 32 are cases - 24 will hit default case // all should be multiples of 8 for (auto code_size : {8, 16, 24, 32}) { // get example std::stringstream assert_str = get_correct_hamming_example( - na, nb, 
k, code_size, a, b, true_ids, true_distances); + na, + nb, + k, + code_size, + a, + b, + true_ids, + true_bit_distances, + true_byte_distances); // run test on generalized_hammings_knn_hc std::vector ids_gen(na * k); @@ -278,7 +305,7 @@ TEST(TestHamming, test_hamming_knn) { faiss::generalized_hammings_knn_hc( &res, a->data(), b->data(), nb, code_size, true); ASSERT_EQ(ids_gen, *true_ids) << assert_str.str(); - ASSERT_EQ(dist_gen, *true_distances) << assert_str.str(); + ASSERT_EQ(dist_gen, *true_byte_distances) << assert_str.str(); // run test on hammings_knn std::vector ids_ham_knn(na * k, 0); @@ -286,10 +313,23 @@ TEST(TestHamming, test_hamming_knn) { res = {na, k, ids_ham_knn.data(), dist_ham_knn.data()}; faiss::hammings_knn(&res, a->data(), b->data(), nb, code_size, true); ASSERT_EQ(ids_ham_knn, *true_ids) << assert_str.str(); - // hammings_knn results in twice the distance for some reason :/ - for (int i = 0; i < dist_ham_knn.size(); ++i) { - dist_ham_knn[i] /= 2; - } - ASSERT_EQ(dist_ham_knn, *true_distances) << assert_str.str(); + ASSERT_EQ(dist_ham_knn, *true_bit_distances) << assert_str.str(); + } + + for (auto code_size : {8, 16, 24, 32}) { + std::stringstream assert_str = get_correct_hamming_example( + na, + nb, + /* k */ nb, // faiss::hammings computes all distances + code_size, + a, + b, + true_ids, + true_bit_distances, + true_byte_distances); + std::vector dist_gen(na * nb); + faiss::hammings( + a->data(), b->data(), na, nb, code_size, dist_gen.data()); + EXPECT_EQ(dist_gen, *true_bit_distances) << assert_str.str(); } } diff --git a/tests/test_hnsw.cpp b/tests/test_hnsw.cpp index b3c93a861e..9c33c08a9e 100644 --- a/tests/test_hnsw.cpp +++ b/tests/test_hnsw.cpp @@ -193,6 +193,27 @@ TEST(HNSW, Test_popmin_infinite_distances) { } } +TEST(HNSW, Test_IndexHNSW_METRIC_Lp) { + // Create an HNSW index with METRIC_Lp and metric_arg = 3 + faiss::IndexFlat storage_index(1, faiss::METRIC_Lp); + storage_index.metric_arg = 3; + faiss::IndexHNSW 
index(&storage_index, 32); + + // Add a single data point + float data[1] = {0.0}; + index.add(1, data); + + // Prepare a query + float query[1] = {2.0}; + float distance; + faiss::idx_t label; + + index.search(1, query, 1, &distance, &label); + + EXPECT_NEAR(distance, 8.0, 1e-5); // Distance should be 8.0 (2^3) + EXPECT_EQ(label, 0); // Label should be 0 +} + class HNSWTest : public testing::Test { protected: HNSWTest() { @@ -582,6 +603,16 @@ TEST_F(HNSWTest, TEST_search_neighbors_to_add) { } } +TEST_F(HNSWTest, TEST_nb_neighbors_bound) { + omp_set_num_threads(1); + EXPECT_EQ(index->hnsw.nb_neighbors(0), 8); + EXPECT_EQ(index->hnsw.nb_neighbors(1), 4); + EXPECT_EQ(index->hnsw.nb_neighbors(2), 4); + EXPECT_EQ(index->hnsw.nb_neighbors(3), 4); + // picking a large number to trigger an exception based on checking bounds + EXPECT_THROW(index->hnsw.nb_neighbors(100), faiss::FaissException); +} + TEST_F(HNSWTest, TEST_search_level_0) { omp_set_num_threads(1); std::vector I(k * nq); diff --git a/tests/test_io.py b/tests/test_io.py index 3cbd0a6e10..e2c5e69a18 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -11,6 +11,7 @@ import io import sys import pickle +import platform from multiprocessing.pool import ThreadPool from common_faiss_tests import get_dataset_2 @@ -481,3 +482,53 @@ def test_reader(self): finally: if os.path.exists(fname): os.unlink(fname) + + + +class TestIOFlatMMap(unittest.TestCase): + @unittest.skipIf( + platform.system() not in ["Windows", "Linux"], + "supported OSes only" + ) + def test_mmap(self): + xt, xb, xq = get_dataset_2(32, 0, 100, 50) + index = faiss.index_factory(32, "SQfp16", faiss.METRIC_L2) + # does not need training + index.add(xb) + Dref, Iref = index.search(xq, 10) + + fd, fname = tempfile.mkstemp() + os.close(fd) + + index2 = None + try: + faiss.write_index(index, fname) + index2 = faiss.read_index(fname, faiss.IO_FLAG_MMAP_IFC) + Dnew, Inew = index2.search(xq, 10) + np.testing.assert_array_equal(Iref, Inew) + 
np.testing.assert_array_equal(Dref, Dnew) + finally: + del index2 + + if os.path.exists(fname): + # skip the error. On Windows, index2 holds the handle file, + # so it cannot be ensured that the file can be deleted + # unless index2 is collected by a GC + try: + os.unlink(fname) + except: + pass + + def test_zerocopy(self): + xt, xb, xq = get_dataset_2(32, 0, 100, 50) + index = faiss.index_factory(32, "SQfp16", faiss.METRIC_L2) + # does not need training + index.add(xb) + Dref, Iref = index.search(xq, 10) + + serialized_index = faiss.serialize_index(index) + reader = faiss.ZeroCopyIOReader(faiss.swig_ptr(serialized_index), serialized_index.size) + index2 = faiss.read_index(reader) + Dnew, Inew = index2.search(xq, 10) + np.testing.assert_array_equal(Iref, Inew) + np.testing.assert_array_equal(Dref, Dnew) diff --git a/tests/test_ivflib.py b/tests/test_ivflib.py index d905f3d486..4bcad0c0c5 100644 --- a/tests/test_ivflib.py +++ b/tests/test_ivflib.py @@ -8,6 +8,9 @@ import unittest import faiss import numpy as np +import os +import random + class TestIVFlib(unittest.TestCase): @@ -180,3 +183,191 @@ def test_small_data(self): assert np.all(lims == ref_lims) assert np.all(D == ref_D) assert np.all(I == ref_I) + + +class TestIvfSharding(unittest.TestCase): + d = 32 + nlist = 100 + nb = 1000 + + def custom_sharding_function(self, i, _): + return 1 if i % 2 == 0 else 7 + + # Mimics the default in DefaultShardingFunction. + # This impl is just used for verification. 
+ def default_sharding_function(self, i, shard_count): + return i % shard_count + + def verify_sharded_ivf_indexes( + self, template, xb, shard_count, sharding_function, generate_ids=True): + sharded_indexes_counters = [0] * shard_count + sharded_indexes = [] + for i in range(shard_count): + if xb[0].dtype.name == 'uint8': + index = faiss.read_index_binary(template % i) + else: + index = faiss.read_index(template % i) + sharded_indexes.append(index) + + # Reconstruct and verify each centroid + if generate_ids: + for i in range(len(xb)): + shard_id = sharding_function(i, shard_count) + reconstructed = sharded_indexes[shard_id].quantizer.reconstruct(i) + np.testing.assert_array_equal(reconstructed, xb[i]) + else: + for i in range(len(xb)): + shard_id = sharding_function(i, shard_count) + reconstructed = sharded_indexes[shard_id].quantizer.reconstruct( + sharded_indexes_counters[shard_id]) + sharded_indexes_counters[shard_id] += 1 + np.testing.assert_array_equal(reconstructed, xb[i]) + + # Clean up + for i in range(shard_count): + os.remove(template % i) + + def test_save_index_shards_by_centroids_no_op(self): + quantizer = faiss.IndexFlatL2(self.d) + index = faiss.IndexIVFFlat(quantizer, self.d, self.nlist) + with self.assertRaises(RuntimeError): + faiss.shard_ivf_index_centroids( + index, + 10, + "shard.%d.index", + None + ) + + def test_save_index_shards_by_centroids_flat_quantizer_default_sharding( + self): + xb = np.random.rand(self.nb, self.d).astype('float32') + quantizer = faiss.IndexFlatL2(self.d) + index = faiss.IndexIVFFlat(quantizer, self.d, self.nlist) + shard_count = 3 + + index.quantizer.add(xb) + + template = str(random.randint(0, 100000)) + "shard.%d.index" + faiss.shard_ivf_index_centroids( + index, + shard_count, + template, + None, + True + ) + self.verify_sharded_ivf_indexes( + template, xb, shard_count, self.default_sharding_function) + + def test_save_index_shards_by_centroids_flat_quantizer_custom_sharding( + self): + xb = 
np.random.rand(self.nb, self.d).astype('float32') + quantizer = faiss.IndexFlatL2(self.d) + index = faiss.IndexIVFFlat(quantizer, self.d, self.nlist) + shard_count = 20 + + index.quantizer.add(xb) + + template = str(random.randint(0, 100000)) + "shard.%d.index" + faiss.shard_ivf_index_centroids( + index, + shard_count, + template, + self.custom_sharding_function, + True + ) + self.verify_sharded_ivf_indexes( + template, xb, shard_count, self.custom_sharding_function) + + def test_save_index_shards_by_centroids_hnsw_quantizer(self): + xb = np.random.rand(self.nb, self.d).astype('float32') + quantizer = faiss.IndexHNSWFlat(self.d, 32) + index = faiss.IndexIVFFlat(quantizer, self.d, self.nlist) + shard_count = 17 + + index.quantizer.add(xb) + + template = str(random.randint(0, 100000)) + "shard.%d.index" + faiss.shard_ivf_index_centroids( + index, + shard_count, + template, + None, + True + ) + self.verify_sharded_ivf_indexes( + template, xb, shard_count, self.default_sharding_function) + + def test_save_index_shards_by_centroids_binary_flat_quantizer(self): + xb = np.random.randint(256, size=(self.nb, int(self.d / 8))).astype('uint8') + quantizer = faiss.IndexBinaryFlat(self.d) + index = faiss.IndexBinaryIVF(quantizer, self.d, self.nlist) + shard_count = 11 + + index.quantizer.add(xb) + + template = str(random.randint(0, 100000)) + "shard.%d.index" + faiss.shard_binary_ivf_index_centroids( + index, + shard_count, + template, + None, + True + ) + self.verify_sharded_ivf_indexes( + template, xb, shard_count, self.default_sharding_function) + + def test_save_index_shards_by_centroids_binary_hnsw_quantizer(self): + xb = np.random.randint(256, size=(self.nb, int(self.d / 8))).astype('uint8') + quantizer = faiss.IndexBinaryHNSW(self.d, 32) + index = faiss.IndexBinaryIVF(quantizer, self.d, self.nlist) + shard_count = 13 + + index.quantizer.add(xb) + + template = str(random.randint(0, 100000)) + "shard.%d.index" + faiss.shard_binary_ivf_index_centroids( + index, + 
shard_count, + template, + None, + True + ) + self.verify_sharded_ivf_indexes( + template, xb, shard_count, self.default_sharding_function) + + def test_save_index_shards_without_id_generation(self): + xb = np.random.randint(256, size=(self.nb, int(self.d / 8))).astype('uint8') + quantizer = faiss.IndexBinaryHNSW(self.d, 32) + index = faiss.IndexBinaryIVF(quantizer, self.d, self.nlist) + shard_count = 5 + + index.quantizer.add(xb) + + template = str(random.randint(0, 100000)) + "shard.%d.index" + faiss.shard_binary_ivf_index_centroids( + index, + shard_count, + template, + None, + False + ) + self.verify_sharded_ivf_indexes( + template, xb, shard_count, self.default_sharding_function, False) + + xb = np.random.rand(self.nb, self.d).astype('float32') + quantizer = faiss.IndexHNSWFlat(self.d, 32) + index = faiss.IndexIVFFlat(quantizer, self.d, self.nlist) + shard_count = 23 + + index.quantizer.add(xb) + + template = str(random.randint(0, 100000)) + "shard.%d.index" + faiss.shard_ivf_index_centroids( + index, + shard_count, + template, + None, + False + ) + self.verify_sharded_ivf_indexes( + template, xb, shard_count, self.default_sharding_function, False) diff --git a/tests/test_mmap.cpp b/tests/test_mmap.cpp new file mode 100644 index 0000000000..78549d6878 --- /dev/null +++ b/tests/test_mmap.cpp @@ -0,0 +1,271 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +std::vector make_data(const size_t n, const size_t d, size_t seed) { + std::vector database(n * d); + std::mt19937 rng(seed); + std::uniform_real_distribution distrib; + + for (size_t i = 0; i < n * d; i++) { + database[i] = distrib(rng); + } + return database; +} + +std::vector make_binary_data( + const size_t n, + const size_t d, + size_t seed) { + std::vector database(n * d); + std::mt19937 rng(seed); + std::uniform_int_distribution distrib(0, 255); + + for (size_t i = 0; i < n * d; i++) { + database[i] = distrib(rng); + } + return database; +} + +} // namespace + +// the logic is the following: +// 1. generate two flatcodes-based indices, Index1 and Index2 +// 2. serialize both indices into std::vector<> buffers, Buf1 and Buf2 +// 3. save Buf1 into a temporary file, File1 +// 4. deserialize Index1 using mmap feature on File1 into Index1MM +// 5. ensure that Index1MM acts as Index2 if we write the data from Buf2 +// on top of the existing File1 +// 6. 
ensure that Index1MM acts as Index1 if we write the data from Buf1 +// on top of the existing File1 again + +TEST(TestMmap, mmap_flatcodes) { +#ifdef _AIX + GTEST_SKIP() << "Skipping test on AIX."; +#endif + // generate data + const size_t nt = 1000; + const size_t nq = 10; + const size_t d = 32; + const size_t k = 25; + + std::vector xt1 = make_data(nt, d, 123); + std::vector xt2 = make_data(nt, d, 456); + std::vector xq = make_data(nq, d, 789); + + // ensure that the data is different + ASSERT_NE(xt1, xt2); + + // make index1 and create reference results + faiss::IndexFlatL2 index1(d); + index1.train(nt, xt1.data()); + index1.add(nt, xt1.data()); + + std::vector ref_dis_1(k * nq); + std::vector ref_ids_1(k * nq); + index1.search(nq, xq.data(), k, ref_dis_1.data(), ref_ids_1.data()); + + // make index2 and create reference results + faiss::IndexFlatL2 index2(d); + index2.train(nt, xt2.data()); + index2.add(nt, xt2.data()); + + std::vector ref_dis_2(k * nq); + std::vector ref_ids_2(k * nq); + index2.search(nq, xq.data(), k, ref_dis_2.data(), ref_ids_2.data()); + + // ensure that the results are different + ASSERT_NE(ref_dis_1, ref_dis_2); + ASSERT_NE(ref_ids_1, ref_ids_2); + + // serialize both in a form of vectors + faiss::VectorIOWriter wr1; + faiss::write_index(&index1, &wr1); + + faiss::VectorIOWriter wr2; + faiss::write_index(&index2, &wr2); + + // generate a temporary file and write index1 into it + std::string tmpname = std::tmpnam(nullptr); + + { + std::ofstream ofs(tmpname); + ofs.write((const char*)wr1.data.data(), wr1.data.size()); + } + + // create a mmap index + std::unique_ptr index1mm( + faiss::read_index(tmpname.c_str(), faiss::IO_FLAG_MMAP_IFC)); + + ASSERT_NE(index1mm, nullptr); + + // perform a search + std::vector cand_dis_1(k * nq); + std::vector cand_ids_1(k * nq); + index1mm->search(nq, xq.data(), k, cand_dis_1.data(), cand_ids_1.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_1, cand_ids_1); + ASSERT_EQ(ref_dis_1, cand_dis_1); + + // ok 
now, overwrite the internals of the file without recreating it + { + std::ofstream ofs(tmpname); + ofs.seekp(0, std::ios::beg); + + ofs.write((const char*)wr2.data.data(), wr2.data.size()); + } + + // perform a search + std::vector cand_dis_2(k * nq); + std::vector cand_ids_2(k * nq); + index1mm->search(nq, xq.data(), k, cand_dis_2.data(), cand_ids_2.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_2, cand_ids_2); + ASSERT_EQ(ref_dis_2, cand_dis_2); + + // write back data1 + { + std::ofstream ofs(tmpname); + ofs.seekp(0, std::ios::beg); + + ofs.write((const char*)wr1.data.data(), wr1.data.size()); + } + + // perform a search + std::vector cand_dis_3(k * nq); + std::vector cand_ids_3(k * nq); + index1mm->search(nq, xq.data(), k, cand_dis_3.data(), cand_ids_3.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_1, cand_ids_3); + ASSERT_EQ(ref_dis_1, cand_dis_3); +} + +TEST(TestMmap, mmap_binary_flatcodes) { +#ifdef _AIX + GTEST_SKIP() << "Skipping test on AIX."; +#endif + // generate data + const size_t nt = 1000; + const size_t nq = 10; + // in bits + const size_t d = 64; + // in bytes + const size_t d8 = (d + 7) / 8; + const size_t k = 25; + + std::vector xt1 = make_binary_data(nt, d8, 123); + std::vector xt2 = make_binary_data(nt, d8, 456); + std::vector xq = make_binary_data(nq, d8, 789); + + // ensure that the data is different + ASSERT_NE(xt1, xt2); + + // make index1 and create reference results + faiss::IndexBinaryFlat index1(d); + index1.train(nt, xt1.data()); + index1.add(nt, xt1.data()); + + std::vector ref_dis_1(k * nq); + std::vector ref_ids_1(k * nq); + index1.search(nq, xq.data(), k, ref_dis_1.data(), ref_ids_1.data()); + + // make index2 and create reference results + faiss::IndexBinaryFlat index2(d); + index2.train(nt, xt2.data()); + index2.add(nt, xt2.data()); + + std::vector ref_dis_2(k * nq); + std::vector ref_ids_2(k * nq); + index2.search(nq, xq.data(), k, ref_dis_2.data(), ref_ids_2.data()); + + // ensure that the results are different + 
ASSERT_NE(ref_dis_1, ref_dis_2); + ASSERT_NE(ref_ids_1, ref_ids_2); + + // serialize both in a form of vectors + faiss::VectorIOWriter wr1; + faiss::write_index_binary(&index1, &wr1); + + faiss::VectorIOWriter wr2; + faiss::write_index_binary(&index2, &wr2); + + // generate a temporary file and write index1 into it + std::string tmpname = std::tmpnam(nullptr); + + { + std::ofstream ofs(tmpname); + ofs.write((const char*)wr1.data.data(), wr1.data.size()); + } + + // create a mmap index + std::unique_ptr index1mm( + faiss::read_index_binary(tmpname.c_str(), faiss::IO_FLAG_MMAP_IFC)); + + ASSERT_NE(index1mm, nullptr); + + // perform a search + std::vector cand_dis_1(k * nq); + std::vector cand_ids_1(k * nq); + index1mm->search(nq, xq.data(), k, cand_dis_1.data(), cand_ids_1.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_1, cand_ids_1); + ASSERT_EQ(ref_dis_1, cand_dis_1); + + // ok now, overwrite the internals of the file without recreating it + { + std::ofstream ofs(tmpname); + ofs.seekp(0, std::ios::beg); + + ofs.write((const char*)wr2.data.data(), wr2.data.size()); + } + + // perform a search + std::vector cand_dis_2(k * nq); + std::vector cand_ids_2(k * nq); + index1mm->search(nq, xq.data(), k, cand_dis_2.data(), cand_ids_2.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_2, cand_ids_2); + ASSERT_EQ(ref_dis_2, cand_dis_2); + + // write back data1 + { + std::ofstream ofs(tmpname); + ofs.seekp(0, std::ios::beg); + + ofs.write((const char*)wr1.data.data(), wr1.data.size()); + } + + // perform a search + std::vector cand_dis_3(k * nq); + std::vector cand_ids_3(k * nq); + index1mm->search(nq, xq.data(), k, cand_dis_3.data(), cand_ids_3.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_1, cand_ids_3); + ASSERT_EQ(ref_dis_1, cand_dis_3); +} diff --git a/tests/test_rabitq.py b/tests/test_rabitq.py new file mode 100644 index 0000000000..4ad0a0bcd4 --- /dev/null +++ b/tests/test_rabitq.py @@ -0,0 +1,445 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import unittest + +import faiss +import numpy as np + +from faiss.contrib import datasets + + +def random_rotation(d, seed=123): + rs = np.random.RandomState(seed) + Q, _ = np.linalg.qr(rs.randn(d, d)) + return Q + + +# based on https://gist.github.com/mdouze/0b2386c31d7fb8b20ae04f3fcbbf4d9d +class ReferenceRabitQ: + """Exact translation of the paper + https://dl.acm.org/doi/pdf/10.1145/3654970 + This is both a quantizer and serves to store the codes + """ + + def __init__(self, d, Bq=4): + self.d = d + self.Bq = Bq + + def train(self, xtrain, P): + self.centroid = xtrain.mean(0) + self.P = P + + def rotation(self, x): + return x @ self.P + + def inv_rotation(self, x): + return x @ self.P.T + + def add(self, Or): + # centering & normalization + Orc = Or - self.centroid + self.O_norms = np.sqrt((Orc**2).sum(1)) # need to store the norms + O = Orc / self.O_norms[:, None] + + # 3.1.3 + self.Xbarb = (self.inv_rotation(Orc) > 0).astype("int8") # 0, 1 + # here the encoded vectors are stored as an int array for simplicity + # but in the real code it would be as a packed uint8 array + # self.Xbarb = np.packbits(self.inv_rotation(Orc) > 0, axis=1) + # reconstruct to compute + Obar = self.rotation((2 * self.Xbarb - 1) / np.sqrt(self.d)) + self.o_Obar = (O * Obar).sum(1) # store dot products + + def distances(self, Qr): + """compute distance estimates for the queries to the stored vectors""" + d = self.d + Bq = self.Bq + + # preproc Qr + Qrc = Qr - self.centroid + Qrc_norms = np.sqrt((Qrc**2).sum(1))[:, None] + Q = Qrc + Qprime = self.inv_rotation(Q) + + # quantize queries to Bq bits + mins, maxes = Qprime.min(axis=1)[:, None], Qprime.max(axis=1)[:, None] + Delta = (maxes - mins) / (2**Bq - 1) + + # article mentioned a randomized variant + # qbar = np.floor((Qprime - mins) / Delta + rs.rand(nq, d)) + + # we'll use a non-randomized for the 
comparison purposes + qbar = np.round((Qprime - mins) / Delta) + # in the real implementation, this would be re-ordered + # in least-to most-significant bit + # dot product matrix, integers -- this is the expensive operation + dp = (qbar[:, None, :] * self.Xbarb[None, :, :]).sum(2) + + # the operations below roll back the normalizations to get the distance + # estimates. it is likely that they could be merged + # or some of them could be left out because we are interested only + # in top-k compute (eq 19-20) + sum_X = self.Xbarb.sum(1) + sum_Q = qbar.sum(1)[:, None] + sD = np.sqrt(d) + xbar_qbar = 2 * Delta / sD * dp + xbar_qbar += 2 * mins / sD * sum_X + xbar_qbar -= Delta / sD * sum_Q + xbar_qbar -= sD * mins + + # is close to thm 3.3 + # = eq 17 + + # / estimates (thm 3.2) + q_o = xbar_qbar / self.o_Obar + + # eq 1-2 to de-normalize and get distances + dis2_q_o = self.O_norms**2 + Qrc_norms**2 - 2 * self.O_norms * q_o + + return dis2_q_o + + +class ReferenceIVFRabitQ: + """straightforward IVF implementation""" + + def __init__(self, d, nlist, Bq=4): + self.d = d + self.nlist = nlist + self.invlists = [ReferenceRabitQ(d, Bq) for _ in range(nlist)] + self.quantizer = None + self.nprobe = 1 + + def train(self, xtrain, P): + if self.quantizer is None: + km = faiss.Kmeans(self.d, self.nlist, niter=10) + km.train(xtrain) + centroids = km.centroids + self.quantizer = faiss.IndexFlatL2(self.d) + self.quantizer.add(centroids) + else: + centroids = self.quantizer.reconstruct_n() + # Override the RabitQ train() to use a common random rotation + # and force centroids from the coarse quantizer + for list_no, rq in enumerate(self.invlists): + rq.centroid = centroids[list_no] + rq.P = P + + def add(self, x): + _, keys = self.quantizer.search(x, 1) + keys = keys.ravel() + n_per_invlist = np.bincount(keys, minlength=self.nlist) + order = np.argsort(keys) + i0 = 0 + for list_no, rab in enumerate(self.invlists): + i1 = i0 + n_per_invlist[list_no] + rab.list_size = i1 - i0 + if i1 
> i0: + ids = order[i0:i1] + rab.ids = ids + rab.add(x[ids]) + i0 = i1 + + def search(self, x, k): + nq = len(x) + nprobe = self.nprobe + D = np.zeros((nq, k), dtype="float32") + I = np.zeros((nq, k), dtype=int) + D[:] = np.nan + I[:] = -1 + _, Ic = self.quantizer.search(x, nprobe) + + for qno, xq in enumerate(x): + # naive top-k implemetation with a full sort + q_dis = [] + q_ids = [] + for probe in range(nprobe): + rab = self.invlists[Ic[qno, probe]] + if rab.list_size == 0: + continue + # we cannot exploit the batch version + # of the queries (in this form) + dis = rab.distances(xq[None, :]) + q_ids.append(rab.ids) + q_dis.append(dis.ravel()) + q_dis = np.hstack(q_dis) + q_ids = np.hstack(q_ids) + o = q_dis.argsort() + kq = min(k, len(q_dis)) + D[qno, :kq] = q_dis[o[:kq]] + I[qno, :kq] = q_ids[o[:kq]] + return D, I + + +class TestRaBitQ(unittest.TestCase): + def do_comparison_vs_pq_test(self, metric_type=faiss.METRIC_L2): + ds = datasets.SyntheticDataset(128, 4096, 4096, 100) + k = 10 + + # PQ 8-to-1 + index_pq = faiss.IndexPQ(ds.d, 16, 8, metric_type) + index_pq.train(ds.get_train()) + index_pq.add(ds.get_database()) + _, I_pq = index_pq.search(ds.get_queries(), k) + + index_rbq = faiss.IndexRaBitQ(ds.d, metric_type) + index_rbq.train(ds.get_train()) + index_rbq.add(ds.get_database()) + _, I_rbq = index_rbq.search(ds.get_queries(), k) + + # try quantized query + rbq_params = faiss.RaBitQSearchParameters(qb=8) + _, I_rbq_q8 = index_rbq.search(ds.get_queries(), k, params=rbq_params) + + rbq_params = faiss.RaBitQSearchParameters(qb=4) + _, I_rbq_q4 = index_rbq.search(ds.get_queries(), k, params=rbq_params) + + index_flat = faiss.IndexFlat(ds.d, metric_type) + index_flat.train(ds.get_train()) + index_flat.add(ds.get_database()) + _, I_f = index_flat.search(ds.get_queries(), k) + + # ensure that RaBitQ and PQ are relatively close + eval_pq = faiss.eval_intersection(I_pq[:, :k], I_f[:, :k]) + eval_pq /= ds.nq * k + eval_rbq = faiss.eval_intersection(I_rbq[:, :k], 
I_f[:, :k]) + eval_rbq /= ds.nq * k + eval_rbq_q8 = faiss.eval_intersection(I_rbq_q8[:, :k], I_f[:, :k]) + eval_rbq_q8 /= ds.nq * k + eval_rbq_q4 = faiss.eval_intersection(I_rbq_q4[:, :k], I_f[:, :k]) + eval_rbq_q4 /= ds.nq * k + + print( + f"PQ is {eval_pq}, " + f"RaBitQ is {eval_rbq}, " + f"q8 RaBitQ is {eval_rbq_q8}, " + f"q4 RaBitQ is {eval_rbq_q4}" + ) + + np.testing.assert_(abs(eval_pq - eval_rbq) < 0.05) + np.testing.assert_(abs(eval_pq - eval_rbq_q8) < 0.05) + np.testing.assert_(abs(eval_pq - eval_rbq_q4) < 0.05) + np.testing.assert_(eval_pq > 0.55) + + def test_comparison_vs_pq_L2(self): + self.do_comparison_vs_pq_test(faiss.METRIC_L2) + + def test_comparison_vs_pq_IP(self): + self.do_comparison_vs_pq_test(faiss.METRIC_INNER_PRODUCT) + + def test_comparison_vs_ref_L2_rrot(self, rrot_seed=123): + ds = datasets.SyntheticDataset(128, 4096, 4096, 1) + + ref_rbq = ReferenceRabitQ(ds.d, Bq=8) + ref_rbq.train(ds.get_train(), random_rotation(ds.d, rrot_seed)) + ref_rbq.add(ds.get_database()) + + index_rbq = faiss.IndexRaBitQ(ds.d, faiss.METRIC_L2) + index_rbq.qb = 8 + + # wrap with random rotations + rrot = faiss.RandomRotationMatrix(ds.d, ds.d) + rrot.init(rrot_seed) + + index_cand = faiss.IndexPreTransform(rrot, index_rbq) + index_cand.train(ds.get_train()) + index_cand.add(ds.get_database()) + + ref_dis = ref_rbq.distances(ds.get_queries()) + + dc = index_cand.get_distance_computer() + xq = ds.get_queries() + + # ensure that the correlation coefficient is very high + dc_dist = [0] * ds.nb + + dc.set_query(faiss.swig_ptr(xq[0])) + for j in range(ds.nb): + dc_dist[j] = dc(j) + + corr = np.corrcoef(dc_dist, ref_dis[0])[0, 1] + print(corr) + np.testing.assert_(corr > 0.9) + + def test_comparison_vs_ref_L2(self): + ds = datasets.SyntheticDataset(128, 4096, 4096, 1) + + ref_rbq = ReferenceRabitQ(ds.d, Bq=8) + ref_rbq.train(ds.get_train(), np.identity(ds.d)) + ref_rbq.add(ds.get_database()) + + index_rbq = faiss.IndexRaBitQ(ds.d, faiss.METRIC_L2) + index_rbq.qb = 8 + 
index_rbq.train(ds.get_train()) + index_rbq.add(ds.get_database()) + + ref_dis = ref_rbq.distances(ds.get_queries()) + + dc = index_rbq.get_distance_computer() + xq = ds.get_queries() + + dc.set_query(faiss.swig_ptr(xq[0])) + for j in range(ds.nb): + upd_dis = dc(j) + # print(f"{j} {ref_dis[0][j]} {upd_dis}") + np.testing.assert_(abs(ref_dis[0][j] - upd_dis) < 0.001) + + def do_test_serde(self, description): + ds = datasets.SyntheticDataset(32, 1000, 100, 20) + + index = faiss.index_factory(ds.d, description) + index.train(ds.get_train()) + index.add(ds.get_database()) + + Dref, Iref = index.search(ds.get_queries(), 10) + + b = faiss.serialize_index(index) + index2 = faiss.deserialize_index(b) + + Dnew, Inew = index2.search(ds.get_queries(), 10) + + np.testing.assert_equal(Dref, Dnew) + np.testing.assert_equal(Iref, Inew) + + def test_serde_rabitq(self): + self.do_test_serde("RaBitQ") + + +class TestIVFRaBitQ(unittest.TestCase): + def test_comparison_vs_ref_L2(self): + ds = datasets.SyntheticDataset(128, 4096, 4096, 100) + + k = 10 + nlist = 200 + ref_rbq = ReferenceIVFRabitQ(ds.d, nlist, Bq=4) + ref_rbq.train(ds.get_train(), np.identity(ds.d)) + ref_rbq.add(ds.get_database()) + + index_flat = faiss.IndexFlat(ds.d, faiss.METRIC_L2) + index_rbq = faiss.IndexIVFRaBitQ( + index_flat, ds.d, nlist, faiss.METRIC_L2 + ) + index_rbq.qb = 4 + index_rbq.train(ds.get_train()) + index_rbq.add(ds.get_database()) + + for nprobe in 1, 4, 16: + ref_rbq.nprobe = nprobe + Dref, Iref = ref_rbq.search(ds.get_queries(), k) + r_ref_k = faiss.eval_intersection( + Iref[:, :k], ds.get_groundtruth()[:, :k] + ) / (ds.nq * k) + print(f"{nprobe=} k-recall@10={r_ref_k}") + + params = faiss.IVFRaBitQSearchParameters() + params.qb = index_rbq.qb + params.nprobe = nprobe + _, Inew, _ = faiss.search_with_parameters( + index_rbq, ds.get_queries(), k, params, output_stats=True + ) + r_new_k = faiss.eval_intersection( + Inew[:, :k], ds.get_groundtruth()[:, :k] + ) / (ds.nq * k) + print(f"{nprobe=} 
k-recall@10={r_new_k}") + + np.testing.assert_almost_equal(r_ref_k, r_new_k, 3) + + def test_comparison_vs_ref_L2_rrot(self): + ds = datasets.SyntheticDataset(128, 4096, 4096, 100) + + k = 10 + nlist = 200 + rrot_seed = 123 + + ref_rbq = ReferenceIVFRabitQ(ds.d, nlist, Bq=4) + ref_rbq.train(ds.get_train(), random_rotation(ds.d, rrot_seed)) + ref_rbq.add(ds.get_database()) + + index_flat = faiss.IndexFlat(ds.d, faiss.METRIC_L2) + index_rbq = faiss.IndexIVFRaBitQ( + index_flat, ds.d, nlist, faiss.METRIC_L2 + ) + index_rbq.qb = 4 + + # wrap with random rotations + rrot = faiss.RandomRotationMatrix(ds.d, ds.d) + rrot.init(rrot_seed) + + index_cand = faiss.IndexPreTransform(rrot, index_rbq) + index_cand.train(ds.get_train()) + index_cand.add(ds.get_database()) + + for nprobe in 1, 4, 16: + ref_rbq.nprobe = nprobe + Dref, Iref = ref_rbq.search(ds.get_queries(), k) + r_ref_k = faiss.eval_intersection( + Iref[:, :k], ds.get_groundtruth()[:, :k] + ) / (ds.nq * k) + print(f"{nprobe=} k-recall@10={r_ref_k}") + + params = faiss.IVFRaBitQSearchParameters() + params.qb = index_rbq.qb + params.nprobe = nprobe + Dnew, Inew, stats2 = faiss.search_with_parameters( + index_cand, ds.get_queries(), k, params, output_stats=True + ) + r_new_k = faiss.eval_intersection( + Inew[:, :k], ds.get_groundtruth()[:, :k] + ) / (ds.nq * k) + print(f"{nprobe=} k-recall@10={r_new_k}") + + np.testing.assert_almost_equal(r_ref_k, r_new_k, 2) + + def do_test_serde(self, description): + ds = datasets.SyntheticDataset(32, 1000, 100, 20) + + xt = ds.get_train() + xb = ds.get_database() + + index = faiss.index_factory(ds.d, description) + index.train(xt) + index.add(xb) + + Dref, Iref = index.search(ds.get_queries(), 10) + + b = faiss.serialize_index(index) + index2 = faiss.deserialize_index(b) + + Dnew, Inew = index2.search(ds.get_queries(), 10) + + np.testing.assert_equal(Dref, Dnew) + np.testing.assert_equal(Iref, Inew) + + def test_serde_ivfrabitq(self): + self.do_test_serde("IVF16,RaBitQ") + + +class 
TestRaBitQuantizerEncodeDecode(unittest.TestCase): + def do_test_encode_decode(self, d, metric): + # rabitq must precisely reconstruct a vector, + # which consists of +A and -A values + + seed = 123 + rs = np.random.RandomState(seed) + + ampl = 100 + n = 10 + vec = (2 * rs.randint(0, 2, d * n) - 1).astype(np.float32) * ampl + vec = np.reshape(vec, (n, d)) + + quantizer = faiss.RaBitQuantizer(d, metric) + + # encode and decode + vec_q = quantizer.compute_codes(vec) + vec_rec = quantizer.decode(vec_q) + + # verify + np.testing.assert_equal(vec, vec_rec) + + def test_encode_decode_L2(self): + self.do_test_encode_decode(16, faiss.METRIC_L2) + + def test_encode_decode_IP(self): + self.do_test_encode_decode(16, faiss.METRIC_INNER_PRODUCT) diff --git a/tests/test_search_params.py b/tests/test_search_params.py index 18436edf4d..56c2cd95ee 100644 --- a/tests/test_search_params.py +++ b/tests/test_search_params.py @@ -22,14 +22,38 @@ class TestSelector(unittest.TestCase): combinations as possible. """ - def do_test_id_selector(self, index_key, id_selector_type="batch", mt=faiss.METRIC_L2, k=10): + def do_test_id_selector( + self, + index_key, + id_selector_type="batch", + mt=faiss.METRIC_L2, + k=10, + use_heap=True + ): """ Verify that the id selector returns the subset of results that are members according to the IDSelector. 
Supports id_selector_type="batch", "bitmap", "range", "range_sorted", "and", "or", "xor" """ - ds = datasets.SyntheticDataset(32, 1000, 100, 20) - index = faiss.index_factory(ds.d, index_key, mt) - index.train(ds.get_train()) + d = 32 # make sure dimension is multiple of 8 for binary + ds = datasets.SyntheticDataset(d, 1000, 100, 20) + + if index_key == "BinaryFlat": + rs = np.random.RandomState(123) + xb = rs.randint(256, size=(ds.nb, d // 8), dtype='uint8') + xq = rs.randint(256, size=(ds.nq, d // 8), dtype='uint8') + index = faiss.IndexBinaryFlat(d) + index.use_heap = use_heap + # Use smaller radius for Hamming distance + base_radius = 4 + is_binary = True + else: + xb = ds.get_database() + xq = ds.get_queries() + xt = ds.get_train() + index = faiss.index_factory(d, index_key, mt) + index.train(xt) + base_radius = float('inf') # Will be set based on results + is_binary = False # reference result if "range" in id_selector_type: @@ -54,20 +78,22 @@ def do_test_id_selector(self, index_key, id_selector_type="batch", mt=faiss.METR subset = np.setxor1d(lhs_subset, rhs_subset) else: rs = np.random.RandomState(123) - subset = rs.choice(ds.nb, 50, replace=False).astype("int64") - # add_with_ids not supported for all index types - # index.add_with_ids(ds.get_database()[subset], subset) - index.add(ds.get_database()[subset]) + subset = rs.choice(ds.nb, 50, replace=False).astype('int64') + + index.add(xb[subset]) if "IVF" in index_key and id_selector_type == "range_sorted": self.assertTrue(index.check_ids_sorted()) - Dref, Iref0 = index.search(ds.get_queries(), k) + Dref, Iref0 = index.search(xq, k) Iref = subset[Iref0] Iref[Iref0 < 0] = -1 - radius = float(Dref[Iref > 0].max()) * 1.01 + if base_radius == float('inf'): + radius = float(Dref[Iref > 0].max()) * 1.01 + else: + radius = base_radius + try: - Rlims_ref, RDref, RIref = index.range_search( - ds.get_queries(), radius) + Rlims_ref, RDref, RIref = index.range_search(xq, radius) except RuntimeError as e: if "not 
implemented" in str(e): have_range_search = False @@ -81,7 +107,7 @@ def do_test_id_selector(self, index_key, id_selector_type="batch", mt=faiss.METR # result with selector: fill full database and search with selector index.reset() - index.add(ds.get_database()) + index.add(xb) if id_selector_type == "range": sel = faiss.IDSelectorRange(30, 80) elif id_selector_type == "range_sorted": @@ -123,17 +149,53 @@ def do_test_id_selector(self, index_key, id_selector_type="batch", mt=faiss.METR faiss.SearchParametersPQ(sel=sel) if "PQ" in index_key else faiss.SearchParameters(sel=sel) ) - Dnew, Inew = index.search(ds.get_queries(), k, params=params) - np.testing.assert_array_equal(Iref, Inew) - np.testing.assert_almost_equal(Dref, Dnew, decimal=5) + + Dnew, Inew = index.search(xq, k, params=params) + + if is_binary: + # For binary indexes, we need to check: + # 1. All returned IDs are valid (in the subset or -1) + # 2. The distances match + + # Check that all returned IDs are valid + valid_ids = np.ones_like(Inew, dtype=bool) + # Create a mask of valid IDs (those in subset) + subset_set = set(subset) # Convert to set for O(1) lookups + # Handle -1 values separately (they're always valid) + valid_ids = np.logical_or( + Inew == -1, + np.isin(Inew, list(subset_set)) + ) + + self.assertTrue(np.all(valid_ids), "Some returned IDs are not in the subset") + + # Check that distances match + np.testing.assert_almost_equal(Dref, Dnew, decimal=5) + else: + # For non-binary indexes, we can do exact comparison + np.testing.assert_array_equal(Iref, Inew) + np.testing.assert_almost_equal(Dref, Dnew, decimal=5) if have_range_search: - Rlims_new, RDnew, RInew = index.range_search( - ds.get_queries(), radius, params=params) + Rlims_new, RDnew, RInew = index.range_search(xq, radius, params=params) np.testing.assert_array_equal(Rlims_ref, Rlims_new) RDref, RIref = sort_range_res_2(Rlims_ref, RDref, RIref) - np.testing.assert_array_equal(RIref, RInew) - np.testing.assert_almost_equal(RDref, 
RDnew, decimal=5) + + if is_binary: + # For binary indexes, check that all returned IDs are valid + valid_ids = np.ones(len(RInew), dtype=bool) + # Use vectorized operation instead of loop + subset_set = set(subset) # Convert to set for O(1) lookups + valid_ids = np.isin(RInew, list(subset_set)) + + self.assertTrue(np.all(valid_ids), "Some range search IDs are not in the subset") + + # Check that distances match + np.testing.assert_almost_equal(RDref, RDnew, decimal=5) + else: + # For non-binary indexes, we can do exact comparison + np.testing.assert_array_equal(RIref, RInew) + np.testing.assert_almost_equal(RDref, RDnew, decimal=5) def test_IVFFlat(self): self.do_test_id_selector("IVF32,Flat") @@ -284,6 +346,17 @@ def test_bounds(self): distances, indices = index_ip.search(xb[:2], k=3, params=search_params) distances, indices = index_l2.search(xb[:2], k=3, params=search_params) + def test_BinaryFlat(self): + self.do_test_id_selector("BinaryFlat") + + def test_BinaryFlat_id_range(self): + self.do_test_id_selector("BinaryFlat", id_selector_type="range") + + def test_BinaryFlat_id_array(self): + self.do_test_id_selector("BinaryFlat", id_selector_type="array") + + def test_BinaryFlat_no_heap(self): + self.do_test_id_selector("BinaryFlat", use_heap=False) class TestSearchParams(unittest.TestCase): diff --git a/tests/test_zerocopy.cpp b/tests/test_zerocopy.cpp new file mode 100644 index 0000000000..9b8734bd53 --- /dev/null +++ b/tests/test_zerocopy.cpp @@ -0,0 +1,243 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace { + +std::vector make_data(const size_t n, const size_t d, size_t seed) { + std::vector database(n * d); + std::mt19937 rng(seed); + std::uniform_real_distribution distrib; + + for (size_t i = 0; i < n * d; i++) { + database[i] = distrib(rng); + } + return database; +} + +std::vector make_binary_data( + const size_t n, + const size_t d, + size_t seed) { + std::vector database(n * d); + std::mt19937 rng(seed); + std::uniform_int_distribution distrib(0, 255); + + for (size_t i = 0; i < n * d; i++) { + database[i] = distrib(rng); + } + return database; +} + +} // namespace + +// the logic is the following: +// 1. generate two flatcodes-based indices, Index1 and Index2 +// 2. serialize both indices into std::vector<> buffers, Buf1 and Buf2 +// 3. deserialize Index1 using zero-copy feature on Buf1 into Index1ZC +// 4. ensure that Index1ZC acts as Index2 if we write the data from Buf2 +// on top of the existing Buf1 + +TEST(TestZeroCopy, zerocopy_flatcodes) { + // generate data + const size_t nt = 1000; + const size_t nq = 10; + const size_t d = 32; + const size_t k = 25; + + std::vector xt1 = make_data(nt, d, 123); + std::vector xt2 = make_data(nt, d, 456); + std::vector xq = make_data(nq, d, 789); + + // ensure that the data is different + ASSERT_NE(xt1, xt2); + + // make index1 and create reference results + faiss::IndexFlatL2 index1(d); + index1.train(nt, xt1.data()); + index1.add(nt, xt1.data()); + + std::vector ref_dis_1(k * nq); + std::vector ref_ids_1(k * nq); + index1.search(nq, xq.data(), k, ref_dis_1.data(), ref_ids_1.data()); + + // make index2 and create reference results + faiss::IndexFlatL2 index2(d); + index2.train(nt, xt2.data()); + index2.add(nt, xt2.data()); + + std::vector ref_dis_2(k * nq); + std::vector ref_ids_2(k * nq); + index2.search(nq, xq.data(), k, ref_dis_2.data(), ref_ids_2.data()); + + // ensure that the results are 
different + ASSERT_NE(ref_dis_1, ref_dis_2); + ASSERT_NE(ref_ids_1, ref_ids_2); + + // serialize both in a form of vectors + faiss::VectorIOWriter wr1; + faiss::write_index(&index1, &wr1); + + faiss::VectorIOWriter wr2; + faiss::write_index(&index2, &wr2); + + ASSERT_EQ(wr1.data.size(), wr2.data.size()); + + // clone a buffer + std::vector buffer = wr1.data; + + // create a zero-copy index + faiss::ZeroCopyIOReader reader(buffer.data(), buffer.size()); + std::unique_ptr index1zc(faiss::read_index(&reader)); + + ASSERT_NE(index1zc, nullptr); + + // perform a search + std::vector cand_dis_1(k * nq); + std::vector cand_ids_1(k * nq); + index1zc->search(nq, xq.data(), k, cand_dis_1.data(), cand_ids_1.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_1, cand_ids_1); + ASSERT_EQ(ref_dis_1, cand_dis_1); + + // overwrite buffer without moving it + for (size_t i = 0; i < buffer.size(); i++) { + buffer[i] = wr2.data[i]; + } + + // perform a search + std::vector cand_dis_2(k * nq); + std::vector cand_ids_2(k * nq); + index1zc->search(nq, xq.data(), k, cand_dis_2.data(), cand_ids_2.data()); + + // match vs ref2 + ASSERT_EQ(ref_ids_2, cand_ids_2); + ASSERT_EQ(ref_dis_2, cand_dis_2); + + // overwrite again + for (size_t i = 0; i < buffer.size(); i++) { + buffer[i] = wr1.data[i]; + } + + // perform a search + std::vector cand_dis_3(k * nq); + std::vector cand_ids_3(k * nq); + index1zc->search(nq, xq.data(), k, cand_dis_3.data(), cand_ids_3.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_1, cand_ids_3); + ASSERT_EQ(ref_dis_1, cand_dis_3); +} + +TEST(TestZeroCopy, zerocopy_binary_flatcodes) { + // generate data + const size_t nt = 1000; + const size_t nq = 10; + // in bits + const size_t d = 64; + // in bytes + const size_t d8 = (d + 7) / 8; + const size_t k = 25; + + std::vector xt1 = make_binary_data(nt, d8, 123); + std::vector xt2 = make_binary_data(nt, d8, 456); + std::vector xq = make_binary_data(nq, d8, 789); + + // ensure that the data is different + ASSERT_NE(xt1, xt2); + 
+ // make index1 and create reference results + faiss::IndexBinaryFlat index1(d); + index1.train(nt, xt1.data()); + index1.add(nt, xt1.data()); + + std::vector ref_dis_1(k * nq); + std::vector ref_ids_1(k * nq); + index1.search(nq, xq.data(), k, ref_dis_1.data(), ref_ids_1.data()); + + // make index2 and create reference results + faiss::IndexBinaryFlat index2(d); + index2.train(nt, xt2.data()); + index2.add(nt, xt2.data()); + + std::vector ref_dis_2(k * nq); + std::vector ref_ids_2(k * nq); + index2.search(nq, xq.data(), k, ref_dis_2.data(), ref_ids_2.data()); + + // ensure that the results are different + ASSERT_NE(ref_dis_1, ref_dis_2); + ASSERT_NE(ref_ids_1, ref_ids_2); + + // serialize both in a form of vectors + faiss::VectorIOWriter wr1; + faiss::write_index_binary(&index1, &wr1); + + faiss::VectorIOWriter wr2; + faiss::write_index_binary(&index2, &wr2); + + ASSERT_EQ(wr1.data.size(), wr2.data.size()); + + // clone a buffer + std::vector buffer = wr1.data; + + // create a zero-copy index + faiss::ZeroCopyIOReader reader(buffer.data(), buffer.size()); + std::unique_ptr index1zc( + faiss::read_index_binary(&reader)); + + ASSERT_NE(index1zc, nullptr); + + // perform a search + std::vector cand_dis_1(k * nq); + std::vector cand_ids_1(k * nq); + index1zc->search(nq, xq.data(), k, cand_dis_1.data(), cand_ids_1.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_1, cand_ids_1); + ASSERT_EQ(ref_dis_1, cand_dis_1); + + // overwrite buffer without moving it + for (size_t i = 0; i < buffer.size(); i++) { + buffer[i] = wr2.data[i]; + } + + // perform a search + std::vector cand_dis_2(k * nq); + std::vector cand_ids_2(k * nq); + index1zc->search(nq, xq.data(), k, cand_dis_2.data(), cand_ids_2.data()); + + // match vs ref2 + ASSERT_EQ(ref_ids_2, cand_ids_2); + ASSERT_EQ(ref_dis_2, cand_dis_2); + + // overwrite again + for (size_t i = 0; i < buffer.size(); i++) { + buffer[i] = wr1.data[i]; + } + + // perform a search + std::vector cand_dis_3(k * nq); + std::vector 
cand_ids_3(k * nq); + index1zc->search(nq, xq.data(), k, cand_dis_3.data(), cand_ids_3.data()); + + // match vs ref1 + ASSERT_EQ(ref_ids_1, cand_ids_3); + ASSERT_EQ(ref_dis_1, cand_dis_3); +}