From a413963e56c344450d8933bd1797181ca8dcfe71 Mon Sep 17 00:00:00 2001 From: Alexandr Guzhva Date: Mon, 24 Jun 2024 17:34:38 -0400 Subject: [PATCH] propagate faiss changes from (24 Jun 2024) Signed-off-by: Alexandr Guzhva --- tests/faiss/CMakeLists.txt | 2 + thirdparty/faiss/.circleci/config.yml | 428 +------ .../.github/actions/build_cmake/action.yml | 105 ++ .../.github/actions/build_conda/action.yml | 96 ++ thirdparty/faiss/.github/workflows/build.yml | 244 ++++ .../faiss/.github/workflows/nightly.yml | 139 +++ .../benchs/bench_cppcontrib_sa_decode.cpp | 175 +-- thirdparty/faiss/benchs/bench_fw/benchmark.py | 764 +++++++++--- .../faiss/benchs/bench_fw/benchmark_io.py | 6 +- .../faiss/benchs/bench_fw/descriptors.py | 216 +++- thirdparty/faiss/benchs/bench_fw/index.py | 123 +- thirdparty/faiss/benchs/bench_fw/optimize.py | 18 +- thirdparty/faiss/benchs/bench_fw_codecs.py | 10 +- thirdparty/faiss/benchs/bench_fw_ivf.py | 31 +- .../faiss/benchs/bench_fw_notebook.ipynb | 1059 +++++++++-------- thirdparty/faiss/benchs/bench_fw_optimize.py | 6 +- thirdparty/faiss/benchs/bench_fw_range.py | 20 +- .../faiss/c_api/IndexScalarQuantizer_c.h | 3 + .../faiss/conda/faiss-gpu-raft/meta.yaml | 17 +- thirdparty/faiss/conda/faiss-gpu/build-lib.sh | 6 + thirdparty/faiss/conda/faiss-gpu/meta.yaml | 6 +- thirdparty/faiss/conda/faiss/meta.yaml | 4 +- thirdparty/faiss/contrib/datasets.py | 6 +- thirdparty/faiss/contrib/factory_tools.py | 3 + thirdparty/faiss/contrib/vecs_io.py | 8 +- thirdparty/faiss/faiss/IndexFlat.cpp | 14 +- thirdparty/faiss/faiss/IndexHNSW.cpp | 174 ++- thirdparty/faiss/faiss/IndexHNSW.h | 44 +- thirdparty/faiss/faiss/IndexIVFFastScan.cpp | 6 - thirdparty/faiss/faiss/IndexNNDescent.cpp | 29 - .../faiss/faiss/IndexScalarQuantizer.cpp | 4 +- thirdparty/faiss/faiss/MetricType.h | 4 + thirdparty/faiss/faiss/gpu/GpuIcmEncoder.cu | 12 +- .../faiss/faiss/impl/AuxIndexStructures.cpp | 25 + .../faiss/faiss/impl/AuxIndexStructures.h | 8 + 
.../faiss/faiss/impl/DistanceComputer.h | 46 + thirdparty/faiss/faiss/impl/HNSW.cpp | 72 +- thirdparty/faiss/faiss/impl/HNSW.h | 12 +- thirdparty/faiss/faiss/impl/NNDescent.cpp | 21 +- thirdparty/faiss/faiss/impl/NSG.cpp | 29 - .../faiss/faiss/impl/ScalarQuantizer.cpp | 7 + thirdparty/faiss/faiss/impl/ScalarQuantizer.h | 5 +- .../faiss/faiss/impl/ScalarQuantizerCodec.h | 88 ++ .../faiss/impl/ScalarQuantizerCodec_avx.h | 81 ++ .../faiss/impl/ScalarQuantizerCodec_avx512.h | 93 ++ .../faiss/impl/ScalarQuantizerCodec_neon.h | 102 +- .../impl/code_distance/code_distance-avx2.h | 5 + thirdparty/faiss/faiss/impl/index_read.cpp | 20 +- thirdparty/faiss/faiss/impl/index_write.cpp | 33 +- thirdparty/faiss/faiss/index_factory.cpp | 6 +- thirdparty/faiss/faiss/index_io.h | 11 +- .../faiss/faiss/invlists/InvertedLists.cpp | 72 +- .../faiss/faiss/invlists/InvertedLists.h | 27 +- thirdparty/faiss/faiss/utils/bf16.h | 36 + .../faiss/faiss/utils/extra_distances-inl.h | 32 + .../faiss/faiss/utils/extra_distances.cpp | 60 +- .../faiss/faiss/utils/extra_distances.h | 5 +- thirdparty/faiss/faiss/utils/simdlib_neon.h | 10 +- thirdparty/faiss/tests/CMakeLists.txt | 2 + thirdparty/faiss/tests/common_faiss_tests.py | 1 - .../faiss/tests/test_binary_hashindex.py | 10 - thirdparty/faiss/tests/test_build_blocks.py | 15 - thirdparty/faiss/tests/test_callback.cpp | 37 + thirdparty/faiss/tests/test_callback_py.py | 32 + thirdparty/faiss/tests/test_clustering.py | 3 - .../tests/test_common_ivf_empty_index.cpp | 144 +++ thirdparty/faiss/tests/test_contrib.py | 2 - .../faiss/tests/test_contrib_with_scipy.py | 2 - .../faiss/tests/test_extra_distances.py | 27 + thirdparty/faiss/tests/test_fast_scan.py | 3 - thirdparty/faiss/tests/test_graph_based.py | 43 +- thirdparty/faiss/tests/test_index.py | 11 +- thirdparty/faiss/tests/test_index_accuracy.py | 55 +- thirdparty/faiss/tests/test_index_binary.py | 6 +- .../faiss/tests/test_index_composite.py | 3 - thirdparty/faiss/tests/test_io.py | 1 - 
thirdparty/faiss/tests/test_ivf_index.cpp | 2 + thirdparty/faiss/tests/test_ivflib.py | 1 - .../tests/test_local_search_quantizer.py | 8 +- thirdparty/faiss/tests/test_merge_index.py | 1 - thirdparty/faiss/tests/test_meta_index.py | 7 - thirdparty/faiss/tests/test_partition.py | 6 - .../faiss/tests/test_product_quantizer.py | 2 - .../faiss/tests/test_residual_quantizer.py | 17 - thirdparty/faiss/tests/test_rowwise_minmax.py | 1 - thirdparty/faiss/tests/test_search_params.py | 1 - .../faiss/tests/test_standalone_codec.py | 12 +- thirdparty/faiss/tutorial/cpp/1-Flat.cpp | 4 +- thirdparty/faiss/tutorial/cpp/2-IVFFlat.cpp | 7 +- thirdparty/faiss/tutorial/cpp/6-HNSW.cpp | 73 ++ .../faiss/tutorial/cpp/7-PQFastScan.cpp | 75 ++ .../faiss/tutorial/cpp/8-PQFastScanRefine.cpp | 84 ++ .../faiss/tutorial/cpp/9-RefineComparison.cpp | 104 ++ thirdparty/faiss/tutorial/cpp/CMakeLists.txt | 12 + .../faiss/tutorial/python/7-PQFastScan.py | 35 + .../tutorial/python/8-PQFastScanRefine.py | 38 + .../tutorial/python/9-RefineComparison.py | 42 + 97 files changed, 3900 insertions(+), 1717 deletions(-) create mode 100644 thirdparty/faiss/.github/actions/build_cmake/action.yml create mode 100644 thirdparty/faiss/.github/actions/build_conda/action.yml create mode 100644 thirdparty/faiss/.github/workflows/build.yml create mode 100644 thirdparty/faiss/.github/workflows/nightly.yml create mode 100644 thirdparty/faiss/faiss/utils/bf16.h create mode 100644 thirdparty/faiss/tests/test_callback.cpp create mode 100644 thirdparty/faiss/tests/test_callback_py.py create mode 100644 thirdparty/faiss/tests/test_common_ivf_empty_index.cpp create mode 100644 thirdparty/faiss/tutorial/cpp/6-HNSW.cpp create mode 100644 thirdparty/faiss/tutorial/cpp/7-PQFastScan.cpp create mode 100644 thirdparty/faiss/tutorial/cpp/8-PQFastScanRefine.cpp create mode 100644 thirdparty/faiss/tutorial/cpp/9-RefineComparison.cpp create mode 100644 thirdparty/faiss/tutorial/python/7-PQFastScan.py create mode 100644 
thirdparty/faiss/tutorial/python/8-PQFastScanRefine.py create mode 100644 thirdparty/faiss/tutorial/python/9-RefineComparison.py diff --git a/tests/faiss/CMakeLists.txt b/tests/faiss/CMakeLists.txt index dd15ddc2c..e4e0c6c2e 100644 --- a/tests/faiss/CMakeLists.txt +++ b/tests/faiss/CMakeLists.txt @@ -26,6 +26,8 @@ set(FAISS_TEST_SRCS ../../thirdparty/faiss/tests/test_fastscan_perf.cpp ../../thirdparty/faiss/tests/test_ivf_index.cpp ../../thirdparty/faiss/tests/test_disable_pq_sdc_tables.cpp + ../../thirdparty/faiss/tests/test_common_ivf_empty_index.cpp + ../../thirdparty/faiss/tests/test_callback.cpp ) find_package(GTest REQUIRED) diff --git a/thirdparty/faiss/.circleci/config.yml b/thirdparty/faiss/.circleci/config.yml index 549e4a279..033093915 100644 --- a/thirdparty/faiss/.circleci/config.yml +++ b/thirdparty/faiss/.circleci/config.yml @@ -5,185 +5,8 @@ executors: docker: - image: continuumio/miniconda3 resource_class: large - linux-x86_64-gpu: - environment: - CONDA_ARCH: Linux-x86_64 - machine: - image: linux-cuda-12:default - resource_class: gpu.nvidia.medium - linux-arm64-cpu: - environment: - CONDA_ARCH: Linux-aarch64 - machine: - image: ubuntu-2204:current - resource_class: arm.medium - macosx-arm64-cpu: - environment: - CONDA_ARCH: MacOSX-arm64 - macos: - xcode: 14.2.0 # minimum supported for M1 - resource_class: macos.m1.large.gen1 - windows-x86_64-cpu: - machine: - image: windows-server-2019-vs2019:2023.04.1 - shell: bash.exe - resource_class: windows.medium jobs: - format: - docker: - - image: ubuntu:22.04 - steps: - - checkout - - run: - name: Install clang-format - command: | - apt-get update - apt-get install -y git-core clang-format-11 - - run: - name: Verify clang-format - command: | - git ls-files | grep -E '\.(cpp|h|cu|cuh)$' | xargs clang-format-11 -i - if git diff --quiet; then - echo "Formatting OK!" - else - echo "Formatting not OK!" 
- echo "------------------" - git --no-pager diff --color - exit 1 - fi - - build_conda: - parameters: - label: - type: string - default: "" - cuda: - type: string - default: "" - raft: - type: string - default: "" - cuda_archs: - type: string - default: "" - compiler_version: - type: string - default: "" - exec: - type: executor - executor: << parameters.exec >> - environment: - OMP_NUM_THREADS: 10 - PACKAGE_TYPE: <> - CUDA_ARCHS: <> - steps: - - checkout - - run: - name: Install conda - command: | - if [ -n "${CONDA_ARCH}" ] - then - curl https://repo.anaconda.com/miniconda/Miniconda3-latest-${CONDA_ARCH}.sh --output miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - ~/miniconda/bin/conda init - fi - - run: - name: Install conda build tools - command: | - # conda config --set solver libmamba - # conda config --set verbosity 3 - conda update -y -q conda - conda install -y -q conda-build - - when: - condition: << parameters.label >> - steps: - - run: - name: Enable anaconda uploads - command: | - conda install -y -q anaconda-client - conda config --set anaconda_upload yes - - when: - condition: - and: - - not: << parameters.label >> - - not: << parameters.cuda >> - steps: - - run: - name: Conda build (CPU) - no_output_timeout: 30m - command: | - cd conda - conda build faiss --python 3.11 -c pytorch - - when: - condition: - and: - - << parameters.label >> - - not: << parameters.cuda >> - steps: - - run: - name: Conda build (CPU) w/ anaconda upload - no_output_timeout: 30m - command: | - cd conda - conda build faiss --user pytorch --label <> -c pytorch - - when: - condition: - and: - - not: << parameters.label >> - - << parameters.cuda >> - - not: << parameters.raft >> - steps: - - run: - name: Conda build (GPU) - no_output_timeout: 60m - command: | - cd conda - conda build faiss-gpu --variants '{ "cudatoolkit": "<>", "c_compiler_version": "<>", "cxx_compiler_version": "<>" }' \ - -c pytorch -c nvidia/label/cuda-<> -c nvidia - - when: - condition: - and: - - << 
parameters.label >> - - << parameters.cuda >> - - not: << parameters.raft >> - steps: - - run: - name: Conda build (GPU) w/ anaconda upload - no_output_timeout: 60m - command: | - cd conda - conda build faiss-gpu --variants '{ "cudatoolkit": "<>", "c_compiler_version": "<>", "cxx_compiler_version": "<>" }' \ - --user pytorch --label <> -c pytorch -c nvidia/label/cuda-<> -c nvidia - - when: - condition: - and: - - not: << parameters.label >> - - << parameters.cuda >> - - << parameters.raft >> - steps: - - run: - name: Conda build (GPU w/ RAFT) - no_output_timeout: 60m - command: | - cd conda - conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<>", "c_compiler_version": "<>", "cxx_compiler_version": "<>" }' \ - -c pytorch -c nvidia/label/cuda-<> -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge - - when: - condition: - and: - - << parameters.label >> - - << parameters.cuda >> - - << parameters.raft >> - steps: - - run: - name: Conda build (GPU w/ RAFT) w/ anaconda upload - no_output_timeout: 60m - command: | - cd conda - conda build faiss-gpu-raft --variants '{ "cudatoolkit": "<>", "c_compiler_version": "<>", "cxx_compiler_version": "<>" }' \ - --user pytorch --label <> -c pytorch -c nvidia/label/cuda-<> -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge - build_cmake: parameters: exec: @@ -191,12 +14,6 @@ jobs: opt_level: type: string default: generic - gpu: - type: string - default: "OFF" - raft: - type: string - default: "OFF" executor: << parameters.exec >> environment: OMP_NUM_THREADS: 10 @@ -217,32 +34,10 @@ jobs: command: | conda config --set solver libmamba conda update -y -q conda - - when: - condition: - equal: [ "OFF", << parameters.raft >> ] - steps: - - run: - name: Install env using main channel - command: | - conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64=11.2 sysroot_linux-64 - - when: - condition: - equal: [ "ON", << parameters.raft >> ] - steps: - - run: - name: Install env 
using conda-forge channel - command: | - conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64=11.2 sysroot_linux-64=2.28 libraft cuda-version=11.8 cuda-toolkit -c rapidsai-nightly -c "nvidia/label/cuda-11.8.0" -c conda-forge - - when: - condition: - and: - - equal: [ "ON", << parameters.gpu >> ] - - equal: [ "OFF", << parameters.raft >> ] - steps: - - run: - name: Install CUDA - command: | - conda install -y -q cuda-toolkit -c "nvidia/label/cuda-11.8.0" + - run: + name: Install env using main channel + command: | + conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest gxx_linux-64=11.2 sysroot_linux-64 - run: name: Build all targets no_output_timeout: 30m @@ -252,8 +47,8 @@ jobs: cmake -B build \ -DBUILD_TESTING=ON \ -DBUILD_SHARED_LIBS=ON \ - -DFAISS_ENABLE_GPU=<< parameters.gpu >> \ - -DFAISS_ENABLE_RAFT=<< parameters.raft >> \ + -DFAISS_ENABLE_GPU=OFF \ + -DFAISS_ENABLE_RAFT=OFF \ -DFAISS_OPT_LEVEL=<< parameters.opt_level >> \ -DFAISS_ENABLE_C_API=ON \ -DPYTHON_EXECUTABLE=$(which python) \ @@ -272,38 +67,12 @@ jobs: command: | cd build/faiss/python python setup.py install - - when: - condition: - equal: [ "OFF", << parameters.gpu >> ] - steps: - - run: - name: Python tests (CPU only) - command: | - conda install -y -q pytorch -c pytorch - pytest --junitxml=test-results/pytest/results.xml tests/test_*.py - pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py - - when: - condition: - equal: [ "ON", << parameters.gpu >> ] - steps: - - run: - name: Python tests (CPU + GPU) - command: | - conda install -y -q pytorch pytorch-cuda=11.8 -c pytorch -c nvidia/label/cuda-11.8.0 - pytest --junitxml=test-results/pytest/results.xml tests/test_*.py - pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py - cp tests/common_faiss_tests.py faiss/gpu/test - pytest --junitxml=test-results/pytest/results-gpu.xml faiss/gpu/test/test_*.py - pytest 
--junitxml=test-results/pytest/results-gpu-torch.xml faiss/gpu/test/torch_*.py - - when: - condition: - equal: [ "avx2", << parameters.opt_level >> ] - steps: - - run: - name: Test avx2 loading - command: | - FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs python -c "import faiss" 2>&1 | grep faiss.so - LD_DEBUG=libs python -c "import faiss" 2>&1 | grep faiss_avx2.so + - run: + name: Python tests (CPU only) + command: | + conda install -y -q pytorch -c pytorch + pytest --junitxml=test-results/pytest/results.xml tests/test_*.py + pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py - store_test_results: path: test-results @@ -311,180 +80,7 @@ workflows: version: 2 build: jobs: - - format: - name: Format - - build_cmake: - name: Linux x86_64 (cmake) - exec: linux-x86_64-cpu - - build_cmake: - name: Linux x86_64 AVX2 (cmake) - exec: linux-x86_64-cpu - opt_level: "avx2" - build_cmake: name: Linux x86_64 AVX512 (cmake) exec: linux-x86_64-cpu opt_level: "avx512" - - build_cmake: - name: Linux x86_64 GPU (cmake) - exec: linux-x86_64-gpu - gpu: "ON" - requires: - - Linux x86_64 AVX2 (cmake) - - build_cmake: - name: Linux x86_64 GPU w/ RAFT (cmake) - exec: linux-x86_64-gpu - gpu: "ON" - raft: "ON" - requires: - - Linux x86_64 GPU (cmake) - - build_conda: - name: Linux x86_64 (conda) - exec: linux-x86_64-cpu - - build_conda: - name: Windows x86_64 (conda) - exec: windows-x86_64-cpu - - build_conda: - name: Linux arm64 (conda) - exec: linux-arm64-cpu - - build_conda: - name: Linux x86_64 packages - exec: linux-x86_64-cpu - label: main - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ - - build_conda: - name: Linux x86_64 GPU packages (CUDA 11.4.4) - exec: linux-x86_64-gpu - label: main - cuda: "11.4.4" - cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" - compiler_version: "11.2" - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ - - build_conda: - name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.8.0) - exec: 
linux-x86_64-gpu - label: main - raft: "ON" - cuda: "11.8.0" - cuda_archs: "70-real;72-real;75-real;80;86-real" - compiler_version: "11.2" - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ - - build_conda: - name: Linux x86_64 GPU packages (CUDA 12.1.1) - exec: linux-x86_64-gpu - label: main - cuda: "12.1.1" - cuda_archs: "70-real;72-real;75-real;80;86-real" - compiler_version: "11.2" - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ - - build_conda: - name: Linux x86_64 GPU w/ RAFT packages (CUDA 12.1.1) - exec: linux-x86_64-gpu - label: main - raft: "ON" - cuda: "12.1.1" - cuda_archs: "70-real;72-real;75-real;80;86-real" - compiler_version: "11.2" - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ - - build_conda: - name: Windows x86_64 packages - exec: windows-x86_64-cpu - label: main - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ - - build_conda: - name: OSX arm64 packages - exec: macosx-arm64-cpu - label: main - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ - - build_conda: - name: Linux arm64 packages - exec: linux-arm64-cpu - label: main - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ - - nightly: - triggers: - - schedule: - cron: "0 0 * * *" - filters: - branches: - only: - - main - jobs: - - build_conda: - name: Linux x86_64 nightlies - exec: linux-x86_64-cpu - label: nightly - - build_conda: - name: Linux x86_64 GPU nightlies (CUDA 11.4.4) - exec: linux-x86_64-gpu - label: nightly - cuda: "11.4.4" - cuda_archs: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" - compiler_version: "11.2" - - build_conda: - name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.8.0) - exec: linux-x86_64-gpu - label: nightly - raft: "ON" - cuda: "11.8.0" - cuda_archs: "70-real;72-real;75-real;80;86-real" - compiler_version: "11.2" - - build_conda: - name: Linux x86_64 GPU nightlies (CUDA 12.1.1) - exec: linux-x86_64-gpu - label: nightly - cuda: "12.1.1" - cuda_archs: 
"70-real;72-real;75-real;80;86-real" - compiler_version: "11.2" - - build_conda: - name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 12.1.1) - exec: linux-x86_64-gpu - label: nightly - raft: "ON" - cuda: "12.1.1" - cuda_archs: "70-real;72-real;75-real;80;86-real" - compiler_version: "11.2" - - build_conda: - name: Windows x86_64 nightlies - exec: windows-x86_64-cpu - label: nightly - - build_conda: - name: OSX arm64 nightlies - exec: macosx-arm64-cpu - label: nightly - - build_conda: - name: Linux arm64 nightlies - exec: linux-arm64-cpu - label: nightly diff --git a/thirdparty/faiss/.github/actions/build_cmake/action.yml b/thirdparty/faiss/.github/actions/build_cmake/action.yml new file mode 100644 index 000000000..2bc476add --- /dev/null +++ b/thirdparty/faiss/.github/actions/build_cmake/action.yml @@ -0,0 +1,105 @@ +name: Build cmake +inputs: + opt_level: + description: 'Compile options / optimization level.' + required: false + default: generic + gpu: + description: 'Enable GPU support.' + required: false + default: OFF + raft: + description: 'Enable RAFT support.' 
+ required: false + default: OFF +runs: + using: composite + steps: + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v3 + with: + python-version: '3.11' + miniconda-version: latest + - name: Configure build environment + shell: bash + run: | + # initialize Conda + conda config --set solver libmamba + conda update -y -q conda + echo "$CONDA/bin" >> $GITHUB_PATH + + # install base packages + conda install -y -q -c conda-forge gxx_linux-64=11.2 sysroot_linux-64=2.28 + conda install -y -q python=3.11 cmake make swig mkl=2023 mkl-devel=2023 numpy scipy pytest + + # install CUDA packages + if [ "${{ inputs.gpu }}" = "ON" ] && [ "${{ inputs.raft }}" = "OFF" ]; then + conda install -y -q cuda-toolkit -c "nvidia/label/cuda-11.8.0" + fi + + # install RAFT packages + if [ "${{ inputs.raft }}" = "ON" ]; then + conda install -y -q libraft cuda-version=11.8 cuda-toolkit -c rapidsai-nightly -c "nvidia/label/cuda-11.8.0" -c conda-forge + fi + + # install test packages + conda install -y pytest + if [ "${{ inputs.gpu }}" = "ON" ]; then + conda install -y -q pytorch pytorch-cuda=11.8 -c pytorch -c nvidia/label/cuda-11.8.0 + else + conda install -y -q pytorch -c pytorch + fi + - name: Build all targets + shell: bash + run: | + eval "$(conda shell.bash hook)" + conda activate + cmake -B build \ + -DBUILD_TESTING=ON \ + -DBUILD_SHARED_LIBS=ON \ + -DFAISS_ENABLE_GPU=${{ inputs.gpu }} \ + -DFAISS_ENABLE_RAFT=${{ inputs.raft }} \ + -DFAISS_OPT_LEVEL=${{ inputs.opt_level }} \ + -DFAISS_ENABLE_C_API=ON \ + -DPYTHON_EXECUTABLE=$CONDA/bin/python \ + -DCMAKE_BUILD_TYPE=Release \ + -DBLA_VENDOR=Intel10_64_dyn \ + -DCMAKE_CUDA_FLAGS="-gencode arch=compute_75,code=sm_75" \ + . 
+ make -k -C build -j$(nproc) + - name: C++ tests + shell: bash + run: | + export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/" + make -C build test + - name: Install Python extension + shell: bash + working-directory: build/faiss/python + run: | + $CONDA/bin/python setup.py install + - name: Python tests (CPU only) + if: inputs.gpu == 'OFF' + shell: bash + run: | + pytest --junitxml=test-results/pytest/results.xml tests/test_*.py + pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py + - name: Python tests (CPU + GPU) + if: inputs.gpu == 'ON' + shell: bash + run: | + pytest --junitxml=test-results/pytest/results.xml tests/test_*.py + pytest --junitxml=test-results/pytest/results-torch.xml tests/torch_*.py + cp tests/common_faiss_tests.py faiss/gpu/test + pytest --junitxml=test-results/pytest/results-gpu.xml faiss/gpu/test/test_*.py + pytest --junitxml=test-results/pytest/results-gpu-torch.xml faiss/gpu/test/torch_*.py + - name: Test avx2 loading + if: inputs.opt_level == 'avx2' + shell: bash + run: | + FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs $CONDA/bin/python -c "import faiss" 2>&1 | grep faiss.so + LD_DEBUG=libs $CONDA/bin/python -c "import faiss" 2>&1 | grep faiss_avx2.so + - name: Upload test results + uses: actions/upload-artifact@v4 + with: + name: test-results-${{ inputs.opt_level }}-${{ inputs.gpu }}-${{ inputs.raft }} + path: test-results diff --git a/thirdparty/faiss/.github/actions/build_conda/action.yml b/thirdparty/faiss/.github/actions/build_conda/action.yml new file mode 100644 index 000000000..982430c35 --- /dev/null +++ b/thirdparty/faiss/.github/actions/build_conda/action.yml @@ -0,0 +1,96 @@ +name: Conda build +description: Builds FAISS inside a Conda environment and uploads to repository when label is provided. +inputs: + label: + description: "The label to be used for uploads to Conda." + default: "" + required: false + cuda: + description: "CUDA toolkit version to use." 
+ default: "" + required: false + raft: + description: "Enable RAFT support." + default: "" + required: false + compiler_version: + description: "compiler_version" + default: "Compiler version for C/C++/CUDA." + required: false +runs: + using: composite + steps: + - name: Choose shell + shell: bash + id: choose_shell + run: | + # Use pwsh on Windows; bash everywhere else + if [ "${{ runner.os }}" != "Windows" ]; then + echo "shell=bash" >> "$GITHUB_OUTPUT" + else + echo "shell=pwsh" >> "$GITHUB_OUTPUT" + fi + - name: Setup miniconda + uses: conda-incubator/setup-miniconda@v3 + with: + python-version: '3.11' + miniconda-version: latest + - name: Install conda build tools + shell: ${{ steps.choose_shell.outputs.shell }} + run: | + conda update -y -q conda + conda install -y -q conda-build + - name: Enable anaconda uploads + if: inputs.label != '' + shell: ${{ steps.choose_shell.outputs.shell }} + env: + PACKAGE_TYPE: ${{ inputs.label }} + run: | + conda install -y -q anaconda-client + conda config --set anaconda_upload yes + - name: Conda build (CPU) + if: inputs.label == '' && inputs.cuda == '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + run: | + conda build faiss --python 3.11 -c pytorch + - name: Conda build (CPU) w/ anaconda upload + if: inputs.label != '' && inputs.cuda == '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + env: + PACKAGE_TYPE: ${{ inputs.label }} + run: | + conda build faiss --user pytorch --label ${{ inputs.label }} -c pytorch + - name: Conda build (GPU) + if: inputs.label == '' && inputs.cuda != '' && inputs.raft == '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + run: | + conda build faiss-gpu --variants '{ "cudatoolkit": "${{ inputs.cuda }}", "c_compiler_version": "${{ inputs.compiler_version }}", "cxx_compiler_version": "${{ inputs.compiler_version }}" }' \ + -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia + - name: Conda build (GPU) 
w/ anaconda upload + if: inputs.label != '' && inputs.cuda != '' && inputs.raft == '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + env: + PACKAGE_TYPE: ${{ inputs.label }} + run: | + conda build faiss-gpu --variants '{ "cudatoolkit": "${{ inputs.cuda }}", "c_compiler_version": "${{ inputs.compiler_version }}", "cxx_compiler_version": "${{ inputs.compiler_version }}" }' \ + --user pytorch --label ${{ inputs.label }} -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia + - name: Conda build (GPU w/ RAFT) + if: inputs.label == '' && inputs.cuda != '' && inputs.raft != '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + run: | + conda build faiss-gpu-raft --variants '{ "cudatoolkit": "${{ inputs.cuda }}", "c_compiler_version": "${{ inputs.compiler_version }}", "cxx_compiler_version": "${{ inputs.compiler_version }}" }' \ + -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge + - name: Conda build (GPU w/ RAFT) w/ anaconda upload + if: inputs.label != '' && inputs.cuda != '' && inputs.raft != '' + shell: ${{ steps.choose_shell.outputs.shell }} + working-directory: conda + env: + PACKAGE_TYPE: ${{ inputs.label }} + run: | + conda build faiss-gpu-raft --variants '{ "cudatoolkit": "${{ inputs.cuda }}", "c_compiler_version": "${{ inputs.compiler_version }}", "cxx_compiler_version": "${{ inputs.compiler_version }}" }' \ + --user pytorch --label ${{ inputs.label }} -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge diff --git a/thirdparty/faiss/.github/workflows/build.yml b/thirdparty/faiss/.github/workflows/build.yml new file mode 100644 index 000000000..bd415dfce --- /dev/null +++ b/thirdparty/faiss/.github/workflows/build.yml @@ -0,0 +1,244 @@ +name: Build +on: + workflow_dispatch: + pull_request: + branches: + - main + push: + tags: + - 'v*' +env: + OMP_NUM_THREADS: '10' + MKL_THREADING_LAYER: 
GNU +jobs: + format: + name: Format + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install clang-format + run: | + sudo apt-get update -y + sudo apt-get install -y wget + sudo apt install -y lsb-release wget software-properties-common gnupg + wget https://apt.llvm.org/llvm.sh + chmod u+x llvm.sh + sudo ./llvm.sh 18 + sudo apt-get install -y git-core clang-format-18 + - name: Verify clang-format + run: | + git ls-files | grep -E '\.(cpp|h|cu|cuh)$' | xargs clang-format-18 -i + if git diff --quiet; then + echo "Formatting OK!" + else + echo "Formatting not OK!" + echo "------------------" + git --no-pager diff --color + exit 1 + fi + linux-x86_64-cmake: + name: Linux x86_64 (cmake) + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: ./.github/actions/build_cmake + linux-x86_64-AVX2-cmake: + name: Linux x86_64 AVX2 (cmake) + needs: linux-x86_64-cmake + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: ./.github/actions/build_cmake + with: + opt_level: avx2 + linux-x86_64-AVX512-cmake: + name: Linux x86_64 AVX512 (cmake) + if: false # TODO: enable when GitHub Actions adds AVX-512 hosts + needs: linux-x86_64-cmake + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: ./.github/actions/build_cmake + with: + opt_level: avx512 + linux-x86_64-GPU-cmake: + name: Linux x86_64 GPU (cmake) + needs: linux-x86_64-cmake + runs-on: 4-core-ubuntu-gpu-t4 + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: ./.github/actions/build_cmake + with: + gpu: ON + linux-x86_64-GPU-w-RAFT-cmake: + name: Linux x86_64 GPU w/ RAFT (cmake) + needs: linux-x86_64-cmake + runs-on: 4-core-ubuntu-gpu-t4 + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: ./.github/actions/build_cmake + with: + gpu: ON + raft: ON + linux-x86_64-conda: + name: Linux x86_64 (conda) + needs: linux-x86_64-cmake + runs-on: ubuntu-latest + 
steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + windows-x86_64-conda: + name: Windows x86_64 (conda) + needs: linux-x86_64-cmake + runs-on: windows-2019 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + linux-arm64-conda: + name: Linux arm64 (conda) + needs: linux-x86_64-cmake + runs-on: 2-core-ubuntu-arm + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + linux-x86_64-packages: + name: Linux x86_64 packages + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + with: + label: main + linux-x86_64-GPU-packages-CUDA-11-4-4: + name: Linux x86_64 GPU packages (CUDA 11.4.4) + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" + FAISS_FLATTEN_CONDA_INCLUDES: "1" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + with: + label: main + cuda: "11.4.4" + compiler_version: "11.2" + linux-x86_64-GPU-RAFT-packages-CUDA11-8-0: + name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.8.0) + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + with: + label: main + raft: "ON" + cuda: "11.8.0" + compiler_version: "11.2" + linux-x86_64-GPU-packages-CUDA-12-1-1: + 
name: Linux x86_64 GPU packages (CUDA 12.1.1) + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + with: + label: main + cuda: "12.1.1" + compiler_version: "11.2" + linux-x86_64-GPU-RAFT-packages-CUDA12-1-1: + name: Linux x86_64 GPU w/ RAFT packages (CUDA 12.1.1) + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + with: + label: main + raft: "ON" + cuda: "12.1.1" + compiler_version: "11.2" + windows-x86_64-packages: + name: Windows x86_64 packages + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: windows-2019 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + with: + label: main + osx-arm64-packages: + name: OSX arm64 packages + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: macos-14 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + with: + label: main + linux-arm64-packages: + name: Linux arm64 packages + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: 2-core-ubuntu-arm + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + with: + label: main diff --git a/thirdparty/faiss/.github/workflows/nightly.yml b/thirdparty/faiss/.github/workflows/nightly.yml new file mode 100644 index 
000000000..eabee0774 --- /dev/null +++ b/thirdparty/faiss/.github/workflows/nightly.yml @@ -0,0 +1,139 @@ +name: Nightly +on: + schedule: + - cron: '10 1 * * *' +env: + OMP_NUM_THREADS: '10' + MKL_THREADING_LAYER: GNU +jobs: + linux-x86_64-nightly: + name: Linux x86_64 nightlies + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + linux-x86_64-GPU-CUDA-11-4-4-nightly: + name: Linux x86_64 GPU nightlies (CUDA 11.4.4) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "60-real;61-real;62-real;70-real;72-real;75-real;80;86-real" + FAISS_FLATTEN_CONDA_INCLUDES: "1" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + cuda: "11.4.4" + compiler_version: "11.2" + linux-x86_64-GPU-RAFT-CUDA11-8-0-nightly: + name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.8.0) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + raft: "ON" + cuda: "11.8.0" + compiler_version: "11.2" + linux-x86_64-GPU-CUDA-12-1-1-nightly: + name: Linux x86_64 GPU nightlies (CUDA 12.1.1) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + cuda: "12.1.1" + compiler_version: "11.2" + 
linux-x86_64-GPU-RAFT-CUDA12-1-1-nightly: + name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 12.1.1) + runs-on: 4-core-ubuntu-gpu-t4 + env: + CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + raft: "ON" + cuda: "12.1.1" + compiler_version: "11.2" + windows-x86_64-nightly: + name: Windows x86_64 nightlies + runs-on: windows-2019 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + osx-arm64-nightly: + name: OSX arm64 nightlies + runs-on: macos-14 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly + linux-arm64-nightly: + name: Linux arm64 nightlies + runs-on: 2-core-ubuntu-arm + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + fetch-tags: true + - uses: ./.github/actions/build_conda + env: + ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} + with: + label: nightly diff --git a/thirdparty/faiss/benchs/bench_cppcontrib_sa_decode.cpp b/thirdparty/faiss/benchs/bench_cppcontrib_sa_decode.cpp index f0266172a..b960fb7c6 100644 --- a/thirdparty/faiss/benchs/bench_cppcontrib_sa_decode.cpp +++ b/thirdparty/faiss/benchs/bench_cppcontrib_sa_decode.cpp @@ -213,9 +213,9 @@ static void verifyIndex2LevelDecoder( // evaluate the error double error = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "store_seq" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel << "\t" << error << std::endl; + 
std::cout << description << "\t" << n << "\t" << d << "\tstore_seq\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel + << "\t" << error << std::endl; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -261,10 +261,9 @@ static void verifyIndex2LevelDecoder( // evaluate the error const double error = getError(n, d, outputFaiss, outputKernel1); - - std::cout << description << "\t" << n << "\t" << d << "\t" - << "store_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel << "\t" << error << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\tstore_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel + << "\t" << error << std::endl; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -324,9 +323,9 @@ static void verifyIndex2LevelDecoder( // evaluate the error const double error1 = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel1 << "\t" << error1 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel1 + << "\t" << error1 << std::endl; // kernels: accum 2 points, shared centroids StopWatch swKernel2; @@ -353,9 +352,9 @@ static void verifyIndex2LevelDecoder( // evaluate the error const double error2 = getError(n, d, outputFaiss, outputKernel2); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum2_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel2 << "\t" << error2 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum2_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel2 + << "\t" << error2 << std::endl; // kernels: accum 2 points, unique centroids StopWatch swKernel2u; @@ -384,9 
+383,9 @@ static void verifyIndex2LevelDecoder( // evaluate the error const double error2u = getError(n, d, outputFaiss, outputKernel2u); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum2u_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel2u << "\t" << error2u << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum2u_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel2u + << "\t" << error2u << std::endl; // kernels: accum 3 points, shared centroids StopWatch swKernel3; @@ -418,9 +417,9 @@ static void verifyIndex2LevelDecoder( // evaluate the error const double error3 = getError(n, d, outputFaiss, outputKernel3); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum3_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel3 << "\t" << error3 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum3_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel3 + << "\t" << error3 << std::endl; // kernels: accum 3 points, unique centroids StopWatch swKernel3u; @@ -456,9 +455,9 @@ static void verifyIndex2LevelDecoder( // evaluate the error const double error3u = getError(n, d, outputFaiss, outputKernel3u); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum3u_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel3u << "\t" << error3u << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum3u_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel3u + << "\t" << error3u << std::endl; } } @@ -524,9 +523,9 @@ static void verifyMinMaxIndex2LevelDecoder( // evaluate the error double error = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "store_seq" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel << "\t" << error << std::endl; + std::cout << description << "\t" << n << "\t" << d 
<< "\tstore_seq\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel + << "\t" << error << std::endl; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -573,9 +572,9 @@ static void verifyMinMaxIndex2LevelDecoder( // evaluate the error const double error = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "store_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel << "\t" << error << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\tstore_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel + << "\t" << error << std::endl; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -641,9 +640,9 @@ static void verifyMinMaxIndex2LevelDecoder( // evaluate the error const double error1 = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel1 << "\t" << error1 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel1 + << "\t" << error1 << std::endl; // kernels: accum 2 points, shared centroids StopWatch swKernel2; @@ -675,9 +674,9 @@ static void verifyMinMaxIndex2LevelDecoder( // evaluate the error const double error2 = getError(n, d, outputFaiss, outputKernel2); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum2_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel2 << "\t" << error2 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum2_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel2 + << "\t" << error2 << std::endl; // kernels: accum 2 points, unique centroids StopWatch swKernel2u; @@ -711,9 +710,9 @@ static void 
verifyMinMaxIndex2LevelDecoder( // evaluate the error const double error2u = getError(n, d, outputFaiss, outputKernel2u); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum2u_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel2u << "\t" << error2u << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum2u_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel2u + << "\t" << error2u << std::endl; // kernels: accum 3 points, shared centroids StopWatch swKernel3; @@ -750,9 +749,9 @@ static void verifyMinMaxIndex2LevelDecoder( // evaluate the error const double error3 = getError(n, d, outputFaiss, outputKernel3); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum3_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel3 << "\t" << error3 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum3_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel3 + << "\t" << error3 << std::endl; // kernels: accum 3 points, unique centroids StopWatch swKernel3u; @@ -793,9 +792,9 @@ static void verifyMinMaxIndex2LevelDecoder( // evaluate the error const double error3u = getError(n, d, outputFaiss, outputKernel3u); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum3u_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel3u << "\t" << error3u << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum3u_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel3u + << "\t" << error3u << std::endl; } } @@ -851,9 +850,9 @@ static void verifyIndexPQDecoder( // evaluate the error double error = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "store_seq" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel << "\t" << error << std::endl; + std::cout << description << "\t" << n << "\t" << d << 
"\tstore_seq\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel + << "\t" << error << std::endl; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -899,9 +898,9 @@ static void verifyIndexPQDecoder( // evaluate the error const double error = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "store_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel << "\t" << error << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\tstore_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel + << "\t" << error << std::endl; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -961,9 +960,9 @@ static void verifyIndexPQDecoder( // evaluate the error const double error1 = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel1 << "\t" << error1 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel1 + << "\t" << error1 << std::endl; // kernels: accum 2 points, shared centroids StopWatch swKernel2; @@ -989,9 +988,9 @@ static void verifyIndexPQDecoder( // evaluate the error const double error2 = getError(n, d, outputFaiss, outputKernel2); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum2_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel2 << "\t" << error2 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum2_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel2 + << "\t" << error2 << std::endl; // kernels: accum 2 points, unique centroids StopWatch swKernel2u; @@ -1018,9 +1017,9 @@ static void verifyIndexPQDecoder( // evaluate the error 
const double error2u = getError(n, d, outputFaiss, outputKernel2u); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum2u_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel2u << "\t" << error2u << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum2u_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel2u + << "\t" << error2u << std::endl; // kernels: accum 3 points, shared centroids StopWatch swKernel3; @@ -1051,9 +1050,9 @@ static void verifyIndexPQDecoder( // evaluate the error const double error3 = getError(n, d, outputFaiss, outputKernel3); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum3_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel3 << "\t" << error3 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum3_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel3 + << "\t" << error3 << std::endl; // kernels: accum 3 points, unique centroids StopWatch swKernel3u; @@ -1086,9 +1085,9 @@ static void verifyIndexPQDecoder( // evaluate the error const double error3u = getError(n, d, outputFaiss, outputKernel3u); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum3u_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel3u << "\t" << error3u << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum3u_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel3u + << "\t" << error3u << std::endl; } } @@ -1149,9 +1148,9 @@ static void verifyMinMaxIndexPQDecoder( // evaluate the error double error = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "store_seq" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel << "\t" << error << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\tstore_seq\t" + << nIterations << "\t" << timeFaiss << "\t" << 
timeKernel + << "\t" << error << std::endl; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1197,9 +1196,9 @@ static void verifyMinMaxIndexPQDecoder( // evaluate the error const double error = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "store_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel << "\t" << error << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\tstore_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel + << "\t" << error << std::endl; } ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1264,9 +1263,9 @@ static void verifyMinMaxIndexPQDecoder( // evaluate the error const double error1 = getError(n, d, outputFaiss, outputKernel1); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel1 << "\t" << error1 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel1 + << "\t" << error1 << std::endl; // kernels: accum 2 points, shared centroids StopWatch swKernel2; @@ -1297,9 +1296,9 @@ static void verifyMinMaxIndexPQDecoder( // evaluate the error const double error2 = getError(n, d, outputFaiss, outputKernel2); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum2_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel2 << "\t" << error2 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum2_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel2 + << "\t" << error2 << std::endl; // kernels: accum 2 points, unique centroids StopWatch swKernel2u; @@ -1331,9 +1330,9 @@ static void verifyMinMaxIndexPQDecoder( // evaluate the error const double error2u = getError(n, 
d, outputFaiss, outputKernel2u); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum2u_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel2u << "\t" << error2u << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum2u_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel2u + << "\t" << error2u << std::endl; // kernels: accum 3 points, shared centroids StopWatch swKernel3; @@ -1369,9 +1368,9 @@ static void verifyMinMaxIndexPQDecoder( // evaluate the error const double error3 = getError(n, d, outputFaiss, outputKernel3); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum3_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel3 << "\t" << error3 << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum3_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel3 + << "\t" << error3 << std::endl; // kernels: accum 3 points, unique centroids StopWatch swKernel3u; @@ -1409,9 +1408,9 @@ static void verifyMinMaxIndexPQDecoder( // evaluate the error const double error3u = getError(n, d, outputFaiss, outputKernel3u); - std::cout << description << "\t" << n << "\t" << d << "\t" - << "accum3u_rnd" << "\t" << nIterations << "\t" << timeFaiss - << "\t" << timeKernel3u << "\t" << error3u << std::endl; + std::cout << description << "\t" << n << "\t" << d << "\taccum3u_rnd\t" + << nIterations << "\t" << timeFaiss << "\t" << timeKernel3u + << "\t" << error3u << std::endl; } } @@ -1484,8 +1483,10 @@ int main(int argc, char** argv) { (N_ITERATIONS % 6) == 0, "Number of iterations should be 6*x"); // print the header - std::cout << "Codec\t" << "n\t" << "d\t" << "Experiment\t" << "Iterations\t" - << "Faiss time\t" << "SADecodeKernel time\t" << "Error" + auto delim = "\t"; + std::cout << "Codec" << delim << "n" << delim << "d" << delim + << "Experiment" << delim << "Iterations" << delim << "Faiss time" + << delim << "SADecodeKernel time" << 
delim << "Error" << std::endl; // The following experiment types are available: diff --git a/thirdparty/faiss/benchs/bench_fw/benchmark.py b/thirdparty/faiss/benchs/bench_fw/benchmark.py index 1053f9938..237d08bd9 100644 --- a/thirdparty/faiss/benchs/bench_fw/benchmark.py +++ b/thirdparty/faiss/benchs/bench_fw/benchmark.py @@ -4,8 +4,7 @@ # LICENSE file in the root directory of this source tree. import logging -from copy import copy -from dataclasses import dataclass +from dataclasses import dataclass, field from operator import itemgetter from statistics import mean, median from typing import Any, Dict, List, Optional @@ -16,7 +15,16 @@ from scipy.optimize import curve_fit -from .descriptors import DatasetDescriptor, IndexDescriptor +from .benchmark_io import BenchmarkIO + +from .descriptors import ( + CodecDescriptor, + DatasetDescriptor, + IndexDescriptor, + IndexDescriptorClassic, + KnnDescriptor, +) + from .index import Index, IndexFromCodec, IndexFromFactory from .utils import dict_merge @@ -185,15 +193,9 @@ def sigmoid(x, a, b, c): @dataclass -class Benchmark: +class IndexOperator: num_threads: int - training_vectors: Optional[DatasetDescriptor] = None - database_vectors: Optional[DatasetDescriptor] = None - query_vectors: Optional[DatasetDescriptor] = None - index_descs: Optional[List[IndexDescriptor]] = None - range_ref_index_desc: Optional[str] = None - k: Optional[int] = None - distance_metric: str = "L2" + distance_metric: str def __post_init__(self): if self.distance_metric == "IP": @@ -203,18 +205,167 @@ def __post_init__(self): else: raise ValueError - def set_io(self, benchmark_io): + def set_io(self, benchmark_io: BenchmarkIO): self.io = benchmark_io self.io.distance_metric = self.distance_metric self.io.distance_metric_type = self.distance_metric_type - def get_index_desc(self, factory: str) -> Optional[IndexDescriptor]: + +@dataclass +class TrainOperator(IndexOperator): + codec_descs: List[CodecDescriptor] = field(default_factory=lambda: []) + + 
def get_desc(self, name: str) -> Optional[CodecDescriptor]: + for desc in self.codec_descs: + if desc.get_name() == name: + return desc + elif desc.factory == name: + return desc + return None + + def get_flat_desc(self, name=None) -> Optional[CodecDescriptor]: + for desc in self.codec_descs: + desc_name = desc.get_name() + if desc_name == name: + return desc + if desc_name.startswith("Flat"): + return desc + return None + + def build_index_wrapper(self, codec_desc: CodecDescriptor): + if hasattr(codec_desc, "index"): + return + + if codec_desc.factory is not None: + assert ( + codec_desc.factory == "Flat" or codec_desc.training_vectors is not None + ) + index = IndexFromFactory( + num_threads=self.num_threads, + d=codec_desc.d, + metric=self.distance_metric, + construction_params=codec_desc.construction_params, + factory=codec_desc.factory, + training_vectors=codec_desc.training_vectors, + codec_name=codec_desc.get_name(), + ) + index.set_io(self.io) + codec_desc.index = index + else: + assert codec_desc.is_trained() + + def train( + self, codec_desc: CodecDescriptor, results: Dict[str, Any], dry_run=False + ): + self.build_index_wrapper(codec_desc) + if codec_desc.is_trained(): + return results, None + + if dry_run: + meta, requires = codec_desc.index.fetch_meta(dry_run=dry_run) + else: + codec_desc.index.get_codec() + meta, requires = codec_desc.index.fetch_meta(dry_run=dry_run) + assert requires is None + + if requires is None: + results["indices"][codec_desc.get_name()] = meta + return results, requires + + +@dataclass +class BuildOperator(IndexOperator): + index_descs: List[IndexDescriptor] = field(default_factory=lambda: []) + + def get_desc(self, name: str) -> Optional[IndexDescriptor]: + for desc in self.index_descs: + if desc.get_name() == name: + return desc + return None + + def get_flat_desc(self, name=None) -> Optional[IndexDescriptor]: for desc in self.index_descs: - if desc.factory == factory: + desc_name = desc.get_name() + if desc_name == name: + 
return desc + if desc_name.startswith("Flat"): + return desc + return None + + def build_index_wrapper(self, index_desc: IndexDescriptor): + if hasattr(index_desc, "index"): + return + + if hasattr(index_desc.codec_desc, "index"): + index_desc.index = index_desc.codec_desc.index + index_desc.index.database_vectors = index_desc.database_desc + index_desc.index.index_name = index_desc.get_name() + return + + if index_desc.codec_desc is not None: + index = IndexFromCodec( + num_threads=self.num_threads, + d=index_desc.d, + metric=self.distance_metric, + database_vectors=index_desc.database_desc, + bucket=index_desc.codec_desc.bucket, + path=index_desc.codec_desc.path, + index_name=index_desc.get_name(), + codec_name=index_desc.codec_desc.get_name(), + ) + index.set_io(self.io) + index_desc.index = index + else: + assert index_desc.is_built() + + def build(self, index_desc: IndexDescriptor, results: Dict[str, Any]): + self.build_index_wrapper(index_desc) + if index_desc.is_built(): + return + index_desc.index.get_index() + + +@dataclass +class SearchOperator(IndexOperator): + knn_descs: List[KnnDescriptor] = field(default_factory=lambda: []) + range: bool = False + + def get_desc(self, name: str) -> Optional[KnnDescriptor]: + for desc in self.knn_descs: + if desc.get_name() == name: + return desc + return None + + def get_flat_desc(self, name=None) -> Optional[KnnDescriptor]: + for desc in self.knn_descs: + if desc.get_name().startswith("Flat"): return desc return None - def range_search_reference(self, index, parameters, range_metric): + def build_index_wrapper(self, knn_desc: KnnDescriptor): + if hasattr(knn_desc, "index"): + return + + if knn_desc.index_desc.index is not None: + knn_desc.index = knn_desc.index_desc.index + knn_desc.index.knn_name = knn_desc.get_name() + knn_desc.index.search_params = knn_desc.search_params + else: + index = Index( + num_threads=self.num_threads, + d=knn_desc.d, + metric=self.distance_metric, + bucket=knn_desc.index_desc.bucket, + 
index_path=knn_desc.index_desc.path, + # knn_name=knn_desc.get_name(), + search_params=knn_desc.search_params, + ) + index.set_io(self.io) + knn_desc.index = index + + knn_desc.index.get_index() + + def range_search_reference(self, index, parameters, range_metric, query_dataset): logger.info("range_search_reference: begin") if isinstance(range_metric, list): assert len(range_metric) > 0 @@ -231,8 +382,9 @@ def range_search_reference(self, index, parameters, range_metric): index, parameters, radius=m_radius, + query_dataset=query_dataset, ) - flat = index.factory == "Flat" + flat = index.is_flat_index() ( gt_radius, range_search_metric_function, @@ -251,11 +403,11 @@ def range_search_reference(self, index, parameters, range_metric): coefficients_training_data, ) - def estimate_range(self, index, parameters, range_scoring_radius): + def estimate_range(self, index, parameters, range_scoring_radius, query_dataset): D, I, R, P, _ = index.knn_search( False, parameters, - self.query_vectors, + query_dataset, self.k, ) samples = [] @@ -273,6 +425,7 @@ def range_search( dry_run, index: Index, search_parameters: Optional[Dict[str, int]], + query_dataset: DatasetDescriptor, radius: Optional[float] = None, gt_radius: Optional[float] = None, range_search_metric_function=None, @@ -285,25 +438,21 @@ def range_search( gt_radius if index.is_flat() else self.estimate_range( - index, - search_parameters, - gt_radius, + index, search_parameters, gt_radius, query_dataset ) ) logger.info(f"Radius={radius}") lims, D, I, R, P, requires = index.range_search( dry_run=dry_run, search_parameters=search_parameters, - query_vectors=self.query_vectors, + query_vectors=query_dataset, radius=radius, ) if requires is not None: return None, None, None, None, None, requires if range_search_metric_function is not None: range_search_metric = range_search_metric_function(R) - range_search_pr = range_search_pr_curve( - D, range_search_metric, gt_rsm - ) + range_search_pr = range_search_pr_curve(D, 
range_search_metric, gt_rsm) range_score_sum = np.sum(range_search_metric).item() P |= { "range_score_sum": range_score_sum, @@ -312,23 +461,29 @@ def range_search( } return lims, D, I, R, P, requires - def range_ground_truth(self, gt_radius, range_search_metric_function): + def range_ground_truth( + self, gt_radius, range_search_metric_function, flat_desc=None + ): logger.info("range_ground_truth: begin") - flat_desc = self.get_index_desc("Flat") + if flat_desc is None: + flat_desc = self.get_flat_desc() lims, D, I, R, P, _ = self.range_search( False, flat_desc.index, search_parameters=None, radius=gt_radius, + query_dataset=flat_desc.query_dataset, ) gt_rsm = np.sum(range_search_metric_function(R)).item() logger.info("range_ground_truth: end") return gt_rsm - def knn_ground_truth(self): + def knn_ground_truth(self, flat_desc=None): logger.info("knn_ground_truth: begin") - flat_desc = self.get_index_desc("Flat") + if flat_desc is None: + flat_desc = self.get_flat_desc() self.build_index_wrapper(flat_desc) + # TODO(kuarora): Consider moving gt results(gt_knn_D, gt_knn_I) to the index as there can be multiple ground truths. 
( self.gt_knn_D, self.gt_knn_I, @@ -338,8 +493,8 @@ def knn_ground_truth(self): ) = flat_desc.index.knn_search( dry_run=False, search_parameters=None, - query_vectors=self.query_vectors, - k=self.k, + query_vectors=flat_desc.query_dataset, + k=flat_desc.k, ) assert requires is None logger.info("knn_ground_truth: end") @@ -369,6 +524,7 @@ def experiment(parameters, cost_metric, perf_metric): results["experiments"][key] = metrics return metrics[cost_metric], metrics[perf_metric], None + requires = None for cost_metric in cost_metrics: for perf_metric in perf_metrics: op = index.get_operating_points() @@ -384,52 +540,52 @@ def experiment(parameters, cost_metric, perf_metric): return results, requires def knn_search_benchmark( - self, dry_run, results: Dict[str, Any], index: Index + self, dry_run, results: Dict[str, Any], knn_desc: KnnDescriptor ): return self.search_benchmark( name="knn_search", - search_func=lambda parameters: index.knn_search( + search_func=lambda parameters: knn_desc.index.knn_search( dry_run, parameters, - self.query_vectors, - self.k, + knn_desc.query_dataset, + knn_desc.k, self.gt_knn_I, self.gt_knn_D, )[3:], - key_func=lambda parameters: index.get_knn_search_name( + key_func=lambda parameters: knn_desc.index.get_knn_search_name( search_parameters=parameters, - query_vectors=self.query_vectors, - k=self.k, + query_vectors=knn_desc.query_dataset, + k=knn_desc.k, reconstruct=False, ), cost_metrics=["time"], perf_metrics=["knn_intersection", "distance_ratio"], results=results, - index=index, + index=knn_desc.index, ) def reconstruct_benchmark( - self, dry_run, results: Dict[str, Any], index: Index + self, dry_run, results: Dict[str, Any], knn_desc: KnnDescriptor ): return self.search_benchmark( name="reconstruct", - search_func=lambda parameters: index.reconstruct( + search_func=lambda parameters: knn_desc.index.reconstruct( dry_run, parameters, - self.query_vectors, - self.k, + knn_desc.query_dataset, + knn_desc.k, self.gt_knn_I, ), - 
key_func=lambda parameters: index.get_knn_search_name( + key_func=lambda parameters: knn_desc.index.get_knn_search_name( search_parameters=parameters, - query_vectors=self.query_vectors, - k=self.k, + query_vectors=knn_desc.query_dataset, + k=knn_desc.k, reconstruct=True, ), cost_metrics=["encode_time"], perf_metrics=["sym_recall"], results=results, - index=index, + index=knn_desc.index, ) def range_search_benchmark( @@ -442,6 +598,7 @@ def range_search_benchmark( gt_radius: float, range_search_metric_function, gt_rsm: float, + query_dataset: DatasetDescriptor, ): return self.search_benchmark( name="range_search", @@ -453,10 +610,11 @@ def range_search_benchmark( gt_radius=gt_radius, range_search_metric_function=range_search_metric_function, gt_rsm=gt_rsm, + query_dataset=query_dataset, )[4:], key_func=lambda parameters: index.get_range_search_name( search_parameters=parameters, - query_vectors=self.query_vectors, + query_vectors=query_dataset, radius=radius, ) + metric_key, @@ -466,69 +624,88 @@ def range_search_benchmark( index=index, ) - def build_index_wrapper(self, index_desc: IndexDescriptor): - if hasattr(index_desc, "index"): - return - if index_desc.factory is not None: - training_vectors = copy(self.training_vectors) - if index_desc.training_size is not None: - training_vectors.num_vectors = index_desc.training_size - index = IndexFromFactory( - num_threads=self.num_threads, - d=self.d, - metric=self.distance_metric, - database_vectors=self.database_vectors, - search_params=index_desc.search_params, - construction_params=index_desc.construction_params, - factory=index_desc.factory, - training_vectors=training_vectors, - ) + +@dataclass +class ExecutionOperator: + distance_metric: str = "L2" + num_threads: int = 1 + train_op: Optional[TrainOperator] = None + build_op: Optional[BuildOperator] = None + search_op: Optional[SearchOperator] = None + + def __post_init__(self): + if self.distance_metric == "IP": + self.distance_metric_type = 
faiss.METRIC_INNER_PRODUCT + elif self.distance_metric == "L2": + self.distance_metric_type = faiss.METRIC_L2 else: - index = IndexFromCodec( - num_threads=self.num_threads, - d=self.d, - metric=self.distance_metric, - database_vectors=self.database_vectors, - search_params=index_desc.search_params, - construction_params=index_desc.construction_params, - path=index_desc.path, - bucket=index_desc.bucket, - ) - index.set_io(self.io) - index_desc.index = index + raise ValueError - def clone_one(self, index_desc): - benchmark = Benchmark( - num_threads=self.num_threads, - training_vectors=self.training_vectors, - database_vectors=self.database_vectors, - query_vectors=self.query_vectors, - index_descs=[self.get_index_desc("Flat"), index_desc], - range_ref_index_desc=self.range_ref_index_desc, - k=self.k, - distance_metric=self.distance_metric, - ) - benchmark.set_io(self.io.clone()) - return benchmark + def set_io(self, io: BenchmarkIO): + self.io = io + self.io.distance_metric = self.distance_metric + self.io.distance_metric_type = self.distance_metric_type + if self.train_op: + self.train_op.set_io(io) + if self.build_op: + self.build_op.set_io(io) + if self.search_op: + self.search_op.set_io(io) + + def train_one(self, codec_desc: CodecDescriptor, results: Dict[str, Any], dry_run): + faiss.omp_set_num_threads(self.num_threads) + assert self.train_op is not None + self.train_op.train(codec_desc, results, dry_run) + + def train(self, results, dry_run=False): + faiss.omp_set_num_threads(self.num_threads) + if self.train_op is None: + return + + for codec_desc in self.train_op.codec_descs: + self.train_one(codec_desc, results, dry_run) + + def build_one(self, results: Dict[str, Any], index_desc: IndexDescriptor): + faiss.omp_set_num_threads(self.num_threads) + assert self.build_op is not None + self.build_op.build(index_desc, results) + + def build(self, results: Dict[str, Any]): + faiss.omp_set_num_threads(self.num_threads) + if self.build_op is None: + return + + for 
index_desc in self.build_op.index_descs: + self.build_one(index_desc, results) + + def search(self): + faiss.omp_set_num_threads(self.num_threads) + if self.search_op is None: + return - def benchmark_one( + for index_desc in self.search_op.knn_descs: + self.search_one(index_desc) + + def search_one( self, - dry_run, + knn_desc: KnnDescriptor, results: Dict[str, Any], - index_desc: IndexDescriptor, - train, - reconstruct, - knn, - range, + dry_run=False, + range=False, ): faiss.omp_set_num_threads(self.num_threads) + assert self.search_op is not None + if not dry_run: - self.knn_ground_truth() - self.build_index_wrapper(index_desc) - meta, requires = index_desc.index.fetch_meta(dry_run=dry_run) + self.create_gt_knn(knn_desc) + self.create_range_ref_knn(knn_desc) + + self.search_op.build_index_wrapper(knn_desc) + meta, requires = knn_desc.index.fetch_meta(dry_run=dry_run) if requires is not None: - return results, (requires if train else None) - results["indices"][index_desc.index.get_codec_name()] = meta + # return results, (requires if train else None) + return results, requires + results["indices"][knn_desc.index.get_codec_name()] = meta # results, requires = self.reconstruct_benchmark( # dry_run=True, @@ -545,33 +722,32 @@ def benchmark_one( # index=index_desc.index, # ) # assert requires is None - - results, requires = self.knn_search_benchmark( + results, requires = self.search_op.knn_search_benchmark( dry_run=True, results=results, - index=index_desc.index, + knn_desc=knn_desc, ) - if knn and requires is not None: + if requires is not None: if dry_run: return results, requires else: - results, requires = self.knn_search_benchmark( + results, requires = self.search_op.knn_search_benchmark( dry_run=False, results=results, - index=index_desc.index, + knn_desc=knn_desc, ) assert requires is None if ( - self.range_ref_index_desc is None - or not index_desc.index.supports_range_search() + knn_desc.range_ref_index_desc is None or + not 
knn_desc.index.supports_range_search() ): return results, None - ref_index_desc = self.get_index_desc(self.range_ref_index_desc) + ref_index_desc = self.search_op.get_desc(knn_desc.range_ref_index_desc) if ref_index_desc is None: raise ValueError( - f"Unknown range index {self.range_ref_index_desc}" + f"{knn_desc.get_name()}: Unknown range index {knn_desc.range_ref_index_desc}" ) if ref_index_desc.range_metrics is None: raise ValueError( @@ -583,91 +759,360 @@ def benchmark_one( range_search_metric_function, coefficients, coefficients_training_data, - ) = self.range_search_reference( + ) = self.search_op.range_search_reference( ref_index_desc.index, ref_index_desc.search_params, range_metric, ) - gt_rsm = self.range_ground_truth( + gt_rsm = self.search_op.range_ground_truth( gt_radius, range_search_metric_function ) - results, requires = self.range_search_benchmark( + results, requires = self.search_op.range_search_benchmark( dry_run=True, results=results, - index=index_desc.index, + index=knn_desc.index, metric_key=metric_key, - radius=index_desc.radius, + radius=knn_desc.radius, gt_radius=gt_radius, range_search_metric_function=range_search_metric_function, gt_rsm=gt_rsm, + query_vectors=knn_desc.query_dataset, ) if range and requires is not None: if dry_run: return results, requires else: - results, requires = self.range_search_benchmark( + results, requires = self.search_op.range_search_benchmark( dry_run=False, results=results, - index=index_desc.index, + index=knn_desc.index, metric_key=metric_key, - radius=index_desc.radius, + radius=knn_desc.radius, gt_radius=gt_radius, range_search_metric_function=range_search_metric_function, gt_rsm=gt_rsm, + query_vectors=knn_desc.query_dataset, ) assert requires is None return results, None - def benchmark( - self, - result_file=None, - local=False, - train=False, - reconstruct=False, - knn=False, - range=False, - ): - logger.info("begin evaluate") + def create_gt_codec( + self, codec_desc, results, train=True + ) -> 
Optional[CodecDescriptor]: + gt_codec_desc = None + if self.train_op: + gt_codec_desc = self.train_op.get_flat_desc(codec_desc.flat_name()) + if gt_codec_desc is None: + gt_codec_desc = CodecDescriptor( + factory="Flat", + d=codec_desc.d, + metric=codec_desc.metric, + num_threads=self.num_threads, + ) + self.train_op.codec_descs.insert(0, gt_codec_desc) + if train: + self.train_op.train(gt_codec_desc, results, dry_run=False) - faiss.omp_set_num_threads(self.num_threads) - results = {"indices": {}, "experiments": {}} - xq = self.io.get_dataset(self.query_vectors) - self.d = xq.shape[1] - if self.get_index_desc("Flat") is None: - self.index_descs.append(IndexDescriptor(factory="Flat")) + return gt_codec_desc - self.knn_ground_truth() + def create_gt_index( + self, index_desc: IndexDescriptor, results: Dict[str, Any], build=True + ) -> Optional[IndexDescriptor]: + gt_index_desc = None + if self.build_op: + gt_index_desc = self.build_op.get_flat_desc(index_desc.flat_name()) + if gt_index_desc is None: + gt_codec_desc = self.train_op.get_flat_desc( + index_desc.codec_desc.flat_name() + ) + assert gt_codec_desc is not None + gt_index_desc = IndexDescriptor( + d=index_desc.d, + metric=index_desc.metric, + num_threads=self.num_threads, + codec_desc=gt_codec_desc, + database_desc=index_desc.database_desc, + ) + self.build_op.index_descs.insert(0, gt_index_desc) + if build: + self.build_op.build(gt_index_desc, results) - if self.range_ref_index_desc is not None: - index_desc = self.get_index_desc(self.range_ref_index_desc) - if index_desc is None: - raise ValueError( - f"Unknown range index {self.range_ref_index_desc}" + return gt_index_desc + + def create_gt_knn(self, knn_desc, search=True) -> Optional[KnnDescriptor]: + gt_knn_desc = None + if self.search_op: + gt_knn_desc = self.search_op.get_flat_desc(knn_desc.flat_name()) + if gt_knn_desc is None: + gt_index_desc = self.build_op.get_flat_desc( + knn_desc.index_desc.flat_name() + ) + assert gt_index_desc is not None + 
gt_knn_desc = KnnDescriptor( + d=knn_desc.d, + metric=knn_desc.metric, + num_threads=self.num_threads, + index_desc=gt_index_desc, + query_dataset=knn_desc.query_dataset, + k=knn_desc.k, ) - if index_desc.range_metrics is None: + self.search_op.knn_descs.insert(0, gt_knn_desc) + if search: + self.search_op.build_index_wrapper(gt_knn_desc) + self.search_op.knn_ground_truth(gt_knn_desc) + + return gt_knn_desc + + def create_range_ref_knn(self, knn_desc): + if ( + knn_desc.range_ref_index_desc is None or + not knn_desc.index.supports_range_search() + ): + return + + if knn_desc.range_ref_index_desc is not None: + ref_index_desc = self.get_desc(knn_desc.range_ref_index_desc) + if ref_index_desc is None: + raise ValueError(f"Unknown range index {knn_desc.range_ref_index_desc}") + if ref_index_desc.range_metrics is None: raise ValueError( - f"Range index {index_desc.factory} has no radius_score" + f"Range index {knn_desc.get_name()} has no radius_score" ) results["metrics"] = {} - for metric_key, range_metric in index_desc.range_metrics.items(): + self.build_index_wrapper(ref_index_desc) + for metric_key, range_metric in ref_index_desc.range_metrics.items(): ( - gt_radius, + knn_desc.gt_radius, range_search_metric_function, coefficients, coefficients_training_data, ) = self.range_search_reference( - index_desc.index, index_desc.search_params, range_metric + knn_desc.index, knn_desc.search_params, range_metric ) results["metrics"][metric_key] = { "coefficients": coefficients, "training_data": coefficients_training_data, } - gt_rsm = self.range_ground_truth( - gt_radius, range_search_metric_function + knn_desc.gt_rsm = self.range_ground_truth( + knn_desc.gt_radius, range_search_metric_function + ) + + def create_ground_truths(self, results: Dict[str, Any]): + # TODO: Create all ground truth descriptors and put them in index descriptor as reference + if self.train_op is not None: + for codec_desc in self.train_op.codec_descs: + self.create_gt_codec(codec_desc, results) + + 
if self.build_op is not None: + for index_desc in self.build_op.index_descs: + self.create_gt_index( + index_desc, results + ) # may need to pass results in future + + if self.search_op is not None: + for knn_desc in self.search_op.knn_descs: + self.create_gt_knn(knn_desc, results) + self.create_range_ref_knn(knn_desc) + + def execute(self, results: Dict[str, Any], dry_run: False): + if self.train_op is not None: + for desc in self.train_op.codec_descs: + results, requires = self.train_op.train(desc, results, dry_run=dry_run) + if dry_run: + if requires is None: + continue + return results, requires + assert requires is None + + if self.build_op is not None: + for desc in self.build_op.index_descs: + self.build_op.build(desc, results) + if self.search_op is not None: + for desc in self.search_op.knn_descs: + results, requires = self.search_one( + knn_desc=desc, results=results, dry_run=dry_run, range=self.search_op.range + ) + if dry_run: + if requires is None: + continue + return results, requires + + assert requires is None + return results, None + + def execute_2(self, result_file=None): + results = {"indices": {}, "experiments": {}} + results, requires = self.execute(results=results) + assert requires is None + if result_file is not None: + self.io.write_json(results, result_file, overwrite=True) + + def add_index_descs(self, codec_desc, index_desc, knn_desc): + if codec_desc is not None: + self.train_op.codec_descs.append(codec_desc) + if index_desc is not None: + self.build_op.index_descs.append(index_desc) + if knn_desc is not None: + self.search_op.knn_descs.append(knn_desc) + + +@dataclass +class Benchmark: + num_threads: int + training_vectors: Optional[DatasetDescriptor] = None + database_vectors: Optional[DatasetDescriptor] = None + query_vectors: Optional[DatasetDescriptor] = None + index_descs: Optional[List[IndexDescriptorClassic]] = None + range_ref_index_desc: Optional[str] = None + k: int = 1 + distance_metric: str = "L2" + + def set_io(self, 
benchmark_io): + self.io = benchmark_io + + def get_embedding_dimension(self): + if self.training_vectors is not None: + xt = self.io.get_dataset(self.training_vectors) + return xt.shape[1] + if self.database_vectors is not None: + xb = self.io.get_dataset(self.database_vectors) + return xb.shape[1] + if self.query_vectors is not None: + xq = self.io.get_dataset(self.query_vectors) + return xq.shape[1] + raise ValueError("Failed to determine dimension of dataset") + + def create_descriptors( + self, ci_desc: IndexDescriptorClassic, train, build, knn, reconstruct, range + ): + codec_desc = None + index_desc = None + knn_desc = None + dim = self.get_embedding_dimension() + if train and ci_desc.factory is not None: + codec_desc = CodecDescriptor( + d=dim, + metric=self.distance_metric, + num_threads=self.num_threads, + factory=ci_desc.factory, + construction_params=ci_desc.construction_params, + training_vectors=self.training_vectors, + ) + if build: + if codec_desc is None: + assert ci_desc.path is not None + codec_desc = CodecDescriptor( + d=dim, + metric=self.distance_metric, + num_threads=self.num_threads, + bucket=ci_desc.bucket, + path=ci_desc.path, + ) + index_desc = IndexDescriptor( + d=codec_desc.d, + metric=self.distance_metric, + num_threads=self.num_threads, + codec_desc=codec_desc, + database_desc=self.database_vectors, + ) + if knn or range: + if index_desc is None: + assert ci_desc.path is not None + index_desc = IndexDescriptor( + d=dim, + metric=self.distance_metric, + num_threads=self.num_threads, + bucket=ci_desc.bucket, + path=ci_desc.path, ) + knn_desc = KnnDescriptor( + d=dim, + metric=self.distance_metric, + num_threads=self.num_threads, + index_desc=index_desc, + query_dataset=self.query_vectors, + search_params=ci_desc.search_params, + range_metrics=ci_desc.range_metrics, + radius=ci_desc.radius, + k=self.k, + ) - self.index_descs = list(dict.fromkeys(self.index_descs)) + return codec_desc, index_desc, knn_desc + + def 
create_execution_operator( + self, + train, + build, + knn, + reconstruct, + range, + ) -> ExecutionOperator: + # all operators are created, as ground truth are always created in benchmarking + train_op = TrainOperator( + num_threads=self.num_threads, distance_metric=self.distance_metric + ) + build_op = BuildOperator( + num_threads=self.num_threads, distance_metric=self.distance_metric + ) + search_op = SearchOperator( + num_threads=self.num_threads, distance_metric=self.distance_metric + ) + search_op.range = range + + exec_op = ExecutionOperator( + train_op=train_op, + build_op=build_op, + search_op=search_op, + num_threads=self.num_threads, + ) + assert hasattr(self, "io") + exec_op.set_io(self.io) + + # iterate over classic descriptors + for ci_desc in self.index_descs: + codec_desc, index_desc, knn_desc = self.create_descriptors( + ci_desc, train, build, knn, reconstruct, range + ) + exec_op.add_index_descs(codec_desc, index_desc, knn_desc) + + return exec_op + + def clone_one(self, index_desc): + benchmark = Benchmark( + num_threads=self.num_threads, + training_vectors=self.training_vectors, + database_vectors=self.database_vectors, + query_vectors=self.query_vectors, + # index_descs=[self.get_flat_desc("Flat"), index_desc], + index_descs=[index_desc], # Should automatically find flat descriptors + range_ref_index_desc=self.range_ref_index_desc, + k=self.k, + distance_metric=self.distance_metric, + ) + benchmark.set_io(self.io.clone()) + return benchmark + + def benchmark( + self, + result_file=None, + local=False, + train=False, + reconstruct=False, + knn=False, + range=False, + ): + logger.info("begin evaluate") + results = {"indices": {}, "experiments": {}} + faiss.omp_set_num_threads(self.num_threads) + exec_op = self.create_execution_operator( + train=train, + build=knn or range, + knn=knn, + reconstruct=reconstruct, + range=range, + ) + exec_op.create_ground_truths(results) todo = self.index_descs for index_desc in self.index_descs: @@ -678,15 +1123,7 
@@ def benchmark( current_todo = [] next_todo = [] for index_desc in todo: - results, requires = self.benchmark_one( - dry_run=True, - results=results, - index_desc=index_desc, - train=train, - reconstruct=reconstruct, - knn=knn, - range=range, - ) + results, requires = exec_op.execute(results, dry_run=False) if requires is None: continue if requires in queued: @@ -728,15 +1165,14 @@ def benchmark( def run_benchmark_one(params): logger.info(params) index_desc, benchmark, results, train, reconstruct, knn, range = params - results, requires = benchmark.benchmark_one( - dry_run=False, - results=results, - index_desc=index_desc, + exec_op = benchmark.create_execution_operator( train=train, - reconstruct=reconstruct, + build=knn, knn=knn, + reconstruct=reconstruct, range=range, ) + results, requires = exec_op.execute(results=results, dry_run=False) assert requires is None assert results is not None return results diff --git a/thirdparty/faiss/benchs/bench_fw/benchmark_io.py b/thirdparty/faiss/benchs/bench_fw/benchmark_io.py index b39bb6029..5ee3eb3a6 100644 --- a/thirdparty/faiss/benchs/bench_fw/benchmark_io.py +++ b/thirdparty/faiss/benchs/bench_fw/benchmark_io.py @@ -53,6 +53,7 @@ def clone(self): def __post_init__(self): self.cached_ds = {} + # TODO(kuarora): rename it as get_local_file def get_local_filename(self, filename): if len(filename) > 184: fn, ext = os.path.splitext(filename) @@ -61,6 +62,9 @@ def get_local_filename(self, filename): ) return os.path.join(self.path, filename) + def get_remote_filepath(self, filename) -> Optional[str]: + return None + def download_file_from_blobstore( self, filename: str, @@ -219,7 +223,7 @@ def read_index( fn = self.download_file_from_blobstore(filename, bucket, path) logger.info(f"Loading index {fn}") ext = os.path.splitext(fn)[1] - if ext in [".faiss", ".codec"]: + if ext in [".faiss", ".codec", ".index"]: index = faiss.read_index(fn) elif ext == ".pkl": with open(fn, "rb") as model_file: diff --git 
a/thirdparty/faiss/benchs/bench_fw/descriptors.py b/thirdparty/faiss/benchs/bench_fw/descriptors.py index f1dd7354c..e76278ced 100644 --- a/thirdparty/faiss/benchs/bench_fw/descriptors.py +++ b/thirdparty/faiss/benchs/bench_fw/descriptors.py @@ -3,23 +3,27 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from dataclasses import dataclass import logging +import os +from dataclasses import dataclass from typing import Any, Dict, List, Optional import faiss # @manual=//faiss/python:pyfaiss_gpu + +from .benchmark_io import BenchmarkIO from .utils import timer logger = logging.getLogger(__name__) @dataclass -class IndexDescriptor: +class IndexDescriptorClassic: bucket: Optional[str] = None # either path or factory should be set, # but not both at the same time. path: Optional[str] = None factory: Optional[str] = None + codec_alias: Optional[str] = None construction_params: Optional[List[Dict[str, int]]] = None search_params: Optional[Dict[str, int]] = None # range metric definitions @@ -44,7 +48,6 @@ class IndexDescriptor: def __hash__(self): return hash(str(self)) - @dataclass class DatasetDescriptor: # namespace possible values: @@ -80,7 +83,7 @@ def __hash__(self): def get_filename( self, - prefix: str = None, + prefix: Optional[str] = None, ) -> str: filename = "" if prefix is not None: @@ -115,3 +118,208 @@ def k_means(self, io, k, dry_run): else: t = io.read_json(meta_filename)["k_means_time"] return kmeans_vectors, t, None + +@dataclass +class IndexBaseDescriptor: + d: int + metric: str + desc_name: Optional[str] = None + flat_desc_name: Optional[str] = None + bucket: Optional[str] = None + path: Optional[str] = None + num_threads: int = 1 + + def get_name(self) -> str: + raise NotImplementedError() + + def get_path(self, benchmark_io: BenchmarkIO) -> Optional[str]: + if self.path is not None: + return self.path + self.path = benchmark_io.get_remote_filepath(self.desc_name) + return 
self.path + + @staticmethod + def param_dict_list_to_name(param_dict_list): + if not param_dict_list: + return "" + l = 0 + n = "" + for param_dict in param_dict_list: + n += IndexBaseDescriptor.param_dict_to_name(param_dict, f"cp{l}") + l += 1 + return n + + @staticmethod + def param_dict_to_name(param_dict, prefix="sp"): + if not param_dict: + return "" + n = prefix + for name, val in param_dict.items(): + if name == "snap": + continue + if name == "lsq_gpu" and val == 0: + continue + if name == "use_beam_LUT" and val == 0: + continue + n += f"_{name}_{val}" + if n == prefix: + return "" + n += "." + return n + + +@dataclass +class CodecDescriptor(IndexBaseDescriptor): + # either path or factory should be set, + # but not both at the same time. + factory: Optional[str] = None + construction_params: Optional[List[Dict[str, int]]] = None + training_vectors: Optional[DatasetDescriptor] = None + + def __post_init__(self): + self.get_name() + + def is_trained(self): + return self.factory is None and self.path is not None + + def is_valid(self): + return self.factory is not None or self.path is not None + + def get_name(self) -> str: + if self.desc_name is not None: + return self.desc_name + if self.factory is not None: + self.desc_name = self.name_from_factory() + return self.desc_name + if self.path is not None: + self.desc_name = self.name_from_path() + return self.desc_name + raise ValueError("name, factory or path must be set") + + def flat_name(self) -> str: + if self.flat_desc_name is not None: + return self.flat_desc_name + self.flat_desc_name = f"Flat.d_{self.d}.{self.metric.upper()}." + return self.flat_desc_name + + def path(self, benchmark_io) -> str: + if self.path is not None: + return self.path + return benchmark_io.get_remote_filepath(self.get_name()) + + def name_from_factory(self) -> str: + assert self.factory is not None + name = f"{self.factory.replace(',', '_')}." 
+ assert self.d is not None + assert self.metric is not None + name += f"d_{self.d}.{self.metric.upper()}." + if self.factory != "Flat": + assert self.training_vectors is not None + name += self.training_vectors.get_filename("xt") + name += IndexBaseDescriptor.param_dict_list_to_name(self.construction_params) + return name + + def name_from_path(self): + assert self.path is not None + filename = os.path.basename(self.path) + ext = filename.split(".")[-1] + if filename.endswith(ext): + name = filename[:-len(ext)] + else: # should never hit this rather raise value error + name = filename + return name + + def alias(self, benchmark_io : BenchmarkIO): + if hasattr(benchmark_io, "bucket"): + return CodecDescriptor(desc_name=self.get_name(), bucket=benchmark_io.bucket, path=self.get_path(benchmark_io), d=self.d, metric=self.metric) + return CodecDescriptor(desc_name=self.get_name(), d=self.d, metric=self.metric) + + +@dataclass +class IndexDescriptor(IndexBaseDescriptor): + codec_desc: Optional[CodecDescriptor] = None + database_desc: Optional[DatasetDescriptor] = None + + def __hash__(self): + return hash(str(self)) + + def __post_init__(self): + self.get_name() + + def is_built(self): + return self.codec_desc is None and self.database_desc is None + + def get_name(self) -> str: + if self.desc_name is None: + self.desc_name = self.codec_desc.get_name() + self.database_desc.get_filename(prefix="xb") + + return self.desc_name + + def flat_name(self): + if self.flat_desc_name is not None: + return self.flat_desc_name + self.flat_desc_name = self.codec_desc.flat_name() + self.database_desc.get_filename(prefix="xb") + return self.flat_desc_name + + # alias is used to refer when index is uploaded to blobstore and refered again + def alias(self, benchmark_io: BenchmarkIO): + if hasattr(benchmark_io, "bucket"): + return IndexDescriptor(desc_name=self.get_name(), bucket=benchmark_io.bucket, path=self.get_path(benchmark_io), d=self.d, metric=self.metric) + return 
IndexDescriptor(desc_name=self.get_name(), d=self.d, metric=self.metric) + +@dataclass +class KnnDescriptor(IndexBaseDescriptor): + index_desc: Optional[IndexDescriptor] = None + gt_index_desc: Optional[IndexDescriptor] = None + query_dataset: Optional[DatasetDescriptor] = None + search_params: Optional[Dict[str, int]] = None + reconstruct: bool = False + # range metric definitions + # key: name + # value: one of the following: + # + # radius + # [0..radius) -> 1 + # [radius..inf) -> 0 + # + # [[radius1, score1], ...] + # [0..radius1) -> score1 + # [radius1..radius2) -> score2 + # + # [[radius1_from, radius1_to, score1], ...] + # [radius1_from, radius1_to) -> score1, + # [radius2_from, radius2_to) -> score2 + range_metrics: Optional[Dict[str, Any]] = None + radius: Optional[float] = None + k: int = 1 + + range_ref_index_desc: Optional[str] = None + + def __hash__(self): + return hash(str(self)) + + def get_name(self): + name = self.index_desc.get_name() + name += IndexBaseDescriptor.param_dict_to_name(self.search_params) + name += self.query_dataset.get_filename("q") + name += f"k_{self.k}." + name += f"t_{self.num_threads}." + if self.reconstruct: + name += "rec." + else: + name += "knn." + return name + + def flat_name(self): + if self.flat_desc_name is not None: + return self.flat_desc_name + name = self.index_desc.flat_name() + name += self.query_dataset.get_filename("q") + name += f"k_{self.k}." + name += f"t_{self.num_threads}." + if self.reconstruct: + name += "rec." + else: + name += "knn." 
+ self.flat_desc_name = name + return name diff --git a/thirdparty/faiss/benchs/bench_fw/index.py b/thirdparty/faiss/benchs/bench_fw/index.py index 14f2158e6..6b6c2d93a 100644 --- a/thirdparty/faiss/benchs/bench_fw/index.py +++ b/thirdparty/faiss/benchs/bench_fw/index.py @@ -13,6 +13,7 @@ import faiss # @manual=//faiss/python:pyfaiss_gpu import numpy as np +from faiss.benchs.bench_fw.descriptors import IndexBaseDescriptor from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu knn_intersection_measure, @@ -49,35 +50,6 @@ class IndexBase: def set_io(self, benchmark_io): self.io = benchmark_io - @staticmethod - def param_dict_list_to_name(param_dict_list): - if not param_dict_list: - return "" - l = 0 - n = "" - for param_dict in param_dict_list: - n += IndexBase.param_dict_to_name(param_dict, f"cp{l}") - l += 1 - return n - - @staticmethod - def param_dict_to_name(param_dict, prefix="sp"): - if not param_dict: - return "" - n = prefix - for name, val in param_dict.items(): - if name == "snap": - continue - if name == "lsq_gpu" and val == 0: - continue - if name == "use_beam_LUT" and val == 0: - continue - n += f"_{name}_{val}" - if n == prefix: - return "" - n += "." - return n - @staticmethod def set_index_param_dict_list(index, param_dict_list, assert_same=False): if not param_dict_list: @@ -282,7 +254,7 @@ def get_knn_search_name( reconstruct: bool = False, ): name = self.get_index_name() - name += Index.param_dict_to_name(search_parameters) + name += IndexBaseDescriptor.param_dict_to_name(search_parameters) name += query_vectors.get_filename("q") name += f"k_{k}." name += f"t_{self.num_threads}." 
@@ -495,7 +467,7 @@ def range_search( radius: Optional[float] = None, ): logger.info("range_search: begin") - if search_parameters is not None and search_parameters["snap"] == 1: + if search_parameters is not None and search_parameters.get("snap") == 1: query_vectors = self.snap(query_vectors) filename = ( self.get_range_search_name( @@ -582,14 +554,21 @@ class Index(IndexBase): num_threads: int d: int metric: str - database_vectors: DatasetDescriptor - construction_params: List[Dict[str, int]] - search_params: Dict[str, int] + codec_name: Optional[str] = None + index_name: Optional[str] = None + database_vectors: Optional[DatasetDescriptor] = None + construction_params: Optional[List[Dict[str, int]]] = None + search_params: Optional[Dict[str, int]] = None + serialize_full_index: bool = False + + bucket: Optional[str] = None + index_path: Optional[str] = None cached_codec: ClassVar[OrderedDict[str, faiss.Index]] = OrderedDict() cached_index: ClassVar[OrderedDict[str, faiss.Index]] = OrderedDict() def __post_init__(self): + logger.info(f"Initializing metric_type to {self.metric}") if isinstance(self.metric, str): if self.metric == "IP": self.metric_type = faiss.METRIC_INNER_PRODUCT @@ -628,13 +607,31 @@ def get_codec(self): Index.cached_codec.popitem(last=False) return Index.cached_codec[codec_name] - def get_index_name(self): - name = self.get_codec_name() - assert self.database_vectors is not None - name += self.database_vectors.get_filename("xb") - return name + def get_codec_name(self) -> Optional[str]: + return self.codec_name + + def get_index_name(self) -> Optional[str]: + return self.index_name def fetch_index(self): + # read index from file if it is already available + if self.index_path: + index_filename = os.path.basename(self.index_path) + else: + index_filename = self.index_name + "index" + if self.io.file_exist(index_filename): + if self.index_path: + index = self.io.read_index( + index_filename, + self.bucket, + os.path.dirname(self.index_path), + ) + 
else: + index = self.io.read_index(index_filename) + assert self.d == index.d + assert self.metric_type == index.metric_type + return index, 0 + index = self.get_codec() index.reset() assert index.ntotal == 0 @@ -664,10 +661,15 @@ def fetch_index(self): ) assert index.ntotal == xb.shape[0] or index_ivf.ntotal == xb.shape[0] logger.info("Added vectors to index") + if self.serialize_full_index: + codec_size = self.io.write_index(index, index_filename) + assert codec_size is not None + return index, t def get_index(self): - index_name = self.get_index_name() + index_name = self.index_name + # TODO(kuarora) : retrieve file from bucket and path. if index_name not in Index.cached_index: Index.cached_index[index_name], _ = self.fetch_index() if len(Index.cached_index) > 3: @@ -776,13 +778,20 @@ def add_range_or_val(name, range): ) return op + def is_flat_index(self): + return self.get_index_name().startswith("Flat") + # IndexFromCodec, IndexFromQuantizer and IndexFromPreTransform # are used to wrap pre-trained Faiss indices (codecs) @dataclass class IndexFromCodec(Index): - path: str - bucket: Optional[str] = None + path: Optional[str] = None + + def __post_init__(self): + super().__post_init__() + if self.path is None: + raise ValueError("path is not set") def get_quantizer(self): if not self.is_ivf(): @@ -801,11 +810,8 @@ def get_pretransform(self): def get_model_name(self): return os.path.basename(self.path) - def get_codec_name(self): - assert self.path is not None - name = os.path.basename(self.path) - name += Index.param_dict_list_to_name(self.construction_params) - return name + def fetch_meta(self, dry_run=False): + return None, None def fetch_codec(self): codec = self.io.read_index( @@ -865,20 +871,15 @@ def get_codec(self): # IndexFromFactory is for creating and training indices from scratch @dataclass class IndexFromFactory(Index): - factory: str - training_vectors: DatasetDescriptor + factory: Optional[str] = None + training_vectors: 
Optional[DatasetDescriptor] = None - def get_codec_name(self): - assert self.factory is not None - name = f"{self.factory.replace(',', '_')}." - assert self.d is not None - assert self.metric is not None - name += f"d_{self.d}.{self.metric.upper()}." - if self.factory != "Flat": - assert self.training_vectors is not None - name += self.training_vectors.get_filename("xt") - name += Index.param_dict_list_to_name(self.construction_params) - return name + def __post_init__(self): + super().__post_init__() + if self.factory is None: + raise ValueError("factory is not set") + if self.factory != "Flat" and self.training_vectors is None: + raise ValueError(f"training_vectors is not set for {self.factory}") def fetch_meta(self, dry_run=False): meta_filename = self.get_codec_name() + "json" @@ -911,7 +912,7 @@ def fetch_codec(self, dry_run=False): assert codec_size is not None meta = { "training_time": training_time, - "training_size": self.training_vectors.num_vectors, + "training_size": self.training_vectors.num_vectors if self.training_vectors else 0, "codec_size": codec_size, "sa_code_size": self.get_sa_code_size(codec), "code_size": self.get_code_size(codec), diff --git a/thirdparty/faiss/benchs/bench_fw/optimize.py b/thirdparty/faiss/benchs/bench_fw/optimize.py index 473436ea6..ac6c45ab0 100644 --- a/thirdparty/faiss/benchs/bench_fw/optimize.py +++ b/thirdparty/faiss/benchs/bench_fw/optimize.py @@ -14,7 +14,7 @@ # ) from .benchmark import Benchmark -from .descriptors import DatasetDescriptor, IndexDescriptor +from .descriptors import DatasetDescriptor, IndexDescriptorClassic from .utils import dict_merge, filter_results, ParetoMetric, ParetoMode logger = logging.getLogger(__name__) @@ -78,7 +78,7 @@ def benchmark_and_filter_candidates( ) assert filtered index_descs = [ - IndexDescriptor( + IndexDescriptorClassic( factory=v["factory"], construction_params=v["construction_params"], search_params=v["search_params"], @@ -103,8 +103,8 @@ def optimize_quantizer( 
dry_run=False, ) - descs = [IndexDescriptor(factory="Flat"),] + [ - IndexDescriptor( + descs = [IndexDescriptorClassic(factory="Flat"),] + [ + IndexDescriptorClassic( factory="HNSW32", construction_params=[{"efConstruction": 2**i}], ) @@ -131,7 +131,7 @@ def optimize_ivf( training_vectors: DatasetDescriptor, database_vectors: DatasetDescriptor, query_vectors: DatasetDescriptor, - quantizers: Dict[int, List[IndexDescriptor]], + quantizers: Dict[int, List[IndexDescriptorClassic]], codecs: List[Tuple[str, str]], min_accuracy: float, ): @@ -159,7 +159,7 @@ def optimize_ivf( quantizer_desc.search_params, ) ivf_descs.append( - IndexDescriptor( + IndexDescriptorClassic( factory=f"{pretransform}IVF{nlist}({quantizer_desc.factory}),{fine_ivf}", construction_params=construction_params, ) @@ -188,7 +188,7 @@ def ivf_flat_nprobe_required_for_accuracy( ): _, results = self.benchmark_and_filter_candidates( index_descs=[ - IndexDescriptor(factory=f"IVF{nlist}(Flat),Flat"), + IndexDescriptorClassic(factory=f"IVF{nlist}(Flat),Flat"), ], training_vectors=training_vectors, database_vectors=database_vectors, @@ -226,7 +226,9 @@ def optimize_codec( [ (None, "Flat"), (None, "SQfp16"), + (None, "SQbf16"), (None, "SQ8"), + (None, "SQ8_direct_signed"), ] + [ (f"OPQ{M}_{M * dim}", f"PQ{M}x{b}") for M in [8, 12, 16, 32, 48, 64, 96, 128, 192, 256] @@ -254,7 +256,7 @@ def optimize_codec( _, filtered = self.benchmark_and_filter_candidates( index_descs=[ - IndexDescriptor( + IndexDescriptorClassic( factory=f"IVF{nlist},{pq}" if opq is None else f"{opq},IVF{nlist},{pq}", diff --git a/thirdparty/faiss/benchs/bench_fw_codecs.py b/thirdparty/faiss/benchs/bench_fw_codecs.py index 80741e23f..d3efc2da0 100644 --- a/thirdparty/faiss/benchs/bench_fw_codecs.py +++ b/thirdparty/faiss/benchs/bench_fw_codecs.py @@ -7,10 +7,10 @@ import argparse import os -from bench_fw.benchmark import Benchmark -from bench_fw.benchmark_io import BenchmarkIO -from bench_fw.descriptors import DatasetDescriptor, 
IndexDescriptor -from bench_fw.index import IndexFromFactory +from faiss.benchs.bench_fw.benchmark import Benchmark +from faiss.benchs.bench_fw.benchmark_io import BenchmarkIO +from faiss.benchs.bench_fw.descriptors import DatasetDescriptor, IndexDescriptorClassic +from faiss.benchs.bench_fw.index import IndexFromFactory logging.basicConfig(level=logging.INFO) @@ -107,7 +107,7 @@ def run_local(rp): database_vectors=database_vectors, query_vectors=query_vectors, index_descs=[ - IndexDescriptor( + IndexDescriptorClassic( factory=factory, construction_params=construction_params, training_size=training_size, diff --git a/thirdparty/faiss/benchs/bench_fw_ivf.py b/thirdparty/faiss/benchs/bench_fw_ivf.py index 8c84743e2..b0c108b7d 100644 --- a/thirdparty/faiss/benchs/bench_fw_ivf.py +++ b/thirdparty/faiss/benchs/bench_fw_ivf.py @@ -3,16 +3,20 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -import logging import argparse +import logging import os -from bench_fw.benchmark import Benchmark -from bench_fw.benchmark_io import BenchmarkIO -from bench_fw.descriptors import DatasetDescriptor, IndexDescriptor +from faiss.benchs.bench_fw.benchmark import Benchmark +from faiss.benchs.bench_fw.benchmark_io import BenchmarkIO +from faiss.benchs.bench_fw.descriptors import ( + DatasetDescriptor, + IndexDescriptorClassic, +) logging.basicConfig(level=logging.INFO) + def sift1M(bio): benchmark = Benchmark( num_threads=32, @@ -26,7 +30,7 @@ def sift1M(bio): namespace="std_q", tablename="sift1M" ), index_descs=[ - IndexDescriptor( + IndexDescriptorClassic( factory=f"IVF{2 ** nlist},Flat", ) for nlist in range(8, 15) @@ -34,8 +38,9 @@ def sift1M(bio): k=1, distance_metric="L2", ) - benchmark.set_io(bio) - benchmark.benchmark(result_file="result.json", local=False, train=True, reconstruct=False, knn=True, range=False) + benchmark.io = bio + benchmark.benchmark(result_file="result.json", local=True, train=True, 
reconstruct=False, knn=True, range=False) + def bigann(bio): for scale in [1, 2, 5, 10, 20, 50]: @@ -51,11 +56,11 @@ def bigann(bio): namespace="std_q", tablename="bigann1M" ), index_descs=[ - IndexDescriptor( + IndexDescriptorClassic( factory=f"IVF{2 ** nlist},Flat", ) for nlist in range(11, 19) ] + [ - IndexDescriptor( + IndexDescriptorClassic( factory=f"IVF{2 ** nlist}_HNSW32,Flat", construction_params=[None, {"efConstruction": 200, "efSearch": 40}], ) for nlist in range(11, 19) @@ -79,18 +84,18 @@ def ssnpp(bio): tablename="ssnpp_queries_10K.npy" ), index_descs=[ - IndexDescriptor( + IndexDescriptorClassic( factory=f"IVF{2 ** nlist},PQ256x4fs,Refine(SQfp16)", ) for nlist in range(9, 16) ] + [ - IndexDescriptor( + IndexDescriptorClassic( factory=f"IVF{2 ** nlist},Flat", ) for nlist in range(9, 16) ] + [ - IndexDescriptor( + IndexDescriptorClassic( factory=f"PQ256x4fs,Refine(SQfp16)", ), - IndexDescriptor( + IndexDescriptorClassic( factory=f"HNSW32", ), ], diff --git a/thirdparty/faiss/benchs/bench_fw_notebook.ipynb b/thirdparty/faiss/benchs/bench_fw_notebook.ipynb index 5752aaf5f..c38ed1106 100644 --- a/thirdparty/faiss/benchs/bench_fw_notebook.ipynb +++ b/thirdparty/faiss/benchs/bench_fw_notebook.ipynb @@ -1,529 +1,532 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "be081589-e1b2-4569-acb7-44203e273899", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import itertools\n", - "from faiss.contrib.evaluation import OperatingPoints\n", - "from enum import Enum\n", - "from bench_fw.benchmark_io import BenchmarkIO as BIO\n", - "from bench_fw.utils import filter_results, ParetoMode, ParetoMetric\n", - "from copy import copy\n", - "import numpy as np\n", - "import datetime\n", - "import glob\n", - "import io\n", - "import json\n", - "from zipfile import ZipFile\n", - "import tabulate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"a6492e95-24c7-4425-bf0a-27e10e879ca6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "root = \"/checkpoint/gsz/bench_fw/optimize/bigann\"\n", - "results = BIO(root).read_json(\"result_std_d_bigann10M.json\")\n", - "results.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0875d269-aef4-426d-83dd-866970f43777", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "results['experiments']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f080a6e2-1565-418b-8732-4adeff03a099", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def plot_metric(experiments, accuracy_title, cost_title, plot_space=False, plot=None):\n", - " if plot is None:\n", - " plot = plt.subplot()\n", - " x = {}\n", - " y = {}\n", - " for accuracy, space, time, k, v in experiments:\n", - " idx_name = v['index'] + (\"snap\" if 'search_params' in v and v['search_params'][\"snap\"] == 1 else \"\")\n", - " if idx_name not in x:\n", - " x[idx_name] = []\n", - " y[idx_name] = []\n", - " x[idx_name].append(accuracy)\n", - " if plot_space:\n", - " y[idx_name].append(space)\n", - " else:\n", - " y[idx_name].append(time)\n", - "\n", - " #plt.figure(figsize=(10,6))\n", - " #plt.title(accuracy_title)\n", - " plot.set_xlabel(accuracy_title)\n", - " plot.set_ylabel(cost_title)\n", - " plot.set_yscale(\"log\")\n", - " marker = itertools.cycle((\"o\", \"v\", \"^\", \"<\", \">\", \"s\", \"p\", \"P\", \"*\", \"h\", \"X\", \"D\")) \n", - " for index in x.keys():\n", - " plot.plot(x[index], y[index], marker=next(marker), label=index, linewidth=0)\n", - " plot.legend(bbox_to_anchor=(1, 1), loc='upper left')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "61007155-5edc-449e-835e-c141a01a2ae5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# index local optima\n", - "accuracy_metric = \"knn_intersection\"\n", - "fr = filter_results(results, evaluation=\"knn\", 
accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1, min_accuracy=0.95)\n", - "plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"time (seconds, 32 cores)\", plot_space=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f9f94dcc-5abe-4cad-9619-f5d1d24fb8c1", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# global optima\n", - "accuracy_metric = \"knn_intersection\"\n", - "fr = filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, min_accuracy=0.90, max_space=64, max_time=0, name_filter=lambda n: not n.startswith(\"Flat\"), pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", - "plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"time (seconds, 32 cores)\", plot_space=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0c10f587-26ef-49ec-83a9-88f6a2a433e8", - "metadata": {}, - "outputs": [], - "source": [ - "def pretty_params(p):\n", - " p = copy(p)\n", - " if 'snap' in p and p['snap'] == 0:\n", - " del p['snap']\n", - " return p\n", - " \n", - "tabulate.tabulate([(accuracy, space, time, v['factory'], pretty_params(v['construction_params'][1]), pretty_params(v['search_params'])) \n", - " for accuracy, space, time, k, v in fr],\n", - " tablefmt=\"html\",\n", - " headers=[\"accuracy\",\"space\", \"time\", \"factory\", \"quantizer cfg\", \"search cfg\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36e82084-18f6-4546-a717-163eb0224ee8", - "metadata": {}, - "outputs": [], - "source": [ - "# index local optima @ precision 0.8\n", - "precision = 0.8\n", - "accuracy_metric = lambda exp: range_search_recall_at_precision(exp, precision)\n", - "fr = filter_results(results, evaluation=\"weighted\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", - 
"plot_metric(fr, accuracy_title=f\"range recall @ precision {precision}\", cost_title=\"time (seconds, 16 cores)\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aff79376-39f7-47c0-8b83-1efe5192bb7e", - "metadata": {}, - "outputs": [], - "source": [ - "# index local optima @ precision 0.2\n", - "precision = 0.2\n", - "accuracy_metric = lambda exp: range_search_recall_at_precision(exp, precision)\n", - "fr = filter_results(results, evaluation=\"weighted\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", - "plot_metric(fr, accuracy_title=f\"range recall @ precision {precision}\", cost_title=\"time (seconds, 16 cores)\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4834f1f-bbbe-4cae-9aa0-a459b0c842d1", - "metadata": {}, - "outputs": [], - "source": [ - "# global optima @ precision 0.8\n", - "precision = 0.8\n", - "accuracy_metric = lambda exp: range_search_recall_at_precision(exp, precision)\n", - "fr = filter_results(results, evaluation=\"weighted\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", - "plot_metric(fr, accuracy_title=f\"range recall @ precision {precision}\", cost_title=\"time (seconds, 16 cores)\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9aead830-6209-4956-b7ea-4a5e0029d616", - "metadata": {}, - "outputs": [], - "source": [ - "def plot_range_search_pr_curves(experiments):\n", - " x = {}\n", - " y = {}\n", - " show = {\n", - " 'Flat': None,\n", - " }\n", - " for _, _, _, k, v in fr:\n", - " if \".weighted\" in k: # and v['index'] in show:\n", - " x[k] = v['range_search_pr']['recall']\n", - " y[k] = v['range_search_pr']['precision']\n", - " \n", - " plt.title(\"range search recall\")\n", - " plt.xlabel(\"recall\")\n", - " plt.ylabel(\"precision\")\n", - " for index in x.keys():\n", - " plt.plot(x[index], y[index], '.', 
label=index)\n", - " plt.legend(bbox_to_anchor=(1.0, 1.0), loc='upper left')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92e45502-7a31-4a15-90df-fa3032d7d350", - "metadata": {}, - "outputs": [], - "source": [ - "precision = 0.8\n", - "accuracy_metric = lambda exp: range_search_recall_at_precision(exp, precision)\n", - "fr = filter_results(results, evaluation=\"weighted\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME_SPACE, scaling_factor=1)\n", - "plot_range_search_pr_curves(fr)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fdf8148a-0da6-4c5e-8d60-f8f85314574c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "root = \"/checkpoint/gsz/bench_fw/ivf/bigann\"\n", - "scales = [1, 2, 5, 10, 20, 50]\n", - "fig, plots = plt.subplots(len(scales), sharex=True, figsize=(5,25))\n", - "fig.tight_layout()\n", - "for plot, scale in zip(plots, scales, strict=True):\n", - " results = BIO(root).read_json(f\"result{scale}.json\")\n", - " accuracy_metric = \"knn_intersection\"\n", - " fr = filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, min_accuracy=0.9, pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", - " plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"time (seconds, 64 cores)\", plot=plot)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e503828c-ee61-45f7-814b-cce6461109bc", - "metadata": {}, - "outputs": [], - "source": [ - "x = {}\n", - "y = {}\n", - "accuracy=0.9\n", - "root = \"/checkpoint/gsz/bench_fw/ivf/bigann\"\n", - "scales = [1, 2, 5, 10, 20, 50]\n", - "#fig, plots = plt.subplots(len(scales), sharex=True, figsize=(5,25))\n", - "#fig.tight_layout()\n", - "for scale in scales:\n", - " results = BIO(root).read_json(f\"result{scale}.json\")\n", - " scale *= 1_000_000\n", - " accuracy_metric = \"knn_intersection\"\n", - " fr = 
filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, min_accuracy=accuracy, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", - " seen = set()\n", - " print(scale)\n", - " for _, _, _, _, exp in fr:\n", - " fact = exp[\"factory\"]\n", - " # \"HNSW\" in fact or \n", - " if fact in seen or fact in [\"Flat\", \"IVF512,Flat\", \"IVF1024,Flat\", \"IVF2048,Flat\"]:\n", - " continue\n", - " seen.add(fact)\n", - " if fact not in x:\n", - " x[fact] = []\n", - " y[fact] = []\n", - " x[fact].append(scale)\n", - " y[fact].append(exp[\"time\"] + exp[\"quantizer\"][\"time\"])\n", - " if (exp[\"knn_intersection\"] > 0.92):\n", - " print(fact)\n", - " print(exp[\"search_params\"])\n", - " print(exp[\"knn_intersection\"])\n", - "\n", - " #plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"time (seconds, 64 cores)\", plot=plot)\n", - " \n", - "plt.title(f\"recall @ 1 = {accuracy*100}%\")\n", - "plt.xlabel(\"database size\")\n", - "plt.ylabel(\"time\")\n", - "plt.xscale(\"log\")\n", - "plt.yscale(\"log\")\n", - "\n", - "marker = itertools.cycle((\"o\", \"v\", \"^\", \"<\", \">\", \"s\", \"p\", \"P\", \"*\", \"h\", \"X\", \"D\")) \n", - "for index in x.keys():\n", - " if \"HNSW\" in index:\n", - " plt.plot(x[index], y[index], label=index, linewidth=1, marker=next(marker), linestyle=\"dashed\")\n", - " else:\n", - " plt.plot(x[index], y[index], label=index, linewidth=1, marker=next(marker))\n", - "plt.legend(bbox_to_anchor=(1.0, 1.0), loc='upper left')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "37a99bb2-f998-461b-a345-7cc6e702cb3a", - "metadata": {}, - "outputs": [], - "source": [ - "# global optima\n", - "accuracy_metric = \"sym_recall\"\n", - "fr = filter_results(results, evaluation=\"rec\", accuracy_metric=accuracy_metric, time_metric=lambda e:e['encode_time'], min_accuracy=0.9, pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.SPACE, scaling_factor=1)\n", - 
"plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"space\", plot_space=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c973ce4e-3566-4f02-bd93-f113e3e0c791", - "metadata": {}, - "outputs": [], - "source": [ - "def pretty_time(s):\n", - " if s is None:\n", - " return \"None\"\n", - " s = int(s * 1000) / 1000\n", - " m, s = divmod(s, 60)\n", - " h, m = divmod(m, 60)\n", - " d, h = divmod(h, 24)\n", - " r = \"\"\n", - " if d > 0:\n", - " r += f\"{int(d)}d \"\n", - " if h > 0:\n", - " r += f\"{int(h)}h \"\n", - " if m > 0:\n", - " r += f\"{int(m)}m \"\n", - " if s > 0 or len(r) == 0:\n", - " r += f\"{s:.3f}s\"\n", - " return r\n", - "\n", - "def pretty_size(s):\n", - " if s > 1024 * 1024:\n", - " return f\"{s / 1024 / 1024:.1f}\".rstrip('0').rstrip('.') + \"MB\"\n", - " if s > 1024:\n", - " return f\"{s / 1024:.1f}\".rstrip('0').rstrip('.') + \"KB\"\n", - " return f\"{s}\"\n", - "\n", - "def pretty_mse(m):\n", - " if m is None:\n", - " return \"None\"\n", - " else:\n", - " return f\"{m:.6f}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ddcf226-fb97-4a59-9fc3-3ed8f7d5e703", - "metadata": {}, - "outputs": [], - "source": [ - "data = {}\n", - "root = \"/checkpoint/gsz/bench_fw/bigann\"\n", - "scales = [1, 2, 5, 10, 20, 50]\n", - "for scale in scales:\n", - " results = BIO(root).read_json(f\"result{scale}.json\")\n", - " accuracy_metric = \"knn_intersection\"\n", - " fr = filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, min_accuracy=0, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", - " d = {}\n", - " data[f\"{scale}M\"] = d\n", - " for _, _, _, _, exp in fr:\n", - " fact = exp[\"factory\"]\n", - " # \"HNSW\" in fact or \n", - " if fact in [\"Flat\", \"IVF512,Flat\", \"IVF1024,Flat\", \"IVF2048,Flat\"]:\n", - " continue\n", - " if fact not in d:\n", - " d[fact] = []\n", - " d[fact].append({\n", - " \"nprobe\": 
exp[\"search_params\"][\"nprobe\"],\n", - " \"recall\": exp[\"knn_intersection\"],\n", - " \"time\": exp[\"time\"] + exp[\"quantizer\"][\"time\"],\n", - " })\n", - "data\n", - "# with open(\"/checkpoint/gsz/bench_fw/codecs.json\", \"w\") as f:\n", - "# json.dump(data, f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e54eebb6-0a9f-4a72-84d2-f12c5bd44510", - "metadata": {}, - "outputs": [], - "source": [ - "ds = \"deep1b\"\n", - "data = []\n", - "jss = []\n", - "root = f\"/checkpoint/gsz/bench_fw/codecs/{ds}\"\n", - "results = BIO(root).read_json(f\"result.json\")\n", - "for k, e in results[\"experiments\"].items():\n", - " if \"rec\" in k and e['factory'] != 'Flat': # and e['sym_recall'] > 0.0: # and \"PRQ\" in e['factory'] and e['sym_recall'] > 0.0:\n", - " code_size = results['indices'][e['codec']]['sa_code_size']\n", - " codec_size = results['indices'][e['codec']]['codec_size']\n", - " training_time = results['indices'][e['codec']]['training_time']\n", - " # training_size = results['indices'][e['codec']]['training_size']\n", - " cpu = e['cpu'] if 'cpu' in e else \"\"\n", - " ps = ', '.join([f\"{k}={v}\" for k,v in e['construction_params'][0].items()]) if e['construction_params'] else \" \"\n", - " eps = ', '.join([f\"{k}={v}\" for k,v in e['reconstruct_params'].items() if k != \"snap\"]) if e['reconstruct_params'] else \" \"\n", - " data.append((code_size, f\"|{e['factory']}|{ps}|{eps}|{code_size}|{pretty_size(codec_size)}|{pretty_time(training_time)}|{training_size}|{pretty_mse(e['mse'])}|{e['sym_recall']}|{e['asym_recall']}|{pretty_time(e['encode_time'])}|{pretty_time(e['decode_time'])}|{cpu}|\"))\n", - " jss.append({\n", - " 'factory': e['factory'],\n", - " 'parameters': e['construction_params'][0] if e['construction_params'] else \"\",\n", - " 'evaluation_params': e['reconstruct_params'],\n", - " 'code_size': code_size,\n", - " 'codec_size': codec_size,\n", - " 'training_time': training_time,\n", - " 'training_size': 
training_size,\n", - " 'mse': e['mse'],\n", - " 'sym_recall': e['sym_recall'],\n", - " 'asym_recall': e['asym_recall'],\n", - " 'encode_time': e['encode_time'],\n", - " 'decode_time': e['decode_time'],\n", - " 'cpu': cpu,\n", - " })\n", - "\n", - "print(\"|factory key|construction parameters|evaluation parameters|code size|codec size|training time|training size|mean squared error|sym recall @ 1|asym recall @ 1|encode time|decode time|cpu|\")\n", - "print(\"|-|-|-|-|-|-|-|-|-|\")\n", - "data.sort()\n", - "for d in data:\n", - " print(d[1])\n", - "\n", - "with open(f\"/checkpoint/gsz/bench_fw/codecs_{ds}_test.json\", \"w\") as f:\n", - " json.dump(jss, f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1216733-9670-407c-b3d2-5f87bce0321c", - "metadata": {}, - "outputs": [], - "source": [ - "def read_file(filename: str, keys):\n", - " results = []\n", - " with ZipFile(filename, \"r\") as zip_file:\n", - " for key in keys:\n", - " with zip_file.open(key, \"r\") as f:\n", - " if key in [\"D\", \"I\", \"R\", \"lims\"]:\n", - " results.append(np.load(f))\n", - " elif key in [\"P\"]:\n", - " t = io.TextIOWrapper(f)\n", - " results.append(json.load(t))\n", - " else:\n", - " raise AssertionError()\n", - " return results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "56de051e-22db-4bef-b242-1ddabc9e0bb9", - "metadata": {}, - "outputs": [], - "source": [ - "ds = \"contriever\"\n", - "data = []\n", - "jss = []\n", - "root = f\"/checkpoint/gsz/bench_fw/codecs/{ds}\"\n", - "for lf in glob.glob(root + '/*rec*.zip'):\n", - " e, = read_file(lf, ['P'])\n", - " if e['factory'] != 'Flat': # and e['sym_recall'] > 0.0: # and \"PRQ\" in e['factory'] and e['sym_recall'] > 0.0:\n", - " code_size = e['codec_meta']['sa_code_size']\n", - " codec_size = e['codec_meta']['codec_size']\n", - " training_time = e['codec_meta']['training_time']\n", - " training_size = None # e['codec_meta']['training_size']\n", - " cpu = e['cpu'] if 'cpu' in e else 
\"\"\n", - " ps = ', '.join([f\"{k}={v}\" for k,v in e['construction_params'][0].items()]) if e['construction_params'] else \" \"\n", - " eps = ', '.join([f\"{k}={v}\" for k,v in e['reconstruct_params'].items() if k != \"snap\"]) if e['reconstruct_params'] else \" \"\n", - " if eps in ps and eps != \"encode_ils_iters=16\" and eps != \"max_beam_size=32\":\n", - " eps = \" \"\n", - " data.append((code_size, f\"|{e['factory']}|{ps}|{eps}|{code_size}|{pretty_size(codec_size)}|{pretty_time(training_time)}|{pretty_mse(e['mse'])}|{e['sym_recall']}|{e['asym_recall']}|{pretty_time(e['encode_time'])}|{pretty_time(e['decode_time'])}|{cpu}|\"))\n", - " eps = e['reconstruct_params']\n", - " del eps['snap']\n", - " params = copy(e['construction_params'][0]) if e['construction_params'] else {}\n", - " for k, v in e['reconstruct_params'].items():\n", - " params[k] = v\n", - " jss.append({\n", - " 'factory': e['factory'],\n", - " 'params': params,\n", - " 'construction_params': e['construction_params'][0] if e['construction_params'] else {},\n", - " 'evaluation_params': e['reconstruct_params'],\n", - " 'code_size': code_size,\n", - " 'codec_size': codec_size,\n", - " 'training_time': training_time,\n", - " # 'training_size': training_size,\n", - " 'mse': e['mse'],\n", - " 'sym_recall': e['sym_recall'],\n", - " 'asym_recall': e['asym_recall'],\n", - " 'encode_time': e['encode_time'],\n", - " 'decode_time': e['decode_time'],\n", - " 'cpu': cpu,\n", - " })\n", - "\n", - "print(\"|factory key|construction parameters|encode/decode parameters|code size|codec size|training time|mean squared error|sym recall @ 1|asym recall @ 1|encode time|decode time|cpu|\")\n", - "print(\"|-|-|-|-|-|-|-|-|-|\")\n", - "data.sort()\n", - "# for d in data:\n", - "# print(d[1])\n", - "\n", - "print(len(data))\n", - "\n", - "with open(f\"/checkpoint/gsz/bench_fw/codecs_{ds}_5.json\", \"w\") as f:\n", - " json.dump(jss, f)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda 
env:.conda-faiss_from_source] *", - "language": "python", - "name": "conda-env-.conda-faiss_from_source-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 - } + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "be081589-e1b2-4569-acb7-44203e273899", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import itertools\n", + "from faiss.contrib.evaluation import OperatingPoints\n", + "from enum import Enum\n", + "from faiss.benchs.bench_fw.benchmark_io import BenchmarkIO as BIO\n", + "from faiss.benchs.bench_fw.utils import filter_results, ParetoMode, ParetoMetric\n", + "from copy import copy\n", + "import numpy as np\n", + "import datetime\n", + "import glob\n", + "import io\n", + "import json\n", + "from zipfile import ZipFile\n", + "import tabulate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6492e95-24c7-4425-bf0a-27e10e879ca6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import getpass\n", + "username = getpass.getuser()\n", + "root = f\"/home/{username}/simsearch/data/ivf/results/sift1M\"\n", + "results = BIO(root).read_json(\"result.json\")\n", + "results.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0875d269-aef4-426d-83dd-866970f43777", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "results['experiments']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f080a6e2-1565-418b-8732-4adeff03a099", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def plot_metric(experiments, accuracy_title, cost_title, plot_space=False, plot=None):\n", + " if plot is None:\n", + " plot = plt.subplot()\n", + " 
x = {}\n", + " y = {}\n", + " for accuracy, space, time, k, v in experiments:\n", + " idx_name = v['index'] + (\"snap\" if 'search_params' in v and v['search_params'][\"snap\"] == 1 else \"\")\n", + " if idx_name not in x:\n", + " x[idx_name] = []\n", + " y[idx_name] = []\n", + " x[idx_name].append(accuracy)\n", + " if plot_space:\n", + " y[idx_name].append(space)\n", + " else:\n", + " y[idx_name].append(time)\n", + "\n", + " #plt.figure(figsize=(10,6))\n", + " #plt.title(accuracy_title)\n", + " plot.set_xlabel(accuracy_title)\n", + " plot.set_ylabel(cost_title)\n", + " plot.set_yscale(\"log\")\n", + " marker = itertools.cycle((\"o\", \"v\", \"^\", \"<\", \">\", \"s\", \"p\", \"P\", \"*\", \"h\", \"X\", \"D\")) \n", + " for index in x.keys():\n", + " plot.plot(x[index], y[index], marker=next(marker), label=index, linewidth=0)\n", + " plot.legend(bbox_to_anchor=(1, 1), loc='upper left')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61007155-5edc-449e-835e-c141a01a2ae5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# index local optima\n", + "accuracy_metric = \"knn_intersection\"\n", + "fr = filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1, min_accuracy=0.95)\n", + "plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"time (seconds, 32 cores)\", plot_space=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9f94dcc-5abe-4cad-9619-f5d1d24fb8c1", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# global optima\n", + "accuracy_metric = \"knn_intersection\"\n", + "fr = filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, min_accuracy=0.25, name_filter=lambda n: not n.startswith(\"Flat\"), pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", + "#fr = filter_results(results, evaluation=\"knn\", 
accuracy_metric=accuracy_metric, min_accuracy=0.90, max_space=64, max_time=0, name_filter=lambda n: not n.startswith(\"Flat\"), pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", + "plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"time (seconds, 32 cores)\", plot_space=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c10f587-26ef-49ec-83a9-88f6a2a433e8", + "metadata": {}, + "outputs": [], + "source": [ + "def pretty_params(p):\n", + " p = copy(p)\n", + " if 'snap' in p and p['snap'] == 0:\n", + " del p['snap']\n", + " return p\n", + " \n", + "tabulate.tabulate([(accuracy, space, time, v['factory'], pretty_params(v['construction_params'][1]), pretty_params(v['search_params'])) \n", + " for accuracy, space, time, k, v in fr],\n", + " tablefmt=\"html\",\n", + " headers=[\"accuracy\",\"space\", \"time\", \"factory\", \"quantizer cfg\", \"search cfg\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36e82084-18f6-4546-a717-163eb0224ee8", + "metadata": {}, + "outputs": [], + "source": [ + "# index local optima @ precision 0.8\n", + "precision = 0.8\n", + "accuracy_metric = lambda exp: range_search_recall_at_precision(exp, precision)\n", + "fr = filter_results(results, evaluation=\"weighted\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", + "plot_metric(fr, accuracy_title=f\"range recall @ precision {precision}\", cost_title=\"time (seconds, 16 cores)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aff79376-39f7-47c0-8b83-1efe5192bb7e", + "metadata": {}, + "outputs": [], + "source": [ + "# index local optima @ precision 0.2\n", + "precision = 0.2\n", + "accuracy_metric = lambda exp: range_search_recall_at_precision(exp, precision)\n", + "fr = filter_results(results, evaluation=\"weighted\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.INDEX, 
pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", + "plot_metric(fr, accuracy_title=f\"range recall @ precision {precision}\", cost_title=\"time (seconds, 16 cores)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4834f1f-bbbe-4cae-9aa0-a459b0c842d1", + "metadata": {}, + "outputs": [], + "source": [ + "# global optima @ precision 0.8\n", + "precision = 0.8\n", + "accuracy_metric = lambda exp: range_search_recall_at_precision(exp, precision)\n", + "fr = filter_results(results, evaluation=\"weighted\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", + "plot_metric(fr, accuracy_title=f\"range recall @ precision {precision}\", cost_title=\"time (seconds, 16 cores)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9aead830-6209-4956-b7ea-4a5e0029d616", + "metadata": {}, + "outputs": [], + "source": [ + "def plot_range_search_pr_curves(experiments):\n", + " x = {}\n", + " y = {}\n", + " show = {\n", + " 'Flat': None,\n", + " }\n", + " for _, _, _, k, v in fr:\n", + " if \".weighted\" in k: # and v['index'] in show:\n", + " x[k] = v['range_search_pr']['recall']\n", + " y[k] = v['range_search_pr']['precision']\n", + " \n", + " plt.title(\"range search recall\")\n", + " plt.xlabel(\"recall\")\n", + " plt.ylabel(\"precision\")\n", + " for index in x.keys():\n", + " plt.plot(x[index], y[index], '.', label=index)\n", + " plt.legend(bbox_to_anchor=(1.0, 1.0), loc='upper left')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92e45502-7a31-4a15-90df-fa3032d7d350", + "metadata": {}, + "outputs": [], + "source": [ + "precision = 0.8\n", + "accuracy_metric = lambda exp: range_search_recall_at_precision(exp, precision)\n", + "fr = filter_results(results, evaluation=\"weighted\", accuracy_metric=accuracy_metric, pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME_SPACE, scaling_factor=1)\n", + 
"plot_range_search_pr_curves(fr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdf8148a-0da6-4c5e-8d60-f8f85314574c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "root = \"/checkpoint/gsz/bench_fw/ivf/bigann\"\n", + "scales = [1, 2, 5, 10, 20, 50]\n", + "fig, plots = plt.subplots(len(scales), sharex=True, figsize=(5,25))\n", + "fig.tight_layout()\n", + "for plot, scale in zip(plots, scales, strict=True):\n", + " results = BIO(root).read_json(f\"result{scale}.json\")\n", + " accuracy_metric = \"knn_intersection\"\n", + " fr = filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, min_accuracy=0.9, pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", + " plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"time (seconds, 64 cores)\", plot=plot)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e503828c-ee61-45f7-814b-cce6461109bc", + "metadata": {}, + "outputs": [], + "source": [ + "x = {}\n", + "y = {}\n", + "accuracy=0.9\n", + "root = \"/checkpoint/gsz/bench_fw/ivf/bigann\"\n", + "scales = [1, 2, 5, 10, 20, 50]\n", + "#fig, plots = plt.subplots(len(scales), sharex=True, figsize=(5,25))\n", + "#fig.tight_layout()\n", + "for scale in scales:\n", + " results = BIO(root).read_json(f\"result{scale}.json\")\n", + " scale *= 1_000_000\n", + " accuracy_metric = \"knn_intersection\"\n", + " fr = filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, min_accuracy=accuracy, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", + " seen = set()\n", + " print(scale)\n", + " for _, _, _, _, exp in fr:\n", + " fact = exp[\"factory\"]\n", + " # \"HNSW\" in fact or \n", + " if fact in seen or fact in [\"Flat\", \"IVF512,Flat\", \"IVF1024,Flat\", \"IVF2048,Flat\"]:\n", + " continue\n", + " seen.add(fact)\n", + " if fact not in x:\n", + " x[fact] = []\n", + " y[fact] = []\n", + " 
x[fact].append(scale)\n", + " y[fact].append(exp[\"time\"] + exp[\"quantizer\"][\"time\"])\n", + " if (exp[\"knn_intersection\"] > 0.92):\n", + " print(fact)\n", + " print(exp[\"search_params\"])\n", + " print(exp[\"knn_intersection\"])\n", + "\n", + " #plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"time (seconds, 64 cores)\", plot=plot)\n", + " \n", + "plt.title(f\"recall @ 1 = {accuracy*100}%\")\n", + "plt.xlabel(\"database size\")\n", + "plt.ylabel(\"time\")\n", + "plt.xscale(\"log\")\n", + "plt.yscale(\"log\")\n", + "\n", + "marker = itertools.cycle((\"o\", \"v\", \"^\", \"<\", \">\", \"s\", \"p\", \"P\", \"*\", \"h\", \"X\", \"D\")) \n", + "for index in x.keys():\n", + " if \"HNSW\" in index:\n", + " plt.plot(x[index], y[index], label=index, linewidth=1, marker=next(marker), linestyle=\"dashed\")\n", + " else:\n", + " plt.plot(x[index], y[index], label=index, linewidth=1, marker=next(marker))\n", + "plt.legend(bbox_to_anchor=(1.0, 1.0), loc='upper left')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37a99bb2-f998-461b-a345-7cc6e702cb3a", + "metadata": {}, + "outputs": [], + "source": [ + "# global optima\n", + "accuracy_metric = \"sym_recall\"\n", + "fr = filter_results(results, evaluation=\"rec\", accuracy_metric=accuracy_metric, time_metric=lambda e:e['encode_time'], min_accuracy=0.9, pareto_mode=ParetoMode.GLOBAL, pareto_metric=ParetoMetric.SPACE, scaling_factor=1)\n", + "plot_metric(fr, accuracy_title=\"knn intersection\", cost_title=\"space\", plot_space=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c973ce4e-3566-4f02-bd93-f113e3e0c791", + "metadata": {}, + "outputs": [], + "source": [ + "def pretty_time(s):\n", + " if s is None:\n", + " return \"None\"\n", + " s = int(s * 1000) / 1000\n", + " m, s = divmod(s, 60)\n", + " h, m = divmod(m, 60)\n", + " d, h = divmod(h, 24)\n", + " r = \"\"\n", + " if d > 0:\n", + " r += f\"{int(d)}d \"\n", + " if h > 0:\n", + " r += f\"{int(h)}h 
\"\n", + " if m > 0:\n", + " r += f\"{int(m)}m \"\n", + " if s > 0 or len(r) == 0:\n", + " r += f\"{s:.3f}s\"\n", + " return r\n", + "\n", + "def pretty_size(s):\n", + " if s > 1024 * 1024:\n", + " return f\"{s / 1024 / 1024:.1f}\".rstrip('0').rstrip('.') + \"MB\"\n", + " if s > 1024:\n", + " return f\"{s / 1024:.1f}\".rstrip('0').rstrip('.') + \"KB\"\n", + " return f\"{s}\"\n", + "\n", + "def pretty_mse(m):\n", + " if m is None:\n", + " return \"None\"\n", + " else:\n", + " return f\"{m:.6f}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ddcf226-fb97-4a59-9fc3-3ed8f7d5e703", + "metadata": {}, + "outputs": [], + "source": [ + "data = {}\n", + "root = \"/checkpoint/gsz/bench_fw/bigann\"\n", + "scales = [1, 2, 5, 10, 20, 50]\n", + "for scale in scales:\n", + " results = BIO(root).read_json(f\"result{scale}.json\")\n", + " accuracy_metric = \"knn_intersection\"\n", + " fr = filter_results(results, evaluation=\"knn\", accuracy_metric=accuracy_metric, min_accuracy=0, pareto_mode=ParetoMode.INDEX, pareto_metric=ParetoMetric.TIME, scaling_factor=1)\n", + " d = {}\n", + " data[f\"{scale}M\"] = d\n", + " for _, _, _, _, exp in fr:\n", + " fact = exp[\"factory\"]\n", + " # \"HNSW\" in fact or \n", + " if fact in [\"Flat\", \"IVF512,Flat\", \"IVF1024,Flat\", \"IVF2048,Flat\"]:\n", + " continue\n", + " if fact not in d:\n", + " d[fact] = []\n", + " d[fact].append({\n", + " \"nprobe\": exp[\"search_params\"][\"nprobe\"],\n", + " \"recall\": exp[\"knn_intersection\"],\n", + " \"time\": exp[\"time\"] + exp[\"quantizer\"][\"time\"],\n", + " })\n", + "data\n", + "# with open(\"/checkpoint/gsz/bench_fw/codecs.json\", \"w\") as f:\n", + "# json.dump(data, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e54eebb6-0a9f-4a72-84d2-f12c5bd44510", + "metadata": {}, + "outputs": [], + "source": [ + "ds = \"deep1b\"\n", + "data = []\n", + "jss = []\n", + "root = f\"/checkpoint/gsz/bench_fw/codecs/{ds}\"\n", + "results = 
BIO(root).read_json(f\"result.json\")\n", + "for k, e in results[\"experiments\"].items():\n", + " if \"rec\" in k and e['factory'] != 'Flat': # and e['sym_recall'] > 0.0: # and \"PRQ\" in e['factory'] and e['sym_recall'] > 0.0:\n", + " code_size = results['indices'][e['codec']]['sa_code_size']\n", + " codec_size = results['indices'][e['codec']]['codec_size']\n", + " training_time = results['indices'][e['codec']]['training_time']\n", + " # training_size = results['indices'][e['codec']]['training_size']\n", + " cpu = e['cpu'] if 'cpu' in e else \"\"\n", + " ps = ', '.join([f\"{k}={v}\" for k,v in e['construction_params'][0].items()]) if e['construction_params'] else \" \"\n", + " eps = ', '.join([f\"{k}={v}\" for k,v in e['reconstruct_params'].items() if k != \"snap\"]) if e['reconstruct_params'] else \" \"\n", + " data.append((code_size, f\"|{e['factory']}|{ps}|{eps}|{code_size}|{pretty_size(codec_size)}|{pretty_time(training_time)}|{training_size}|{pretty_mse(e['mse'])}|{e['sym_recall']}|{e['asym_recall']}|{pretty_time(e['encode_time'])}|{pretty_time(e['decode_time'])}|{cpu}|\"))\n", + " jss.append({\n", + " 'factory': e['factory'],\n", + " 'parameters': e['construction_params'][0] if e['construction_params'] else \"\",\n", + " 'evaluation_params': e['reconstruct_params'],\n", + " 'code_size': code_size,\n", + " 'codec_size': codec_size,\n", + " 'training_time': training_time,\n", + " 'training_size': training_size,\n", + " 'mse': e['mse'],\n", + " 'sym_recall': e['sym_recall'],\n", + " 'asym_recall': e['asym_recall'],\n", + " 'encode_time': e['encode_time'],\n", + " 'decode_time': e['decode_time'],\n", + " 'cpu': cpu,\n", + " })\n", + "\n", + "print(\"|factory key|construction parameters|evaluation parameters|code size|codec size|training time|training size|mean squared error|sym recall @ 1|asym recall @ 1|encode time|decode time|cpu|\")\n", + "print(\"|-|-|-|-|-|-|-|-|-|\")\n", + "data.sort()\n", + "for d in data:\n", + " print(d[1])\n", + "\n", + "with 
open(f\"/checkpoint/gsz/bench_fw/codecs_{ds}_test.json\", \"w\") as f:\n", + " json.dump(jss, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1216733-9670-407c-b3d2-5f87bce0321c", + "metadata": {}, + "outputs": [], + "source": [ + "def read_file(filename: str, keys):\n", + " results = []\n", + " with ZipFile(filename, \"r\") as zip_file:\n", + " for key in keys:\n", + " with zip_file.open(key, \"r\") as f:\n", + " if key in [\"D\", \"I\", \"R\", \"lims\"]:\n", + " results.append(np.load(f))\n", + " elif key in [\"P\"]:\n", + " t = io.TextIOWrapper(f)\n", + " results.append(json.load(t))\n", + " else:\n", + " raise AssertionError()\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56de051e-22db-4bef-b242-1ddabc9e0bb9", + "metadata": {}, + "outputs": [], + "source": [ + "ds = \"contriever\"\n", + "data = []\n", + "jss = []\n", + "root = f\"/checkpoint/gsz/bench_fw/codecs/{ds}\"\n", + "for lf in glob.glob(root + '/*rec*.zip'):\n", + " e, = read_file(lf, ['P'])\n", + " if e['factory'] != 'Flat': # and e['sym_recall'] > 0.0: # and \"PRQ\" in e['factory'] and e['sym_recall'] > 0.0:\n", + " code_size = e['codec_meta']['sa_code_size']\n", + " codec_size = e['codec_meta']['codec_size']\n", + " training_time = e['codec_meta']['training_time']\n", + " training_size = None # e['codec_meta']['training_size']\n", + " cpu = e['cpu'] if 'cpu' in e else \"\"\n", + " ps = ', '.join([f\"{k}={v}\" for k,v in e['construction_params'][0].items()]) if e['construction_params'] else \" \"\n", + " eps = ', '.join([f\"{k}={v}\" for k,v in e['reconstruct_params'].items() if k != \"snap\"]) if e['reconstruct_params'] else \" \"\n", + " if eps in ps and eps != \"encode_ils_iters=16\" and eps != \"max_beam_size=32\":\n", + " eps = \" \"\n", + " data.append((code_size, 
f\"|{e['factory']}|{ps}|{eps}|{code_size}|{pretty_size(codec_size)}|{pretty_time(training_time)}|{pretty_mse(e['mse'])}|{e['sym_recall']}|{e['asym_recall']}|{pretty_time(e['encode_time'])}|{pretty_time(e['decode_time'])}|{cpu}|\"))\n", + " eps = e['reconstruct_params']\n", + " del eps['snap']\n", + " params = copy(e['construction_params'][0]) if e['construction_params'] else {}\n", + " for k, v in e['reconstruct_params'].items():\n", + " params[k] = v\n", + " jss.append({\n", + " 'factory': e['factory'],\n", + " 'params': params,\n", + " 'construction_params': e['construction_params'][0] if e['construction_params'] else {},\n", + " 'evaluation_params': e['reconstruct_params'],\n", + " 'code_size': code_size,\n", + " 'codec_size': codec_size,\n", + " 'training_time': training_time,\n", + " # 'training_size': training_size,\n", + " 'mse': e['mse'],\n", + " 'sym_recall': e['sym_recall'],\n", + " 'asym_recall': e['asym_recall'],\n", + " 'encode_time': e['encode_time'],\n", + " 'decode_time': e['decode_time'],\n", + " 'cpu': cpu,\n", + " })\n", + "\n", + "print(\"|factory key|construction parameters|encode/decode parameters|code size|codec size|training time|mean squared error|sym recall @ 1|asym recall @ 1|encode time|decode time|cpu|\")\n", + "print(\"|-|-|-|-|-|-|-|-|-|\")\n", + "data.sort()\n", + "# for d in data:\n", + "# print(d[1])\n", + "\n", + "print(len(data))\n", + "\n", + "with open(f\"/checkpoint/gsz/bench_fw/codecs_{ds}_5.json\", \"w\") as f:\n", + " json.dump(jss, f)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "faiss_binary (local)", + "language": "python", + "name": "faiss_binary_local" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/thirdparty/faiss/benchs/bench_fw_optimize.py b/thirdparty/faiss/benchs/bench_fw_optimize.py index 31b56f9f5..11e625e23 100644 --- a/thirdparty/faiss/benchs/bench_fw_optimize.py +++ b/thirdparty/faiss/benchs/bench_fw_optimize.py @@ -7,9 +7,9 @@ import logging import os -from bench_fw.benchmark_io import BenchmarkIO -from bench_fw.descriptors import DatasetDescriptor -from bench_fw.optimize import Optimizer +from faiss.benchs.bench_fw.benchmark_io import BenchmarkIO +from faiss.benchs.bench_fw.descriptors import DatasetDescriptor +from faiss.benchs.bench_fw.optimize import Optimizer logging.basicConfig(level=logging.INFO) diff --git a/thirdparty/faiss/benchs/bench_fw_range.py b/thirdparty/faiss/benchs/bench_fw_range.py index f38de114f..0d4b65afa 100644 --- a/thirdparty/faiss/benchs/bench_fw_range.py +++ b/thirdparty/faiss/benchs/bench_fw_range.py @@ -3,28 +3,29 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -import logging import argparse +import logging import os -from bench_fw.benchmark import Benchmark -from bench_fw.benchmark_io import BenchmarkIO -from bench_fw.descriptors import DatasetDescriptor, IndexDescriptor +from faiss.benchs.bench_fw.benchmark import Benchmark +from faiss.benchs.bench_fw.benchmark_io import BenchmarkIO +from faiss.benchs.bench_fw.descriptors import DatasetDescriptor, IndexDescriptorClassic logging.basicConfig(level=logging.INFO) + def ssnpp(bio): benchmark = Benchmark( num_threads=32, training_vectors=DatasetDescriptor( - tablename="ssnpp_training_5M.npy", + tablename="training.npy", ), database_vectors=DatasetDescriptor( - tablename="ssnpp_xb_range_filtered_119201.npy", + tablename="database.npy", ), - query_vectors=DatasetDescriptor(tablename="ssnpp_xq_range_filtered_33615.npy"), + query_vectors=DatasetDescriptor(tablename="query.npy"), index_descs=[ - IndexDescriptor( + IndexDescriptorClassic( factory="Flat", range_metrics={ "weighted": [ @@ -56,7 +57,7 
@@ def ssnpp(bio): ] }, ), - IndexDescriptor( + IndexDescriptorClassic( factory="IVF262144(PQ256x4fs),PQ32", ), ], @@ -67,6 +68,7 @@ def ssnpp(bio): benchmark.set_io(bio) benchmark.benchmark("result.json", local=False, train=True, reconstruct=False, knn=False, range=True) + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('experiment') diff --git a/thirdparty/faiss/c_api/IndexScalarQuantizer_c.h b/thirdparty/faiss/c_api/IndexScalarQuantizer_c.h index becdb201e..5c6694695 100644 --- a/thirdparty/faiss/c_api/IndexScalarQuantizer_c.h +++ b/thirdparty/faiss/c_api/IndexScalarQuantizer_c.h @@ -26,6 +26,9 @@ typedef enum FaissQuantizerType { QT_fp16, QT_8bit_direct, ///< fast indexing of uint8s QT_6bit, ///< 6 bits per component + QT_bf16, + QT_8bit_direct_signed, ///< fast indexing of signed int8s ranging from [-128 + ///< to 127] } FaissQuantizerType; // forward declaration diff --git a/thirdparty/faiss/conda/faiss-gpu-raft/meta.yaml b/thirdparty/faiss/conda/faiss-gpu-raft/meta.yaml index c43e7656c..1dde8e986 100644 --- a/thirdparty/faiss/conda/faiss-gpu-raft/meta.yaml +++ b/thirdparty/faiss/conda/faiss-gpu-raft/meta.yaml @@ -48,21 +48,25 @@ outputs: - {{ compiler('cxx') }} - sysroot_linux-64 # [linux64] - llvm-openmp # [osx] - - cmake >=3.23.1 + - cmake >=3.24.0 - make # [not win] + - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] + - mkl =2023 # [x86_64] - mkl-devel =2023 # [x86_64] - cuda-toolkit {{ cudatoolkit }} host: + - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - mkl =2023 # [x86_64] - openblas # [not x86_64] - - libraft =24.04 + - libraft =24.06 - cuda-version {{ cuda_constraints }} run: + - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - mkl =2023 # [x86_64] - openblas # [not x86_64] - cuda-cudart {{ cuda_constraints }} - libcublas {{ libcublas_constraints }} - - libraft =24.04 + - libraft =24.06 - cuda-version {{ cuda_constraints }} test: requires: @@ -85,13 +89,18 @@ outputs: - {{ compiler('cxx') }} - sysroot_linux-64 =2.17 # [linux64] - 
swig - - cmake >=3.23.1 + - cmake >=3.24.0 - make # [not win] + - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] + - mkl =2023 # [x86_64] + - cuda-toolkit {{ cudatoolkit }} host: + - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - python {{ python }} - numpy >=1.19,<2 - {{ pin_subpackage('libfaiss', exact=True) }} run: + - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - python {{ python }} - numpy >=1.19,<2 - packaging diff --git a/thirdparty/faiss/conda/faiss-gpu/build-lib.sh b/thirdparty/faiss/conda/faiss-gpu/build-lib.sh index 2d25e9c5e..9957be96e 100755 --- a/thirdparty/faiss/conda/faiss-gpu/build-lib.sh +++ b/thirdparty/faiss/conda/faiss-gpu/build-lib.sh @@ -6,6 +6,12 @@ set -e +# Workaround for CUDA 11.4.4 builds. Moves all necessary headers to include root. +if [ -n "$FAISS_FLATTEN_CONDA_INCLUDES" ] && [ "$FAISS_FLATTEN_CONDA_INCLUDES" = "1" ]; then + cp -r -n "$CONDA_PREFIX/x86_64-conda-linux-gnu/sysroot/usr/include/"* "$CONDA_PREFIX/include/" + cp -r -n "$CONDA_PREFIX/x86_64-conda-linux-gnu/include/c++/11.2.0/"* "$CONDA_PREFIX/include/" + cp -r -n "$CONDA_PREFIX/x86_64-conda-linux-gnu/include/c++/11.2.0/x86_64-conda-linux-gnu/"* "$CONDA_PREFIX/include/" +fi # Build libfaiss.so/libfaiss_avx2.so/libfaiss_avx512.so cmake -B _build \ diff --git a/thirdparty/faiss/conda/faiss-gpu/meta.yaml b/thirdparty/faiss/conda/faiss-gpu/meta.yaml index b0df70718..05f7b5900 100644 --- a/thirdparty/faiss/conda/faiss-gpu/meta.yaml +++ b/thirdparty/faiss/conda/faiss-gpu/meta.yaml @@ -43,12 +43,13 @@ outputs: - {{ pin_compatible('libfaiss', exact=True) }} script_env: - CUDA_ARCHS + - FAISS_FLATTEN_CONDA_INCLUDES requirements: build: - {{ compiler('cxx') }} - sysroot_linux-64 # [linux64] - llvm-openmp # [osx] - - cmake >=3.23.1 + - cmake >=3.24.0 - make # [not win] - mkl-devel =2023 # [x86_64] - cuda-toolkit {{ cudatoolkit }} @@ -81,8 +82,9 @@ outputs: - {{ compiler('cxx') }} - sysroot_linux-64 =2.17 # [linux64] - swig - - cmake >=3.23.1 + - cmake >=3.24.0 - make # [not win] + - cuda-toolkit {{ 
cudatoolkit }} host: - python {{ python }} - numpy >=1.19,<2 diff --git a/thirdparty/faiss/conda/faiss/meta.yaml b/thirdparty/faiss/conda/faiss/meta.yaml index c4d66ca0d..79e7be953 100644 --- a/thirdparty/faiss/conda/faiss/meta.yaml +++ b/thirdparty/faiss/conda/faiss/meta.yaml @@ -39,7 +39,7 @@ outputs: - {{ compiler('cxx') }} - sysroot_linux-64 # [linux64] - llvm-openmp # [osx] - - cmake >=3.23.1 + - cmake >=3.24.0 - make # [not win] - mkl-devel =2023 # [x86_64] host: @@ -69,7 +69,7 @@ outputs: - {{ compiler('cxx') }} - sysroot_linux-64 =2.17 # [linux64] - swig - - cmake >=3.23.1 + - cmake >=3.24.0 - make # [not win] host: - python {{ python }} diff --git a/thirdparty/faiss/contrib/datasets.py b/thirdparty/faiss/contrib/datasets.py index f37a2fb6e..281f16e2f 100644 --- a/thirdparty/faiss/contrib/datasets.py +++ b/thirdparty/faiss/contrib/datasets.py @@ -6,6 +6,8 @@ import os import numpy as np import faiss +import getpass + from .vecs_io import fvecs_read, ivecs_read, bvecs_mmap, fvecs_mmap from .exhaustive_search import knn @@ -115,10 +117,12 @@ def get_groundtruth(self, k=100): # that directory is ############################################################################ +username = getpass.getuser() for dataset_basedir in ( '/datasets01/simsearch/041218/', - '/mnt/vol/gfsai-flash3-east/ai-group/datasets/simsearch/'): + '/mnt/vol/gfsai-flash3-east/ai-group/datasets/simsearch/', + f'/home/{username}/simsearch/data/'): if os.path.exists(dataset_basedir): break else: diff --git a/thirdparty/faiss/contrib/factory_tools.py b/thirdparty/faiss/contrib/factory_tools.py index da90e986f..dde312b02 100644 --- a/thirdparty/faiss/contrib/factory_tools.py +++ b/thirdparty/faiss/contrib/factory_tools.py @@ -56,6 +56,8 @@ def get_code_size(d, indexkey): return (d * 6 + 7) // 8 elif indexkey == 'SQfp16': return d * 2 + elif indexkey == 'SQbf16': + return d * 2 mo = re.match('PCAR?(\\d+),(.*)$', indexkey) if mo: @@ -123,6 +125,7 @@ def reverse_index_factory(index): 
faiss.ScalarQuantizer.QT_4bit: "4", faiss.ScalarQuantizer.QT_6bit: "6", faiss.ScalarQuantizer.QT_fp16: "fp16", + faiss.ScalarQuantizer.QT_bf16: "bf16", } return f"SQ{sqtypes[index.sq.qtype]}" diff --git a/thirdparty/faiss/contrib/vecs_io.py b/thirdparty/faiss/contrib/vecs_io.py index 5d18c0b16..9ef9e0ab6 100644 --- a/thirdparty/faiss/contrib/vecs_io.py +++ b/thirdparty/faiss/contrib/vecs_io.py @@ -14,7 +14,7 @@ def ivecs_read(fname): a = np.fromfile(fname, dtype='int32') - if sys.big_endian: + if sys.byteorder == 'big': a.byteswap(inplace=True) d = a[0] return a.reshape(-1, d + 1)[:, 1:].copy() @@ -25,7 +25,7 @@ def fvecs_read(fname): def ivecs_mmap(fname): - assert not sys.big_endian + assert sys.byteorder != 'big' a = np.memmap(fname, dtype='int32', mode='r') d = a[0] return a.reshape(-1, d + 1)[:, 1:] @@ -37,7 +37,7 @@ def fvecs_mmap(fname): def bvecs_mmap(fname): x = np.memmap(fname, dtype='uint8', mode='r') - if sys.big_endian: + if sys.byteorder == 'big': da = x[:4][::-1].copy() d = da.view('int32')[0] else: @@ -50,7 +50,7 @@ def ivecs_write(fname, m): m1 = np.empty((n, d + 1), dtype='int32') m1[:, 0] = d m1[:, 1:] = m - if sys.big_endian: + if sys.byteorder == 'big': m1.byteswap(inplace=True) m1.tofile(fname) diff --git a/thirdparty/faiss/faiss/IndexFlat.cpp b/thirdparty/faiss/faiss/IndexFlat.cpp index bb7367cd5..5f2465228 100644 --- a/thirdparty/faiss/faiss/IndexFlat.cpp +++ b/thirdparty/faiss/faiss/IndexFlat.cpp @@ -74,10 +74,18 @@ void IndexFlat::search( float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances}; knn_jaccard(x, get_xb(), d, n, ntotal, &res, sel); } else { - FAISS_THROW_IF_NOT(!sel); - float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances}; knn_extra_metrics( - x, get_xb(), d, n, ntotal, metric_type, metric_arg, &res); + x, + get_xb(), + d, + n, + ntotal, + metric_type, + metric_arg, + k, + distances, + labels, + sel); } } diff --git a/thirdparty/faiss/faiss/IndexHNSW.cpp b/thirdparty/faiss/faiss/IndexHNSW.cpp 
index 3325c8c0e..c0bb81c05 100644 --- a/thirdparty/faiss/faiss/IndexHNSW.cpp +++ b/thirdparty/faiss/faiss/IndexHNSW.cpp @@ -5,8 +5,6 @@ * LICENSE file in the root directory of this source tree. */ -// -*- c++ -*- - #include #include @@ -17,7 +15,10 @@ #include #include +#include +#include #include +#include #include #include @@ -68,52 +69,6 @@ HNSWStats hnsw_stats; namespace { -/* Wrap the distance computer into one that negates the - distances. This makes supporting INNER_PRODUCE search easier */ - -struct NegativeDistanceComputer : DistanceComputer { - /// owned by this - DistanceComputer* basedis; - - explicit NegativeDistanceComputer(DistanceComputer* basedis) - : basedis(basedis) {} - - void set_query(const float* x) override { - basedis->set_query(x); - } - - /// compute distance of vector i to current query - float operator()(idx_t i) override { - return -(*basedis)(i); - } - - void distances_batch_4( - const idx_t idx0, - const idx_t idx1, - const idx_t idx2, - const idx_t idx3, - float& dis0, - float& dis1, - float& dis2, - float& dis3) override { - basedis->distances_batch_4( - idx0, idx1, idx2, idx3, dis0, dis1, dis2, dis3); - dis0 = -dis0; - dis1 = -dis1; - dis2 = -dis2; - dis3 = -dis3; - } - - /// compute distance between two stored vectors - float symmetric_dis(idx_t i, idx_t j) override { - return -basedis->symmetric_dis(i, j); - } - - virtual ~NegativeDistanceComputer() { - delete basedis; - } -}; - DistanceComputer* storage_distance_computer(const Index* storage) { if (is_similarity_metric(storage->metric_type)) { return new NegativeDistanceComputer(storage->get_distance_computer()); @@ -192,7 +147,9 @@ void hnsw_add_vertices( int i1 = n; - for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) { + for (int pt_level = hist.size() - 1; + pt_level >= !index_hnsw.init_level0; + pt_level--) { int i0 = i1 - hist[pt_level]; if (verbose) { @@ -228,7 +185,13 @@ void hnsw_add_vertices( continue; } - hnsw.add_with_locks(*dis, pt_level, pt_id, locks, 
vt); + hnsw.add_with_locks( + *dis, + pt_level, + pt_id, + locks, + vt, + index_hnsw.keep_max_size_level0 && (pt_level == 0)); if (prev_display >= 0 && i - i0 > prev_display + 10000) { prev_display = i - i0; @@ -248,7 +211,11 @@ void hnsw_add_vertices( } i1 = i0; } - FAISS_ASSERT(i1 == 0); + if (index_hnsw.init_level0) { + FAISS_ASSERT(i1 == 0); + } else { + FAISS_ASSERT((i1 - hist[0]) == 0); + } } if (verbose) { printf("Done in %.3f ms\n", getmillisecs() - t0); @@ -297,7 +264,8 @@ void hnsw_search( const SearchParameters* params_in) { FAISS_THROW_IF_NOT_MSG( index->storage, - "Please use IndexHNSWFlat (or variants) instead of IndexHNSW directly"); + "No storage index, please use IndexHNSWFlat (or variants) " + "instead of IndexHNSW directly"); const SearchParametersHNSW* params = nullptr; const HNSW& hnsw = index->hnsw; @@ -451,10 +419,18 @@ void IndexHNSW::search_level_0( float* distances, idx_t* labels, int nprobe, - int search_type) const { + int search_type, + const SearchParameters* params_in) const { FAISS_THROW_IF_NOT(k > 0); FAISS_THROW_IF_NOT(nprobe > 0); + const SearchParametersHNSW* params = nullptr; + + if (params_in) { + params = dynamic_cast(params_in); + FAISS_THROW_IF_NOT_MSG(params, "params type invalid"); + } + storage_idx_t ntotal = hnsw.levels.size(); using RH = HeapBlockResultHandler; @@ -481,13 +457,21 @@ void IndexHNSW::search_level_0( nearest_d + i * nprobe, search_type, search_stats, - vt); + vt, + params); res.end(); vt.advance(); } #pragma omp critical { hnsw_stats.combine(search_stats); } } + if (is_similarity_metric(this->metric_type)) { +// we need to revert the negated distances +#pragma omp parallel for + for (int64_t i = 0; i < k * n; i++) { + distances[i] = -distances[i]; + } + } } void IndexHNSW::init_level_0_from_knngraph( @@ -910,4 +894,86 @@ void IndexHNSW2Level::flip_to_ivf() { delete storage2l; } +/************************************************************** + * IndexHNSWCagra implementation + 
**************************************************************/ + +IndexHNSWCagra::IndexHNSWCagra() { + is_trained = true; +} + +IndexHNSWCagra::IndexHNSWCagra(int d, int M, MetricType metric) + : IndexHNSW( + (metric == METRIC_L2) + ? static_cast(new IndexFlatL2(d)) + : static_cast(new IndexFlatIP(d)), + M) { + FAISS_THROW_IF_NOT_MSG( + ((metric == METRIC_L2) || (metric == METRIC_INNER_PRODUCT)), + "unsupported metric type for IndexHNSWCagra"); + own_fields = true; + is_trained = true; + init_level0 = true; + keep_max_size_level0 = true; +} + +void IndexHNSWCagra::add(idx_t n, const float* x) { + FAISS_THROW_IF_NOT_MSG( + !base_level_only, + "Cannot add vectors when base_level_only is set to True"); + + IndexHNSW::add(n, x); +} + +void IndexHNSWCagra::search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels, + const SearchParameters* params) const { + if (!base_level_only) { + IndexHNSW::search(n, x, k, distances, labels, params); + } else { + std::vector nearest(n); + std::vector nearest_d(n); + +#pragma omp for + for (idx_t i = 0; i < n; i++) { + std::unique_ptr dis( + storage_distance_computer(this->storage)); + dis->set_query(x + i * d); + nearest[i] = -1; + nearest_d[i] = std::numeric_limits::max(); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution distrib(0, this->ntotal - 1); + + for (idx_t j = 0; j < num_base_level_search_entrypoints; j++) { + auto idx = distrib(gen); + auto distance = (*dis)(idx); + if (distance < nearest_d[i]) { + nearest[i] = idx; + nearest_d[i] = distance; + } + } + FAISS_THROW_IF_NOT_MSG( + nearest[i] >= 0, "Could not find a valid entrypoint."); + } + + search_level_0( + n, + x, + k, + nearest.data(), + nearest_d.data(), + distances, + labels, + 1, // n_probes + 1, // search_type + params); + } +} + } // namespace faiss diff --git a/thirdparty/faiss/faiss/IndexHNSW.h b/thirdparty/faiss/faiss/IndexHNSW.h index e0b65fca9..71807c653 100644 --- 
a/thirdparty/faiss/faiss/IndexHNSW.h +++ b/thirdparty/faiss/faiss/IndexHNSW.h @@ -34,6 +34,18 @@ struct IndexHNSW : Index { bool own_fields = false; Index* storage = nullptr; + // When set to false, level 0 in the knn graph is not initialized. + // This option is used by GpuIndexCagra::copyTo(IndexHNSWCagra*) + // as level 0 knn graph is copied over from the index built by + // GpuIndexCagra. + bool init_level0 = true; + + // When set to true, all neighbors in level 0 are filled up + // to the maximum size allowed (2 * M). This option is used by + // IndexHHNSWCagra to create a full base layer graph that is + // used when GpuIndexCagra::copyFrom(IndexHNSWCagra*) is invoked. + bool keep_max_size_level0 = false; + explicit IndexHNSW(int d = 0, int M = 32, MetricType metric = METRIC_L2); explicit IndexHNSW(Index* storage, int M = 32); @@ -81,7 +93,8 @@ struct IndexHNSW : Index { float* distances, idx_t* labels, int nprobe = 1, - int search_type = 1) const; + int search_type = 1, + const SearchParameters* params = nullptr) const; /// alternative graph building void init_level_0_from_knngraph(int k, const float* D, const idx_t* I); @@ -148,4 +161,33 @@ struct IndexHNSW2Level : IndexHNSW { const SearchParameters* params = nullptr) const override; }; +struct IndexHNSWCagra : IndexHNSW { + IndexHNSWCagra(); + IndexHNSWCagra(int d, int M, MetricType metric = METRIC_L2); + + /// When set to true, the index is immutable. + /// This option is used to copy the knn graph from GpuIndexCagra + /// to the base level of IndexHNSWCagra without adding upper levels. + /// Doing so enables to search the HNSW index, but removes the + /// ability to add vectors. + bool base_level_only = false; + + /// When `base_level_only` is set to `True`, the search function + /// searches only the base level knn graph of the HNSW index. + /// This parameter selects the entry point by randomly selecting + /// some points and using the best one. 
+ int num_base_level_search_entrypoints = 32; + + void add(idx_t n, const float* x) override; + + /// entry point for search + void search( + idx_t n, + const float* x, + idx_t k, + float* distances, + idx_t* labels, + const SearchParameters* params = nullptr) const override; +}; + } // namespace faiss diff --git a/thirdparty/faiss/faiss/IndexIVFFastScan.cpp b/thirdparty/faiss/faiss/IndexIVFFastScan.cpp index e3093e5fa..d93ac1481 100644 --- a/thirdparty/faiss/faiss/IndexIVFFastScan.cpp +++ b/thirdparty/faiss/faiss/IndexIVFFastScan.cpp @@ -974,12 +974,6 @@ void IndexIVFFastScan::search_implem_10( size_t* nlist_out, const NormTableScaler* scaler, const IVFSearchParameters* params) const { - // const size_t nprobe = params ? params->nprobe : this->nprobe; - // const size_t max_codes = params ? params->max_codes : this->max_codes; - // const IDSelector* sel = params ? params->sel : nullptr; - // const SearchParameters* quantizer_params = - // params ? params->quantizer_params : nullptr; - size_t dim12 = ksub * M2; AlignedTable dis_tables; AlignedTable biases; diff --git a/thirdparty/faiss/faiss/IndexNNDescent.cpp b/thirdparty/faiss/faiss/IndexNNDescent.cpp index 27bd6e33e..382e9c41c 100644 --- a/thirdparty/faiss/faiss/IndexNNDescent.cpp +++ b/thirdparty/faiss/faiss/IndexNNDescent.cpp @@ -58,35 +58,6 @@ using storage_idx_t = NNDescent::storage_idx_t; namespace { -/* Wrap the distance computer into one that negates the - distances. 
This makes supporting INNER_PRODUCE search easier */ - -struct NegativeDistanceComputer : DistanceComputer { - /// owned by this - DistanceComputer* basedis; - - explicit NegativeDistanceComputer(DistanceComputer* basedis) - : basedis(basedis) {} - - void set_query(const float* x) override { - basedis->set_query(x); - } - - /// compute distance of vector i to current query - float operator()(idx_t i) override { - return -(*basedis)(i); - } - - /// compute distance between two stored vectors - float symmetric_dis(idx_t i, idx_t j) override { - return -basedis->symmetric_dis(i, j); - } - - ~NegativeDistanceComputer() override { - delete basedis; - } -}; - DistanceComputer* storage_distance_computer(const Index* storage) { if (is_similarity_metric(storage->metric_type)) { return new NegativeDistanceComputer(storage->get_distance_computer()); diff --git a/thirdparty/faiss/faiss/IndexScalarQuantizer.cpp b/thirdparty/faiss/faiss/IndexScalarQuantizer.cpp index d7719e494..efdd0bc7d 100644 --- a/thirdparty/faiss/faiss/IndexScalarQuantizer.cpp +++ b/thirdparty/faiss/faiss/IndexScalarQuantizer.cpp @@ -32,7 +32,9 @@ IndexScalarQuantizer::IndexScalarQuantizer( MetricType metric) : IndexFlatCodes(0, d, metric), sq(d, qtype) { is_trained = qtype == ScalarQuantizer::QT_fp16 || - qtype == ScalarQuantizer::QT_8bit_direct; + qtype == ScalarQuantizer::QT_8bit_direct || + qtype == ScalarQuantizer::QT_bf16 || + qtype == ScalarQuantizer::QT_8bit_direct_signed; code_size = sq.code_size; } diff --git a/thirdparty/faiss/faiss/MetricType.h b/thirdparty/faiss/faiss/MetricType.h index 6904fa203..067cb142a 100644 --- a/thirdparty/faiss/faiss/MetricType.h +++ b/thirdparty/faiss/faiss/MetricType.h @@ -39,6 +39,10 @@ enum MetricType { METRIC_Canberra = 20, METRIC_BrayCurtis = 21, METRIC_JensenShannon = 22, + /// Squared Euclidean distance, ignoring NaNs + METRIC_NaNEuclidean = 24, + /// abs(x | y): the distance to a hyperplane + METRIC_ABS_INNER_PRODUCT = 25, }; /// all vector indices are this type 
diff --git a/thirdparty/faiss/faiss/gpu/GpuIcmEncoder.cu b/thirdparty/faiss/faiss/gpu/GpuIcmEncoder.cu index 434fae9e3..8bd60f91b 100644 --- a/thirdparty/faiss/faiss/gpu/GpuIcmEncoder.cu +++ b/thirdparty/faiss/faiss/gpu/GpuIcmEncoder.cu @@ -82,7 +82,7 @@ void GpuIcmEncoder::encode( size_t n, size_t ils_iters) const { size_t nshards = shards->size(); - size_t shard_size = (n + nshards - 1) / nshards; + size_t base_shard_size = n / nshards; auto codebooks = lsq->codebooks.data(); auto M = lsq->M; @@ -94,8 +94,14 @@ void GpuIcmEncoder::encode( // split input data auto fn = [=](int idx, IcmEncoderImpl* encoder) { - size_t i0 = idx * shard_size; - size_t ni = std::min(shard_size, n - i0); + size_t i0 = idx * base_shard_size + std::min(size_t(idx), n % nshards); + size_t ni = base_shard_size; + if (ni < n % nshards) { + ++ni; + } + if (ni <= 0) { // only if n < nshards + return; + } auto xi = x + i0 * d; auto ci = codes + i0 * M; std::mt19937 geni(idx + seed); // different seed for each shard diff --git a/thirdparty/faiss/faiss/impl/AuxIndexStructures.cpp b/thirdparty/faiss/faiss/impl/AuxIndexStructures.cpp index cebe8a1e2..e2b2791e5 100644 --- a/thirdparty/faiss/faiss/impl/AuxIndexStructures.cpp +++ b/thirdparty/faiss/faiss/impl/AuxIndexStructures.cpp @@ -236,4 +236,29 @@ size_t InterruptCallback::get_period_hint(size_t flops) { return std::max((size_t)10 * 10 * 1000 * 1000 / (flops + 1), (size_t)1); } +void TimeoutCallback::set_timeout(double timeout_in_seconds) { + timeout = timeout_in_seconds; + start = std::chrono::steady_clock::now(); +} + +bool TimeoutCallback::want_interrupt() { + if (timeout == 0) { + return false; + } + auto end = std::chrono::steady_clock::now(); + std::chrono::duration duration = end - start; + float elapsed_in_seconds = duration.count() / 1000.0; + if (elapsed_in_seconds > timeout) { + timeout = 0; + return true; + } + return false; +} + +void TimeoutCallback::reset(double timeout_in_seconds) { + auto tc(new faiss::TimeoutCallback()); + 
faiss::InterruptCallback::instance.reset(tc); + tc->set_timeout(timeout_in_seconds); +} + } // namespace faiss diff --git a/thirdparty/faiss/faiss/impl/AuxIndexStructures.h b/thirdparty/faiss/faiss/impl/AuxIndexStructures.h index f8b5cca84..7e12a1a3a 100644 --- a/thirdparty/faiss/faiss/impl/AuxIndexStructures.h +++ b/thirdparty/faiss/faiss/impl/AuxIndexStructures.h @@ -161,6 +161,14 @@ struct FAISS_API InterruptCallback { static size_t get_period_hint(size_t flops); }; +struct TimeoutCallback : InterruptCallback { + std::chrono::time_point start; + double timeout; + bool want_interrupt() override; + void set_timeout(double timeout_in_seconds); + static void reset(double timeout_in_seconds); +}; + /// set implementation optimized for fast access. struct VisitedTable { std::vector visited; diff --git a/thirdparty/faiss/faiss/impl/DistanceComputer.h b/thirdparty/faiss/faiss/impl/DistanceComputer.h index dc46d113f..5ac3a702c 100644 --- a/thirdparty/faiss/faiss/impl/DistanceComputer.h +++ b/thirdparty/faiss/faiss/impl/DistanceComputer.h @@ -59,6 +59,52 @@ struct DistanceComputer { virtual ~DistanceComputer() {} }; +/* Wrap the distance computer into one that negates the + distances. 
This makes supporting INNER_PRODUCT search easier */ + +struct NegativeDistanceComputer : DistanceComputer { + /// owned by this + DistanceComputer* basedis; + + explicit NegativeDistanceComputer(DistanceComputer* basedis) + : basedis(basedis) {} + + void set_query(const float* x) override { + basedis->set_query(x); + } + + /// compute distance of vector i to current query + float operator()(idx_t i) override { + return -(*basedis)(i); + } + + void distances_batch_4( + const idx_t idx0, + const idx_t idx1, + const idx_t idx2, + const idx_t idx3, + float& dis0, + float& dis1, + float& dis2, + float& dis3) override { + basedis->distances_batch_4( + idx0, idx1, idx2, idx3, dis0, dis1, dis2, dis3); + dis0 = -dis0; + dis1 = -dis1; + dis2 = -dis2; + dis3 = -dis3; + } + + /// compute distance between two stored vectors + float symmetric_dis(idx_t i, idx_t j) override { + return -basedis->symmetric_dis(i, j); + } + + virtual ~NegativeDistanceComputer() { + delete basedis; + } +}; + /************************************************************* * Specialized version of the DistanceComputer when we know that codes are * laid out in a flat index. diff --git a/thirdparty/faiss/faiss/impl/HNSW.cpp b/thirdparty/faiss/faiss/impl/HNSW.cpp index d8c822596..3ba5f72f6 100644 --- a/thirdparty/faiss/faiss/impl/HNSW.cpp +++ b/thirdparty/faiss/faiss/impl/HNSW.cpp @@ -7,6 +7,7 @@ #include +#include #include #include @@ -215,8 +216,8 @@ int HNSW::prepare_level_tab(size_t n, bool preset_levels) { if (pt_level > max_level) max_level = pt_level; offsets.push_back(offsets.back() + cum_nb_neighbors(pt_level + 1)); - neighbors.resize(offsets.back(), -1); } + neighbors.resize(offsets.back(), -1); return max_level; } @@ -229,7 +230,14 @@ void HNSW::shrink_neighbor_list( DistanceComputer& qdis, std::priority_queue& input, std::vector& output, - int max_size) { + int max_size, + bool keep_max_size_level0) { + // This prevents number of neighbors at + // level 0 from being shrunk to less than 2 * M. 
+ // This is essential in making sure + // `faiss::gpu::GpuIndexCagra::copyFrom(IndexHNSWCagra*)` is functional + std::vector outsiders; + while (input.size() > 0) { NodeDistFarther v1 = input.top(); input.pop(); @@ -250,8 +258,15 @@ void HNSW::shrink_neighbor_list( if (output.size() >= max_size) { return; } + } else if (keep_max_size_level0) { + outsiders.push_back(v1); } } + size_t idx = 0; + while (keep_max_size_level0 && (output.size() < max_size) && + (idx < outsiders.size())) { + output.push_back(outsiders[idx++]); + } } namespace { @@ -268,7 +283,8 @@ using NodeDistFarther = HNSW::NodeDistFarther; void shrink_neighbor_list( DistanceComputer& qdis, std::priority_queue& resultSet1, - int max_size) { + int max_size, + bool keep_max_size_level0 = false) { if (resultSet1.size() < max_size) { return; } @@ -280,7 +296,8 @@ void shrink_neighbor_list( resultSet1.pop(); } - HNSW::shrink_neighbor_list(qdis, resultSet, returnlist, max_size); + HNSW::shrink_neighbor_list( + qdis, resultSet, returnlist, max_size, keep_max_size_level0); for (NodeDistFarther curen2 : returnlist) { resultSet1.emplace(curen2.d, curen2.id); @@ -294,7 +311,8 @@ void add_link( DistanceComputer& qdis, storage_idx_t src, storage_idx_t dest, - int level) { + int level, + bool keep_max_size_level0 = false) { size_t begin, end; hnsw.neighbor_range(src, level, &begin, &end); if (hnsw.neighbors[end - 1] == -1) { @@ -319,7 +337,7 @@ void add_link( resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh); } - shrink_neighbor_list(qdis, resultSet, end - begin); + shrink_neighbor_list(qdis, resultSet, end - begin, keep_max_size_level0); // ...and back size_t i = begin; @@ -429,7 +447,8 @@ void HNSW::add_links_starting_from( float d_nearest, int level, omp_lock_t* locks, - VisitedTable& vt) { + VisitedTable& vt, + bool keep_max_size_level0) { std::priority_queue link_targets; search_neighbors_to_add( @@ -438,13 +457,13 @@ void HNSW::add_links_starting_from( // but we can afford only this many neighbors int 
M = nb_neighbors(level); - ::faiss::shrink_neighbor_list(ptdis, link_targets, M); + ::faiss::shrink_neighbor_list(ptdis, link_targets, M, keep_max_size_level0); std::vector neighbors; neighbors.reserve(link_targets.size()); while (!link_targets.empty()) { storage_idx_t other_id = link_targets.top().id; - add_link(*this, ptdis, pt_id, other_id, level); + add_link(*this, ptdis, pt_id, other_id, level, keep_max_size_level0); neighbors.push_back(other_id); link_targets.pop(); } @@ -452,7 +471,7 @@ void HNSW::add_links_starting_from( omp_unset_lock(&locks[pt_id]); for (storage_idx_t other_id : neighbors) { omp_set_lock(&locks[other_id]); - add_link(*this, ptdis, other_id, pt_id, level); + add_link(*this, ptdis, other_id, pt_id, level, keep_max_size_level0); omp_unset_lock(&locks[other_id]); } omp_set_lock(&locks[pt_id]); @@ -467,7 +486,8 @@ void HNSW::add_with_locks( int pt_level, int pt_id, std::vector& locks, - VisitedTable& vt) { + VisitedTable& vt, + bool keep_max_size_level0) { // greedy search on upper levels storage_idx_t nearest; @@ -496,7 +516,14 @@ void HNSW::add_with_locks( for (; level >= 0; level--) { add_links_starting_from( - ptdis, pt_id, nearest, d_nearest, level, locks.data(), vt); + ptdis, + pt_id, + nearest, + d_nearest, + level, + locks.data(), + vt, + keep_max_size_level0); } omp_unset_lock(&locks[pt_id]); @@ -910,9 +937,12 @@ void HNSW::search_level_0( const float* nearest_d, int search_type, HNSWStats& search_stats, - VisitedTable& vt) const { + VisitedTable& vt, + const SearchParametersHNSW* params) const { const HNSW& hnsw = *this; + auto efSearch = params ? 
params->efSearch : hnsw.efSearch; int k = extract_k_from_ResultHandler(res); + if (search_type == 1) { int nres = 0; @@ -925,16 +955,24 @@ void HNSW::search_level_0( if (vt.get(cj)) continue; - int candidates_size = std::max(hnsw.efSearch, k); + int candidates_size = std::max(efSearch, k); MinimaxHeap candidates(candidates_size); candidates.push(cj, nearest_d[j]); nres = search_from_candidates( - hnsw, qdis, res, candidates, vt, search_stats, 0, nres); + hnsw, + qdis, + res, + candidates, + vt, + search_stats, + 0, + nres, + params); } } else if (search_type == 2) { - int candidates_size = std::max(hnsw.efSearch, int(k)); + int candidates_size = std::max(efSearch, int(k)); candidates_size = std::max(candidates_size, int(nprobe)); MinimaxHeap candidates(candidates_size); @@ -947,7 +985,7 @@ void HNSW::search_level_0( } search_from_candidates( - hnsw, qdis, res, candidates, vt, search_stats, 0); + hnsw, qdis, res, candidates, vt, search_stats, 0, 0, params); } } diff --git a/thirdparty/faiss/faiss/impl/HNSW.h b/thirdparty/faiss/faiss/impl/HNSW.h index 8261423cd..f3aacf8a5 100644 --- a/thirdparty/faiss/faiss/impl/HNSW.h +++ b/thirdparty/faiss/faiss/impl/HNSW.h @@ -184,7 +184,8 @@ struct HNSW { float d_nearest, int level, omp_lock_t* locks, - VisitedTable& vt); + VisitedTable& vt, + bool keep_max_size_level0 = false); /** add point pt_id on all levels <= pt_level and build the link * structure for them. 
*/ @@ -193,7 +194,8 @@ struct HNSW { int pt_level, int pt_id, std::vector& locks, - VisitedTable& vt); + VisitedTable& vt, + bool keep_max_size_level0 = false); /// search interface for 1 point, single thread HNSWStats search( @@ -211,7 +213,8 @@ struct HNSW { const float* nearest_d, int search_type, HNSWStats& search_stats, - VisitedTable& vt) const; + VisitedTable& vt, + const SearchParametersHNSW* params = nullptr) const; void reset(); @@ -224,7 +227,8 @@ struct HNSW { DistanceComputer& qdis, std::priority_queue& input, std::vector& output, - int max_size); + int max_size, + bool keep_max_size_level0 = false); void permute_entries(const idx_t* map); }; diff --git a/thirdparty/faiss/faiss/impl/NNDescent.cpp b/thirdparty/faiss/faiss/impl/NNDescent.cpp index b609aba39..5afcdaf5b 100644 --- a/thirdparty/faiss/faiss/impl/NNDescent.cpp +++ b/thirdparty/faiss/faiss/impl/NNDescent.cpp @@ -154,15 +154,20 @@ NNDescent::NNDescent(const int d, const int K) : K(K), d(d) { NNDescent::~NNDescent() {} void NNDescent::join(DistanceComputer& qdis) { + idx_t check_period = InterruptCallback::get_period_hint(d * search_L); + for (idx_t i0 = 0; i0 < (idx_t)ntotal; i0 += check_period) { + idx_t i1 = std::min(i0 + check_period, (idx_t)ntotal); #pragma omp parallel for default(shared) schedule(dynamic, 100) - for (int n = 0; n < ntotal; n++) { - graph[n].join([&](int i, int j) { - if (i != j) { - float dist = qdis.symmetric_dis(i, j); - graph[i].insert(j, dist); - graph[j].insert(i, dist); - } - }); + for (idx_t n = i0; n < i1; n++) { + graph[n].join([&](int i, int j) { + if (i != j) { + float dist = qdis.symmetric_dis(i, j); + graph[i].insert(j, dist); + graph[j].insert(i, dist); + } + }); + } + InterruptCallback::check(); } } diff --git a/thirdparty/faiss/faiss/impl/NSG.cpp b/thirdparty/faiss/faiss/impl/NSG.cpp index 1f30b576b..c97494334 100644 --- a/thirdparty/faiss/faiss/impl/NSG.cpp +++ b/thirdparty/faiss/faiss/impl/NSG.cpp @@ -25,35 +25,6 @@ namespace { // It needs to be smaller 
than 0 constexpr int EMPTY_ID = -1; -/* Wrap the distance computer into one that negates the - distances. This makes supporting INNER_PRODUCE search easier */ - -struct NegativeDistanceComputer : DistanceComputer { - /// owned by this - DistanceComputer* basedis; - - explicit NegativeDistanceComputer(DistanceComputer* basedis) - : basedis(basedis) {} - - void set_query(const float* x) override { - basedis->set_query(x); - } - - /// compute distance of vector i to current query - float operator()(idx_t i) override { - return -(*basedis)(i); - } - - /// compute distance between two stored vectors - float symmetric_dis(idx_t i, idx_t j) override { - return -basedis->symmetric_dis(i, j); - } - - ~NegativeDistanceComputer() override { - delete basedis; - } -}; - } // namespace DistanceComputer* storage_distance_computer(const Index* storage) { diff --git a/thirdparty/faiss/faiss/impl/ScalarQuantizer.cpp b/thirdparty/faiss/faiss/impl/ScalarQuantizer.cpp index 2c81a3558..449cded8c 100644 --- a/thirdparty/faiss/faiss/impl/ScalarQuantizer.cpp +++ b/thirdparty/faiss/faiss/impl/ScalarQuantizer.cpp @@ -75,6 +75,7 @@ void ScalarQuantizer::set_derived_sizes() { case QT_8bit: case QT_8bit_uniform: case QT_8bit_direct: + case QT_8bit_direct_signed: code_size = d; bits = 8; break; @@ -91,6 +92,10 @@ void ScalarQuantizer::set_derived_sizes() { code_size = d * 2; bits = 16; break; + case QT_bf16: + code_size = d * 2; + bits = 16; + break; } } @@ -127,6 +132,8 @@ void ScalarQuantizer::train(size_t n, const float* x) { break; case QT_fp16: case QT_8bit_direct: + case QT_bf16: + case QT_8bit_direct_signed: // no training necessary break; } diff --git a/thirdparty/faiss/faiss/impl/ScalarQuantizer.h b/thirdparty/faiss/faiss/impl/ScalarQuantizer.h index a6ac1a67c..2b4b856ad 100644 --- a/thirdparty/faiss/faiss/impl/ScalarQuantizer.h +++ b/thirdparty/faiss/faiss/impl/ScalarQuantizer.h @@ -31,7 +31,10 @@ struct ScalarQuantizer : Quantizer { QT_4bit_uniform, QT_fp16, QT_8bit_direct, ///< fast 
indexing of uint8s - QT_6bit, ///< 6 bits per component + QT_6bit, ///< 6 bits per component, + QT_bf16, + QT_8bit_direct_signed, ///< fast indexing of signed int8s ranging from + ///< [-128 to 127] }; QuantizerType qtype = QT_8bit; diff --git a/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec.h b/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec.h index 220de4cef..6a20a0ca8 100644 --- a/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec.h +++ b/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -227,6 +228,37 @@ struct QuantizerFP16<1> : SQuantizer { } }; +/******************************************************************* + * BF16 quantizer + *******************************************************************/ + +template +struct QuantizerBF16 {}; + +template <> +struct QuantizerBF16<1> : ScalarQuantizer::SQuantizer { + const size_t d; + + QuantizerBF16(size_t d, const std::vector& /* unused */) : d(d) {} + + void encode_vector(const float* x, uint8_t* code) const final { + for (size_t i = 0; i < d; i++) { + ((uint16_t*)code)[i] = encode_bf16(x[i]); + } + } + + void decode_vector(const uint8_t* code, float* x) const final { + for (size_t i = 0; i < d; i++) { + x[i] = decode_bf16(((uint16_t*)code)[i]); + } + } + + FAISS_ALWAYS_INLINE float reconstruct_component(const uint8_t* code, int i) + const { + return decode_bf16(((uint16_t*)code)[i]); + } +}; + /******************************************************************* * 8bit_direct quantizer *******************************************************************/ @@ -259,6 +291,38 @@ struct Quantizer8bitDirect<1> : SQuantizer { } }; +/******************************************************************* + * 8bit_direct_signed quantizer + *******************************************************************/ + +template +struct Quantizer8bitDirectSigned {}; + +template <> +struct Quantizer8bitDirectSigned<1> : ScalarQuantizer::SQuantizer { + const 
size_t d; + + Quantizer8bitDirectSigned(size_t d, const std::vector& /* unused */) + : d(d) {} + + void encode_vector(const float* x, uint8_t* code) const final { + for (size_t i = 0; i < d; i++) { + code[i] = (uint8_t)(x[i] + 128); + } + } + + void decode_vector(const uint8_t* code, float* x) const final { + for (size_t i = 0; i < d; i++) { + x[i] = code[i] - 128; + } + } + + FAISS_ALWAYS_INLINE float reconstruct_component(const uint8_t* code, int i) + const { + return code[i] - 128; + } +}; + template SQuantizer* select_quantizer_1( QuantizerType qtype, @@ -282,8 +346,12 @@ SQuantizer* select_quantizer_1( d, trained); case ScalarQuantizer::QT_fp16: return new QuantizerFP16(d, trained); + case ScalarQuantizer::QT_bf16: + return new QuantizerBF16(d, trained); case ScalarQuantizer::QT_8bit_direct: return new Quantizer8bitDirect(d, trained); + case ScalarQuantizer::QT_8bit_direct_signed: + return new Quantizer8bitDirectSigned(d, trained); } FAISS_THROW_MSG("unknown qtype"); } @@ -511,6 +579,10 @@ SQDistanceComputer* select_distance_computer( return new DCTemplate, Sim, SIMDWIDTH>( d, trained); + case ScalarQuantizer::QT_bf16: + return new DCTemplate, Sim, SIMDWIDTH>( + d, trained); + case ScalarQuantizer::QT_8bit_direct: if (d % 16 == 0) { return new DistanceComputerByte(d, trained); @@ -520,6 +592,12 @@ SQDistanceComputer* select_distance_computer( Sim, SIMDWIDTH>(d, trained); } + + case ScalarQuantizer::QT_8bit_direct_signed: + return new DCTemplate< + Quantizer8bitDirectSigned, + Sim, + SIMDWIDTH>(d, trained); } FAISS_THROW_MSG("unknown qtype"); return nullptr; @@ -613,6 +691,11 @@ InvertedListScanner* sel1_InvertedListScanner( QuantizerFP16, Similarity, SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); + case ScalarQuantizer::QT_bf16: + return sel2_InvertedListScanner, + Similarity, + SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); case ScalarQuantizer::QT_8bit_direct: if (sq->d % 16 == 0) { return sel2_InvertedListScanner< @@ -624,6 +707,11 @@ 
InvertedListScanner* sel1_InvertedListScanner( Similarity, SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); } + case ScalarQuantizer::QT_8bit_direct_signed: + return sel2_InvertedListScanner, + Similarity, + SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); } FAISS_THROW_MSG("unknown qtype"); diff --git a/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_avx.h b/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_avx.h index 6bc7a62dd..fc1ad255b 100644 --- a/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_avx.h +++ b/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_avx.h @@ -190,6 +190,33 @@ struct QuantizerFP16_avx<8> : public QuantizerFP16<1> { } }; +/******************************************************************* + * BF16 quantizer + *******************************************************************/ + +template +struct QuantizerBF16_avx {}; + +template <> +struct QuantizerBF16_avx<1> : public QuantizerBF16<1> { + QuantizerBF16_avx(size_t d, const std::vector& unused) + : QuantizerBF16<1>(d, unused) {} +}; + +template <> +struct QuantizerBF16_avx<8> : public QuantizerBF16<1> { + QuantizerBF16_avx(size_t d, const std::vector& trained) + : QuantizerBF16<1>(d, trained) {} + + FAISS_ALWAYS_INLINE __m256 + reconstruct_8_components(const uint8_t* code, int i) const { + __m128i code_128i = _mm_loadu_si128((const __m128i*)(code + 2 * i)); + __m256i code_256i = _mm256_cvtepu16_epi32(code_128i); + code_256i = _mm256_slli_epi32(code_256i, 16); + return _mm256_castsi256_ps(code_256i); + } +}; + /******************************************************************* * 8bit_direct quantizer *******************************************************************/ @@ -216,6 +243,34 @@ struct Quantizer8bitDirect_avx<8> : public Quantizer8bitDirect<1> { } }; +/******************************************************************* + * 8bit_direct_signed quantizer + *******************************************************************/ + +template +struct Quantizer8bitDirectSigned_avx {}; + 
+template <> +struct Quantizer8bitDirectSigned_avx<1> : public Quantizer8bitDirectSigned<1> { + Quantizer8bitDirectSigned_avx(size_t d, const std::vector& unused) + : Quantizer8bitDirectSigned(d, unused) {} +}; + +template <> +struct Quantizer8bitDirectSigned_avx<8> : public Quantizer8bitDirectSigned<1> { + Quantizer8bitDirectSigned_avx(size_t d, const std::vector& trained) + : Quantizer8bitDirectSigned<1>(d, trained) {} + + FAISS_ALWAYS_INLINE __m256 + reconstruct_8_components(const uint8_t* code, int i) const { + __m128i x8 = _mm_loadl_epi64((__m128i*)(code + i)); // 8 * int8 + __m256i y8 = _mm256_cvtepu8_epi32(x8); // 8 * int32 + __m256i c8 = _mm256_set1_epi32(128); + __m256i z8 = _mm256_sub_epi32(y8, c8); // subtract 128 from all lanes + return _mm256_cvtepi32_ps(z8); // 8 * float32 + } +}; + template SQuantizer* select_quantizer_1_avx( QuantizerType qtype, @@ -239,8 +294,12 @@ SQuantizer* select_quantizer_1_avx( d, trained); case QuantizerType::QT_fp16: return new QuantizerFP16_avx(d, trained); + case QuantizerType::QT_bf16: + return new QuantizerBF16_avx(d, trained); case QuantizerType::QT_8bit_direct: return new Quantizer8bitDirect_avx(d, trained); + case QuantizerType::QT_8bit_direct_signed: + return new Quantizer8bitDirectSigned_avx(d, trained); } FAISS_THROW_MSG("unknown qtype"); } @@ -581,6 +640,12 @@ SQDistanceComputer* select_distance_computer_avx( Sim, SIMDWIDTH>(d, trained); + case QuantizerType::QT_bf16: + return new DCTemplate_avx< + QuantizerBF16_avx, + Sim, + SIMDWIDTH>(d, trained); + case QuantizerType::QT_8bit_direct: if (d % 16 == 0) { return new DistanceComputerByte_avx(d, trained); @@ -590,6 +655,12 @@ SQDistanceComputer* select_distance_computer_avx( Sim, SIMDWIDTH>(d, trained); } + + case ScalarQuantizer::QT_8bit_direct_signed: + return new DCTemplate_avx< + Quantizer8bitDirectSigned_avx, + Sim, + SIMDWIDTH>(d, trained); } FAISS_THROW_MSG("unknown qtype"); return nullptr; @@ -659,6 +730,11 @@ InvertedListScanner* 
sel1_InvertedListScanner_avx( QuantizerFP16_avx, Similarity, SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); + case QuantizerType::QT_bf16: + return sel2_InvertedListScanner_avx, + Similarity, + SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); case QuantizerType::QT_8bit_direct: if (sq->d % 16 == 0) { return sel2_InvertedListScanner_avx< @@ -670,6 +746,11 @@ InvertedListScanner* sel1_InvertedListScanner_avx( Similarity, SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); } + case ScalarQuantizer::QT_8bit_direct_signed: + return sel2_InvertedListScanner_avx, + Similarity, + SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); } FAISS_THROW_MSG("unknown qtype"); diff --git a/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_avx512.h b/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_avx512.h index b93ba9465..64e4c4a56 100644 --- a/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_avx512.h +++ b/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_avx512.h @@ -204,6 +204,39 @@ struct QuantizerFP16_avx512<16> : public QuantizerFP16_avx<8> { } }; +/******************************************************************* + * BF16 quantizer + *******************************************************************/ + +template +struct QuantizerBF16_avx512 {}; + +template <> +struct QuantizerBF16_avx512<1> : public QuantizerBF16_avx<1> { + QuantizerBF16_avx512(size_t d, const std::vector& unused) + : QuantizerBF16_avx<1>(d, unused) {} +}; + +template <> +struct QuantizerBF16_avx512<8> : public QuantizerBF16_avx<8> { + QuantizerBF16_avx512(size_t d, const std::vector& trained) + : QuantizerBF16_avx<8>(d, trained) {} +}; + +template <> +struct QuantizerBF16_avx512<16> : public QuantizerBF16_avx<8> { + QuantizerBF16_avx512(size_t d, const std::vector& trained) + : QuantizerBF16_avx<8>(d, trained) {} + + FAISS_ALWAYS_INLINE __m512 + reconstruct_16_components(const uint8_t* code, int i) const { + __m256i code_256i = _mm256_loadu_si256((const __m256i*)(code + 2 * i)); + __m512i code_512i = 
_mm512_cvtepu16_epi32(code_256i); + code_512i = _mm512_slli_epi32(code_512i, 16); + return _mm512_castsi512_ps(code_512i); + } +}; + /******************************************************************* * 8bit_direct quantizer *******************************************************************/ @@ -236,6 +269,40 @@ struct Quantizer8bitDirect_avx512<16> : public Quantizer8bitDirect_avx<8> { } }; +/******************************************************************* + * 8bit_direct_signed quantizer + *******************************************************************/ + +template +struct Quantizer8bitDirectSigned_avx512 {}; + +template <> +struct Quantizer8bitDirectSigned_avx512<1> : public Quantizer8bitDirectSigned_avx<1> { + Quantizer8bitDirectSigned_avx512(size_t d, const std::vector& unused) + : Quantizer8bitDirectSigned_avx<1>(d, unused) {} +}; + +template <> +struct Quantizer8bitDirectSigned_avx512<8> : public Quantizer8bitDirectSigned_avx<8> { + Quantizer8bitDirectSigned_avx512(size_t d, const std::vector& trained) + : Quantizer8bitDirectSigned_avx<8>(d, trained) {} +}; + +template <> +struct Quantizer8bitDirectSigned_avx512<16> : public Quantizer8bitDirectSigned_avx<8> { + Quantizer8bitDirectSigned_avx512(size_t d, const std::vector& trained) + : Quantizer8bitDirectSigned_avx<8>(d, trained) {} + + FAISS_ALWAYS_INLINE __m512 + reconstruct_16_components(const uint8_t* code, int i) const { + __m256i x16 = _mm256_loadu_si256((__m256i*)(code + i)); // 16 * int8 + __m512i y16 = _mm512_cvtepu8_epi16(x16); // 16 * int32 + __m512i c16 = _mm512_set1_epi32(128); + __m512i z16 = _mm512_sub_epi32(y16, c16); // subtract 128 from all lanes + return _mm512_cvtepi32_ps(z16); // 16 * float32 + } +}; + template SQuantizer* select_quantizer_1_avx512( QuantizerType qtype, @@ -269,8 +336,12 @@ SQuantizer* select_quantizer_1_avx512( SIMDWIDTH>(d, trained); case QuantizerType::QT_fp16: return new QuantizerFP16_avx512(d, trained); + case QuantizerType::QT_bf16: + return new 
QuantizerBF16_avx512(d, trained); case QuantizerType::QT_8bit_direct: return new Quantizer8bitDirect_avx512(d, trained); + case QuantizerType::QT_8bit_direct_signed: + return new Quantizer8bitDirectSigned_avx512(d, trained); } FAISS_THROW_MSG("unknown qtype"); } @@ -653,6 +724,12 @@ SQDistanceComputer* select_distance_computer_avx512( Sim, SIMDWIDTH>(d, trained); + case QuantizerType::QT_bf16: + return new DCTemplate_avx512< + QuantizerBF16_avx512, + Sim, + SIMDWIDTH>(d, trained); + case QuantizerType::QT_8bit_direct: if (d % 16 == 0) { return new DistanceComputerByte_avx512( @@ -663,6 +740,12 @@ SQDistanceComputer* select_distance_computer_avx512( Sim, SIMDWIDTH>(d, trained); } + + case ScalarQuantizer::QT_8bit_direct_signed: + return new DCTemplate_avx512< + Quantizer8bitDirectSigned_avx512, + Sim, + SIMDWIDTH>(d, trained); } FAISS_THROW_MSG("unknown qtype"); return nullptr; @@ -732,6 +815,11 @@ InvertedListScanner* sel1_InvertedListScanner_avx512( QuantizerFP16_avx512, Similarity, SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); + case QuantizerType::QT_bf16: + return sel2_InvertedListScanner_avx512, + Similarity, + SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); case QuantizerType::QT_8bit_direct: if (sq->d % 16 == 0) { return sel2_InvertedListScanner_avx512< @@ -743,6 +831,11 @@ InvertedListScanner* sel1_InvertedListScanner_avx512( Similarity, SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); } + case ScalarQuantizer::QT_8bit_direct_signed: + return sel2_InvertedListScanner_avx512, + Similarity, + SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); } FAISS_THROW_MSG("unknown qtype"); diff --git a/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_neon.h b/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_neon.h index f272784e9..25cc36503 100644 --- a/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_neon.h +++ b/thirdparty/faiss/faiss/impl/ScalarQuantizerCodec_neon.h @@ -159,6 +159,33 @@ struct QuantizerFP16_neon<8> : public QuantizerFP16<1> { } }; 
+/******************************************************************* + * BF16 quantizer + *******************************************************************/ + +template +struct QuantizerBF16_neon {}; + +template <> +struct QuantizerBF16_neon<1> : public QuantizerBF16<1> { + QuantizerBF16_neon(size_t d, const std::vector& unused) + : QuantizerBF16<1>(d, unused) {} +}; + +template <> +struct QuantizerBF16_neon<8> : public QuantizerBF16<1> { + QuantizerBF16_neon(size_t d, const std::vector& trained) + : QuantizerBF16<1>(d, trained) {} + + FAISS_ALWAYS_INLINE float32x4x2_t + reconstruct_8_components(const uint8_t* code, int i) const { + uint16x4x2_t codei = vld1_u16_x2((const uint16_t*)(code + 2 * i)); + return {vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(codei.val[0]), 16)), + vreinterpretq_f32_u32( + vshlq_n_u32(vmovl_u16(codei.val[1]), 16))}; + } +}; + /******************************************************************* * 8bit_direct quantizer *******************************************************************/ @@ -179,13 +206,48 @@ struct Quantizer8bitDirect_neon<8> : public Quantizer8bitDirect<1> { FAISS_ALWAYS_INLINE float32x4x2_t reconstruct_8_components(const uint8_t* code, int i) const { - float32_t result[8] = {}; - for (size_t j = 0; j < 8; j++) { - result[j] = code[i + j]; - } - float32x4_t res1 = vld1q_f32(result); - float32x4_t res2 = vld1q_f32(result + 4); - return {res1, res2}; + uint8x8_t x8 = vld1_u8((const uint8_t*)(code + i)); + uint16x8_t y8 = vmovl_u8(x8); + uint16x4_t y8_0 = vget_low_u16(y8); + uint16x4_t y8_1 = vget_high_u16(y8); + + // convert uint16 -> uint32 -> fp32 + return {vcvtq_f32_u32(vmovl_u16(y8_0)), vcvtq_f32_u32(vmovl_u16(y8_1))}; + } +}; + +/******************************************************************* + * 8bit_direct_signed quantizer + *******************************************************************/ + +template +struct Quantizer8bitDirectSigned_neon {}; + +template <> +struct Quantizer8bitDirectSigned_neon<1> : public 
Quantizer8bitDirectSigned<1> { + Quantizer8bitDirectSigned_neon(size_t d, const std::vector& unused) + : Quantizer8bitDirectSigned(d, unused) {} +}; + +template <> +struct Quantizer8bitDirectSigned_neon<8> : public Quantizer8bitDirectSigned<1> { + Quantizer8bitDirectSigned_neon(size_t d, const std::vector& trained) + : Quantizer8bitDirectSigned<1>(d, trained) {} + + FAISS_ALWAYS_INLINE float32x4x2_t + reconstruct_8_components(const uint8_t* code, int i) const { + uint8x8_t x8 = vld1_u8((const uint8_t*)(code + i)); + uint16x8_t y8 = vmovl_u8(x8); // convert uint8 -> uint16 + uint16x4_t y8_0 = vget_low_u16(y8); + uint16x4_t y8_1 = vget_high_u16(y8); + + float32x4_t z8_0 = vcvtq_f32_u32( + vmovl_u16(y8_0)); // convert uint16 -> uint32 -> fp32 + float32x4_t z8_1 = vcvtq_f32_u32(vmovl_u16(y8_1)); + + // subtract 128 to convert into signed numbers + return {vsubq_f32(z8_0, vmovq_n_f32(128.0)), + vsubq_f32(z8_1, vmovq_n_f32(128.0))}; } }; @@ -212,8 +274,12 @@ SQuantizer* select_quantizer_1_neon( d, trained); case QuantizerType::QT_fp16: return new QuantizerFP16_neon(d, trained); + case QuantizerType::QT_bf16: + return new QuantizerBF16_neon(d, trained); case QuantizerType::QT_8bit_direct: return new Quantizer8bitDirect_neon(d, trained); + case QuantizerType::QT_8bit_direct_signed: + return new Quantizer8bitDirectSigned_neon(d, trained); } FAISS_THROW_MSG("unknown qtype"); } @@ -556,6 +622,12 @@ SQDistanceComputer* select_distance_computer_neon( Sim, SIMDWIDTH>(d, trained); + case QuantizerType::QT_bf16: + return new DCTemplate_neon< + QuantizerBF16_neon, + Sim, + SIMDWIDTH>(d, trained); + case QuantizerType::QT_8bit_direct: if (d % 16 == 0) { return new DistanceComputerByte_neon(d, trained); @@ -565,6 +637,12 @@ SQDistanceComputer* select_distance_computer_neon( Sim, SIMDWIDTH>(d, trained); } + + case ScalarQuantizer::QT_8bit_direct_signed: + return new DCTemplate_neon< + Quantizer8bitDirectSigned_neon, + Sim, + SIMDWIDTH>(d, trained); } FAISS_THROW_MSG("unknown qtype"); 
return nullptr; @@ -634,6 +712,11 @@ InvertedListScanner* sel1_InvertedListScanner_neon( QuantizerFP16_neon, Similarity, SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); + case QuantizerType::QT_bf16: + return sel2_InvertedListScanner_neon, + Similarity, + SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); case QuantizerType::QT_8bit_direct: if (sq->d % 16 == 0) { return sel2_InvertedListScanner_neon< @@ -645,6 +728,11 @@ InvertedListScanner* sel1_InvertedListScanner_neon( Similarity, SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); } + case ScalarQuantizer::QT_8bit_direct_signed: + return sel2_InvertedListScanner_neon, + Similarity, + SIMDWIDTH>>(sq, quantizer, store_pairs, sel, r); } FAISS_THROW_MSG("unknown qtype"); diff --git a/thirdparty/faiss/faiss/impl/code_distance/code_distance-avx2.h b/thirdparty/faiss/faiss/impl/code_distance/code_distance-avx2.h index 0aa1535b2..d37b02244 100644 --- a/thirdparty/faiss/faiss/impl/code_distance/code_distance-avx2.h +++ b/thirdparty/faiss/faiss/impl/code_distance/code_distance-avx2.h @@ -16,6 +16,11 @@ #include #include +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78782 +#if defined(__GNUC__) && __GNUC__ < 9 +#define _mm_loadu_si64(x) (_mm_loadl_epi64((__m128i_u*)x)) +#endif + namespace { inline float horizontal_sum(const __m128 v) { diff --git a/thirdparty/faiss/faiss/impl/index_read.cpp b/thirdparty/faiss/faiss/impl/index_read.cpp index 165683715..8fe5ad8e4 100644 --- a/thirdparty/faiss/faiss/impl/index_read.cpp +++ b/thirdparty/faiss/faiss/impl/index_read.cpp @@ -5,8 +5,6 @@ * LICENSE file in the root directory of this source tree. 
*/ -// -*- c++ -*- - #include #include @@ -684,7 +682,11 @@ Index* read_index(IOReader* f, int io_flags) { Index* idx = nullptr; uint32_t h; READ1(h); - if (h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) { + if (h == fourcc("null")) { + // denotes a missing index, useful for some cases + return nullptr; + } else if ( + h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) { IndexFlat* idxf; if (h == fourcc("IxFI")) { idxf = new IndexFlatIP(); @@ -1137,7 +1139,7 @@ Index* read_index(IOReader* f, int io_flags) { idx = idxp; } else if ( h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") || - h == fourcc("IHN2")) { + h == fourcc("IHN2") || h == fourcc("IHNc")) { IndexHNSW* idxhnsw = nullptr; if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat(); @@ -1147,10 +1149,18 @@ Index* read_index(IOReader* f, int io_flags) { idxhnsw = new IndexHNSWSQ(); if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level(); + if (h == fourcc("IHNc")) + idxhnsw = new IndexHNSWCagra(); read_index_header(idxhnsw, f); + if (h == fourcc("IHNc")) { + READ1(idxhnsw->keep_max_size_level0); + auto idx_hnsw_cagra = dynamic_cast(idxhnsw); + READ1(idx_hnsw_cagra->base_level_only); + READ1(idx_hnsw_cagra->num_base_level_search_entrypoints); + } read_HNSW(&idxhnsw->hnsw, f); idxhnsw->storage = read_index(f, io_flags); - idxhnsw->own_fields = true; + idxhnsw->own_fields = idxhnsw->storage != nullptr; if (h == fourcc("IHNp") && !(io_flags & IO_FLAG_PQ_SKIP_SDC_TABLE)) { dynamic_cast(idxhnsw->storage)->pq.compute_sdc_table(); } diff --git a/thirdparty/faiss/faiss/impl/index_write.cpp b/thirdparty/faiss/faiss/impl/index_write.cpp index 21fc0bb11..d57c6edbf 100644 --- a/thirdparty/faiss/faiss/impl/index_write.cpp +++ b/thirdparty/faiss/faiss/impl/index_write.cpp @@ -5,8 +5,6 @@ * LICENSE file in the root directory of this source tree. 
*/ -// -*- c++ -*- - #include #include @@ -556,8 +554,12 @@ static void write_ivf_header(const IndexIVF* ivf, IOWriter* f) { write_direct_map(&ivf->direct_map, f); } -void write_index(const Index* idx, IOWriter* f) { - if (const IndexFlat* idxf = dynamic_cast(idx)) { +void write_index(const Index* idx, IOWriter* f, int io_flags) { + if (idx == nullptr) { + // eg. for a storage component of HNSW that is set to nullptr + uint32_t h = fourcc("null"); + WRITE1(h); + } else if (const IndexFlat* idxf = dynamic_cast(idx)) { uint32_t h = fourcc(idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" : idxf->metric_type == METRIC_L2 ? "IxF2" @@ -945,12 +947,24 @@ void write_index(const Index* idx, IOWriter* f) { : dynamic_cast(idx) ? fourcc("IHNp") : dynamic_cast(idx) ? fourcc("IHNs") : dynamic_cast(idx) ? fourcc("IHN2") + : dynamic_cast(idx) ? fourcc("IHNc") : 0; FAISS_THROW_IF_NOT(h != 0); WRITE1(h); write_index_header(idxhnsw, f); + if (h == fourcc("IHNc")) { + WRITE1(idxhnsw->keep_max_size_level0); + auto idx_hnsw_cagra = dynamic_cast(idxhnsw); + WRITE1(idx_hnsw_cagra->base_level_only); + WRITE1(idx_hnsw_cagra->num_base_level_search_entrypoints); + } write_HNSW(&idxhnsw->hnsw, f); - write_index(idxhnsw->storage, f); + if (io_flags & IO_FLAG_SKIP_STORAGE) { + uint32_t n4 = fourcc("null"); + WRITE1(n4); + } else { + write_index(idxhnsw->storage, f); + } } else if (const IndexNSG* idxnsg = dynamic_cast(idx)) { uint32_t h = dynamic_cast(idx) ? fourcc("INSf") : dynamic_cast(idx) ? 
fourcc("INSp") @@ -1030,14 +1044,15 @@ void write_index(const Index* idx, IOWriter* f) { } } -void write_index(const Index* idx, FILE* f) { + +void write_index(const Index* idx, FILE* f, int io_flags) { FileIOWriter writer(f); - write_index(idx, &writer); + write_index(idx, &writer, io_flags); } -void write_index(const Index* idx, const char* fname) { +void write_index(const Index* idx, const char* fname, int io_flags) { FileIOWriter writer(fname); - write_index(idx, &writer); + write_index(idx, &writer, io_flags); } // write index for offset-only index diff --git a/thirdparty/faiss/faiss/index_factory.cpp b/thirdparty/faiss/faiss/index_factory.cpp index 7416c41b0..78a810529 100644 --- a/thirdparty/faiss/faiss/index_factory.cpp +++ b/thirdparty/faiss/faiss/index_factory.cpp @@ -142,8 +142,12 @@ std::map sq_types = { {"SQ4", ScalarQuantizer::QT_4bit}, {"SQ6", ScalarQuantizer::QT_6bit}, {"SQfp16", ScalarQuantizer::QT_fp16}, + {"SQbf16", ScalarQuantizer::QT_bf16}, + {"SQ8_direct_signed", ScalarQuantizer::QT_8bit_direct_signed}, + {"SQ8_direct", ScalarQuantizer::QT_8bit_direct}, }; -const std::string sq_pattern = "(SQ4|SQ8|SQ6|SQfp16)"; +const std::string sq_pattern = + "(SQ4|SQ8|SQ6|SQfp16|SQbf16|SQ8_direct_signed|SQ8_direct)"; std::map aq_search_type = { {"_Nfloat", AdditiveQuantizer::ST_norm_float}, diff --git a/thirdparty/faiss/faiss/index_io.h b/thirdparty/faiss/faiss/index_io.h index 7ce6faf3a..a78b1493f 100644 --- a/thirdparty/faiss/faiss/index_io.h +++ b/thirdparty/faiss/faiss/index_io.h @@ -5,8 +5,6 @@ * LICENSE file in the root directory of this source tree. 
*/ -// -*- c++ -*- - // I/O code for indexes #ifndef FAISS_INDEX_IO_H @@ -37,9 +35,12 @@ struct IOReader; struct IOWriter; struct InvertedLists; -void write_index(const Index* idx, const char* fname); -void write_index(const Index* idx, FILE* f); -void write_index(const Index* idx, IOWriter* writer); +/// skip the storage for graph-based indexes +const int IO_FLAG_SKIP_STORAGE = 1; + +void write_index(const Index* idx, const char* fname, int io_flags = 0); +void write_index(const Index* idx, FILE* f, int io_flags = 0); +void write_index(const Index* idx, IOWriter* writer, int io_flags = 0); void write_index_binary(const IndexBinary* idx, const char* fname); void write_index_binary(const IndexBinary* idx, FILE* f); diff --git a/thirdparty/faiss/faiss/invlists/InvertedLists.cpp b/thirdparty/faiss/faiss/invlists/InvertedLists.cpp index c8501b230..acf08c55b 100644 --- a/thirdparty/faiss/faiss/invlists/InvertedLists.cpp +++ b/thirdparty/faiss/faiss/invlists/InvertedLists.cpp @@ -5,8 +5,6 @@ * LICENSE file in the root directory of this source tree. */ -// -*- c++ -*- - #include #include @@ -75,18 +73,10 @@ InvertedListsIterator::~InvertedListsIterator() {} ******************************************/ InvertedLists::InvertedLists(size_t nlist, size_t code_size) - : nlist(nlist), code_size(code_size), use_iterator(false) {} + : nlist(nlist), code_size(code_size) {} InvertedLists::~InvertedLists() {} -bool InvertedLists::is_empty(size_t list_no, void* inverted_list_context) - const { - return use_iterator ? 
!std::unique_ptr( - get_iterator(list_no, inverted_list_context)) - ->is_available() - : list_size(list_no) == 0; -} - idx_t InvertedLists::get_single_id(size_t list_no, size_t offset) const { assert(offset < list_size(list_no)); const idx_t* ids = get_ids(list_no); @@ -169,12 +159,6 @@ void InvertedLists::reset() { } } -InvertedListsIterator* InvertedLists::get_iterator( - size_t /*list_no*/, - void* /*inverted_list_context*/) const { - FAISS_THROW_MSG("get_iterator is not supported"); -} - void InvertedLists::merge_from(InvertedLists* oivf, size_t add_id) { #pragma omp parallel for for (idx_t i = 0; i < nlist; i++) { @@ -324,6 +308,54 @@ size_t InvertedLists::compute_ntotal() const { return tot; } +bool InvertedLists::is_empty(size_t list_no, void* inverted_list_context) + const { + if (use_iterator) { + return !std::unique_ptr( + get_iterator(list_no, inverted_list_context)) + ->is_available(); + } else { + FAISS_THROW_IF_NOT(inverted_list_context == nullptr); + return list_size(list_no) == 0; + } +} + +// implemnent iterator on top of get_codes / get_ids +namespace { + +struct CodeArrayIterator : InvertedListsIterator { + size_t list_size; + size_t code_size; + InvertedLists::ScopedCodes codes; + InvertedLists::ScopedIds ids; + size_t idx = 0; + + CodeArrayIterator(const InvertedLists* il, size_t list_no) + : list_size(il->list_size(list_no)), + code_size(il->code_size), + codes(il, list_no), + ids(il, list_no) {} + + bool is_available() const override { + return idx < list_size; + } + void next() override { + idx++; + } + std::pair get_id_and_codes() override { + return {ids[idx], codes.get() + code_size * idx}; + } +}; + +} // namespace + +InvertedListsIterator* InvertedLists::get_iterator( + size_t list_no, + void* inverted_list_context) const { + FAISS_THROW_IF_NOT(inverted_list_context == nullptr); + return new CodeArrayIterator(this, list_no); +} + /***************************************** * ArrayInvertedLists implementation 
******************************************/ @@ -366,6 +398,12 @@ size_t ArrayInvertedLists::list_size(size_t list_no) const { return ids[list_no].size(); } +bool ArrayInvertedLists::is_empty(size_t list_no, void* inverted_list_context) + const { + FAISS_THROW_IF_NOT(inverted_list_context == nullptr); + return ids[list_no].size() == 0; +} + const uint8_t* ArrayInvertedLists::get_codes(size_t list_no) const { assert(list_no < nlist); return codes[list_no].data(); diff --git a/thirdparty/faiss/faiss/invlists/InvertedLists.h b/thirdparty/faiss/faiss/invlists/InvertedLists.h index 951df3376..bd4220017 100644 --- a/thirdparty/faiss/faiss/invlists/InvertedLists.h +++ b/thirdparty/faiss/faiss/invlists/InvertedLists.h @@ -67,7 +67,9 @@ struct InvertedListsIterator { struct InvertedLists { size_t nlist; ///< number of possible key values size_t code_size; ///< code size per vector in bytes - bool use_iterator; + + /// request to use iterator rather than get_codes / get_ids + bool use_iterator = false; InvertedLists(size_t nlist, size_t code_size); @@ -80,9 +82,6 @@ struct InvertedLists { /************************* * Read only functions */ - // check if the list is empty - bool is_empty(size_t list_no, void* inverted_list_context) const; - /// get the size of a list virtual size_t list_size(size_t list_no) const = 0; @@ -95,11 +94,6 @@ struct InvertedLists { // get the segment minimal number of a list (continuous storage can be regarded as 1-segment storage) virtual size_t get_segment_offset(size_t list_no, size_t segment_no) const; - /// get iterable for lists that use_iterator - virtual InvertedListsIterator* get_iterator( - size_t list_no, - void* inverted_list_context) const; - /** get the codes for an inverted list * must be released by release_codes * @@ -154,6 +148,18 @@ struct InvertedLists { /// a list can be -1 hence the signed long virtual void prefetch_lists(const idx_t* list_nos, int nlist) const; + /***************************************** + * Iterator 
interface (with context) */ + + /// check if the list is empty + virtual bool is_empty(size_t list_no, void* inverted_list_context = nullptr) + const; + + /// get iterable for lists that use_iterator + virtual InvertedListsIterator* get_iterator( + size_t list_no, + void* inverted_list_context = nullptr) const; + /************************* * writing functions */ @@ -372,6 +378,9 @@ struct ArrayInvertedLists : InvertedLists { /// permute the inverted lists, map maps new_id to old_id void permute_invlists(const idx_t* map); + bool is_empty(size_t list_no, void* inverted_list_context = nullptr) + const override; + ~ArrayInvertedLists() override; }; diff --git a/thirdparty/faiss/faiss/utils/bf16.h b/thirdparty/faiss/faiss/utils/bf16.h new file mode 100644 index 000000000..ff0fbe898 --- /dev/null +++ b/thirdparty/faiss/faiss/utils/bf16.h @@ -0,0 +1,36 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +namespace faiss { + +namespace { + +union fp32_bits { + uint32_t as_u32; + float as_f32; +}; + +} // namespace + +inline uint16_t encode_bf16(const float f) { + // Round off + fp32_bits fp; + fp.as_f32 = f; + return static_cast((fp.as_u32 + 0x8000) >> 16); +} + +inline float decode_bf16(const uint16_t v) { + fp32_bits fp; + fp.as_u32 = (uint32_t(v) << 16); + return fp.as_f32; +} + +} // namespace faiss diff --git a/thirdparty/faiss/faiss/utils/extra_distances-inl.h b/thirdparty/faiss/faiss/utils/extra_distances-inl.h index 4df72b0d7..25ce3643c 100644 --- a/thirdparty/faiss/faiss/utils/extra_distances-inl.h +++ b/thirdparty/faiss/faiss/utils/extra_distances-inl.h @@ -8,6 +8,7 @@ /** In this file are the implementations of extra metrics beyond L2 * and inner product */ +#include #include #include @@ -135,4 +136,35 @@ inline float VectorDistance::operator()( return accu_num / accu_den; } +template <> +inline float VectorDistance::operator()( + const float* x, + const float* y) const { + // https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.nan_euclidean_distances.html + float accu = 0; + size_t present = 0; + for (size_t i = 0; i < d; i++) { + if (!std::isnan(x[i]) && !std::isnan(y[i])) { + float diff = x[i] - y[i]; + accu += diff * diff; + present++; + } + } + if (present == 0) { + return NAN; + } + return float(d) / float(present) * accu; +} + +template <> +inline float VectorDistance::operator()( + const float* x, + const float* y) const { + float accu = 0; + for (size_t i = 0; i < d; i++) { + accu += fabs(x[i] * y[i]); + } + return accu; +} + } // namespace faiss diff --git a/thirdparty/faiss/faiss/utils/extra_distances.cpp b/thirdparty/faiss/faiss/utils/extra_distances.cpp index 520ed6737..0403ec82c 100644 --- a/thirdparty/faiss/faiss/utils/extra_distances.cpp +++ b/thirdparty/faiss/faiss/utils/extra_distances.cpp @@ -51,17 +51,19 @@ void pairwise_extra_distances_template( } } -template +template void 
knn_extra_metrics_template( VD vd, const float* x, const float* y, size_t nx, size_t ny, - HeapArray* res, + size_t k, + float* distances, + int64_t* labels, const IDSelector* sel = nullptr) { - size_t k = res->k; size_t d = vd.d; + using C = typename VD::C; size_t check_period = InterruptCallback::get_period_hint(ny * d); check_period *= omp_get_max_threads(); @@ -73,8 +75,8 @@ void knn_extra_metrics_template( const float* x_i = x + i * d; const float* y_j = y; size_t j; - float* simi = res->get_val(i); - int64_t* idxi = res->get_ids(i); + float* simi = distances + k * i; + int64_t* idxi = labels + k * i; // maxheap_heapify(k, simi, idxi); heap_heapify(k, simi, idxi); @@ -82,10 +84,7 @@ void knn_extra_metrics_template( if (!sel || sel->is_member(j)) { float disij = vd(x_i, y_j); - // if (disij < simi[0]) { - if ((!vd.is_similarity && (disij < simi[0])) || - (vd.is_similarity && (disij > simi[0]))) { - // maxheap_replace_top(k, simi, idxi, disij, j); + if (C::cmp(simi[0], disij)) { heap_replace_top(k, simi, idxi, disij, j); } } @@ -168,13 +167,14 @@ void pairwise_extra_distances( HANDLE_VAR(JensenShannon); HANDLE_VAR(Lp); HANDLE_VAR(Jaccard); + HANDLE_VAR(NaNEuclidean); + HANDLE_VAR(ABS_INNER_PRODUCT); #undef HANDLE_VAR default: FAISS_THROW_MSG("metric type not implemented"); } } -template void knn_extra_metrics( const float* x, const float* y, @@ -183,14 +183,16 @@ void knn_extra_metrics( size_t ny, MetricType mt, float metric_arg, - HeapArray* res, + size_t k, + float* distances, + int64_t* indexes, const IDSelector* sel) { switch (mt) { -#define HANDLE_VAR(kw) \ - case METRIC_##kw: { \ - VectorDistance vd = {(size_t)d, metric_arg}; \ - knn_extra_metrics_template(vd, x, y, nx, ny, res, sel); \ - break; \ +#define HANDLE_VAR(kw) \ + case METRIC_##kw: { \ + VectorDistance vd = {(size_t)d, metric_arg}; \ + knn_extra_metrics_template(vd, x, y, nx, ny, k, distances, indexes, sel); \ + break; \ } HANDLE_VAR(L2); HANDLE_VAR(L1); @@ -200,34 +202,14 @@ void 
knn_extra_metrics( HANDLE_VAR(JensenShannon); HANDLE_VAR(Lp); HANDLE_VAR(Jaccard); + HANDLE_VAR(NaNEuclidean); + HANDLE_VAR(ABS_INNER_PRODUCT); #undef HANDLE_VAR default: FAISS_THROW_MSG("metric type not implemented"); } } -template void knn_extra_metrics>( - const float* x, - const float* y, - size_t d, - size_t nx, - size_t ny, - MetricType mt, - float metric_arg, - HeapArray>* res, - const IDSelector* sel = nullptr); - -template void knn_extra_metrics>( - const float* x, - const float* y, - size_t d, - size_t nx, - size_t ny, - MetricType mt, - float metric_arg, - HeapArray>* res, - const IDSelector* sel = nullptr); - FlatCodesDistanceComputer* get_extra_distance_computer( size_t d, MetricType mt, @@ -249,6 +231,8 @@ FlatCodesDistanceComputer* get_extra_distance_computer( HANDLE_VAR(JensenShannon); HANDLE_VAR(Lp); HANDLE_VAR(Jaccard); + HANDLE_VAR(NaNEuclidean); + HANDLE_VAR(ABS_INNER_PRODUCT); #undef HANDLE_VAR default: FAISS_THROW_MSG("metric type not implemented"); diff --git a/thirdparty/faiss/faiss/utils/extra_distances.h b/thirdparty/faiss/faiss/utils/extra_distances.h index 800b85a92..d786279a3 100644 --- a/thirdparty/faiss/faiss/utils/extra_distances.h +++ b/thirdparty/faiss/faiss/utils/extra_distances.h @@ -34,7 +34,6 @@ void pairwise_extra_distances( int64_t ldb = -1, int64_t ldd = -1); -template void knn_extra_metrics( const float* x, const float* y, @@ -43,7 +42,9 @@ void knn_extra_metrics( size_t ny, MetricType mt, float metric_arg, - HeapArray* res, + size_t k, + float* distances, + int64_t* indexes, const IDSelector* sel = nullptr); /** get a DistanceComputer that refers to this type of distance and diff --git a/thirdparty/faiss/faiss/utils/simdlib_neon.h b/thirdparty/faiss/faiss/utils/simdlib_neon.h index 439a5210b..1bdf0ed01 100644 --- a/thirdparty/faiss/faiss/utils/simdlib_neon.h +++ b/thirdparty/faiss/faiss/utils/simdlib_neon.h @@ -170,14 +170,10 @@ static inline std::string elements_to_string(const char* fmt, const S& simd) { for (size_t i = 
0; i < N; ++i) { int bytesWritten = snprintf(ptr, sizeof(res) - (ptr - res), fmt, bytes[i]); - if (bytesWritten >= 0) { - ptr += bytesWritten; - } else { - break; - } + ptr += bytesWritten; } - // strip last , - + // The format usually contains a ',' separator so this is to remove the last + // separator. ptr[-1] = 0; return std::string(res); } diff --git a/thirdparty/faiss/tests/CMakeLists.txt b/thirdparty/faiss/tests/CMakeLists.txt index 14103c27c..4d1baf1c9 100644 --- a/thirdparty/faiss/tests/CMakeLists.txt +++ b/thirdparty/faiss/tests/CMakeLists.txt @@ -75,6 +75,8 @@ set(FAISS_TEST_SRC test_distances_if.cpp test_fastscan_perf.cpp test_disable_pq_sdc_tables.cpp + test_common_ivf_empty_index.cpp + test_callback.cpp ) add_executable(faiss_test ${FAISS_TEST_SRC}) diff --git a/thirdparty/faiss/tests/common_faiss_tests.py b/thirdparty/faiss/tests/common_faiss_tests.py index 8dc25edec..a8afe344e 100644 --- a/thirdparty/faiss/tests/common_faiss_tests.py +++ b/thirdparty/faiss/tests/common_faiss_tests.py @@ -49,7 +49,6 @@ def evalres(self, DI): for rank in 1, 10, 100: e[rank] = ((I[:, :rank] == self.gt.reshape(-1, 1)).sum() / float(self.nq)) - # print("1-recalls: %s" % e) return e diff --git a/thirdparty/faiss/tests/test_binary_hashindex.py b/thirdparty/faiss/tests/test_binary_hashindex.py index 2d3305057..e9a6eaca4 100644 --- a/thirdparty/faiss/tests/test_binary_hashindex.py +++ b/thirdparty/faiss/tests/test_binary_hashindex.py @@ -58,8 +58,6 @@ def test_hash(self): Lref, Dref, Iref = index_ref.range_search(xq, radius) - print("nb res: ", Lref[-1]) - index = faiss.IndexBinaryHash(d, 10) index.add(xb) # index.display() @@ -80,8 +78,6 @@ def test_hash(self): self.assertTrue(snew <= set(ref)) nfound.append(Lnew[-1]) ndis.append(stats.ndis) - print('nfound=', nfound) - print('ndis=', ndis) nfound = np.array(nfound) self.assertTrue(nfound[-1] == Lref[-1]) self.assertTrue(np.all(nfound[1:] >= nfound[:-1])) @@ -100,8 +96,6 @@ def test_multihash(self): Lref, Dref, Iref = 
index_ref.range_search(xq, radius) - print("nb res: ", Lref[-1]) - nfound = [] ndis = [] @@ -123,8 +117,6 @@ def test_multihash(self): self.assertTrue(snew <= set(ref)) nfound.append(Lnew[-1]) ndis.append(stats.ndis) - print('nfound=', nfound) - print('ndis=', ndis) nfound = np.array(nfound) # self.assertTrue(nfound[-1] == Lref[-1]) self.assertTrue(np.all(nfound[1:] >= nfound[:-1])) @@ -163,7 +155,6 @@ def test_hash_and_multihash(self): # no duplicates self.assertTrue(len(new) == len(snew)) nf += len(set(ref) & snew) - print('nfound', nh, nbit, nf) nfound[(nh, nbit)] = nf self.assertGreater(nfound[(nh, 4)], nfound[(nh, 7)]) @@ -175,7 +166,6 @@ def test_hash_and_multihash(self): np.testing.assert_array_equal(Inew, I2) np.testing.assert_array_equal(Dnew, D2) - print('nfound=', nfound) self.assertGreater(3, abs(nfound[(0, 7)] - nfound[(1, 7)])) self.assertGreater(nfound[(3, 7)], nfound[(1, 7)]) self.assertGreater(nfound[(5, 7)], nfound[(3, 7)]) diff --git a/thirdparty/faiss/tests/test_build_blocks.py b/thirdparty/faiss/tests/test_build_blocks.py index 0a97e6318..fdf9ad8bd 100644 --- a/thirdparty/faiss/tests/test_build_blocks.py +++ b/thirdparty/faiss/tests/test_build_blocks.py @@ -189,7 +189,6 @@ def test_l2(self): for d in 1, 2, 4, 8, 12, 16: x = rs.rand(d).astype('float32') for ny in 128, 129, 130: - print("d=%d ny=%d" % (d, ny)) y = rs.rand(ny, d).astype('float32') ref = ((x - y) ** 2).sum(1) new = np.zeros(ny, dtype='float32') @@ -204,7 +203,6 @@ def test_IP(self): for d in 1, 2, 4, 8, 12, 16: x = rs.rand(d).astype('float32') for ny in 128, 129, 130: - print("d=%d ny=%d" % (d, ny)) y = rs.rand(ny, d).astype('float32') ref = (x * y).sum(1) new = np.zeros(ny, dtype='float32') @@ -220,7 +218,6 @@ def test_0s(self): m = rs.rand(40, 20).astype('float32') m[5:10] = 0 comments = faiss.MatrixStats(m).comments - print(comments) assert 'has 5 copies' in comments assert '5 null vectors' in comments @@ -229,7 +226,6 @@ def test_copies(self): m = rs.rand(40, 
20).astype('float32') m[::2] = m[1::2] comments = faiss.MatrixStats(m).comments - print(comments) assert '20 vectors are distinct' in comments def test_dead_dims(self): @@ -237,7 +233,6 @@ def test_dead_dims(self): m = rs.rand(40, 20).astype('float32') m[:, 5:10] = 0 comments = faiss.MatrixStats(m).comments - print(comments) assert '5 dimensions are constant' in comments def test_rogue_means(self): @@ -245,7 +240,6 @@ def test_rogue_means(self): m = rs.rand(40, 20).astype('float32') m[:, 5:10] += 12345 comments = faiss.MatrixStats(m).comments - print(comments) assert '5 dimensions are too large wrt. their variance' in comments def test_normalized(self): @@ -253,7 +247,6 @@ def test_normalized(self): m = rs.rand(40, 20).astype('float32') faiss.normalize_L2(m) comments = faiss.MatrixStats(m).comments - print(comments) assert 'vectors are normalized' in comments def test_hash(self): @@ -300,7 +293,6 @@ def test_8bit_equiv(self): D, I = index.search(x[3:], 1) # assert D[0, 0] == Dref[0, 0] - # print(D[0, 0], ((x[3] - x[2]) ** 2).sum()) assert D[0, 0] == ((x[3] - x[2]) ** 2).sum() def test_6bit_equiv(self): @@ -314,8 +306,6 @@ def test_6bit_equiv(self): d, faiss.ScalarQuantizer.QT_6bit) index.train(trainset) - print('cs=', index.code_size) - x = rs.randint(64, size=(100, d)).astype('float32') # verify encoder / decoder @@ -330,7 +320,6 @@ def test_6bit_equiv(self): for i in range(20): for j in range(10): dis = ((y[i] - x2[I[i, j]]) ** 2).sum() - # print(dis, D[i, j]) assert abs(D[i, j] - dis) / dis < 1e-5 def test_reconstruct(self): @@ -371,7 +360,6 @@ def test_randint(self): x = faiss.randint(20000, vmax=100) assert np.all(x >= 0) and np.all(x < 100) c = np.bincount(x, minlength=100) - print(c) assert c.max() - c.min() < 50 * 2 def test_rand_vector(self): @@ -473,7 +461,6 @@ def do_test_array_type(self, dtype): """ tests swig_ptr and rev_swig_ptr for this type of array """ a = np.arange(12).astype(dtype) ptr = faiss.swig_ptr(a) - print(ptr) a2 = faiss.rev_swig_ptr(ptr, 
12) np.testing.assert_array_equal(a, a2) @@ -547,7 +534,6 @@ def subtest(self, d, K, metric): recalls += 1 break recall = 1.0 * recalls / (nb * K) - print('Metric: {}, knng accuracy: {}'.format(metric_names[metric], recall)) assert recall > 0.99 def test_small_nndescent(self): @@ -656,7 +642,6 @@ def do_test_bucket_sort_inplace( rows, _ = np.where(tab == b) rows.sort() tab2[lims[b]:lims[b + 1]].sort() - # print(rows, tab2[lims[b] : lims[b + 1]]) rows = set(rows) self.assertEqual(rows, set(tab2[lims[b]:lims[b + 1]])) diff --git a/thirdparty/faiss/tests/test_callback.cpp b/thirdparty/faiss/tests/test_callback.cpp new file mode 100644 index 000000000..cdfadf1d3 --- /dev/null +++ b/thirdparty/faiss/tests/test_callback.cpp @@ -0,0 +1,37 @@ +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include +#include + +TEST(TestCallback, timeout) { + int n = 1000; + int k = 100; + int d = 128; + int niter = 1000000000; + int seed = 42; + + std::vector vecs(n * d); + faiss::float_rand(vecs.data(), vecs.size(), seed); + + auto index(new faiss::IndexFlat(d)); + + faiss::ClusteringParameters cp; + cp.niter = niter; + cp.verbose = false; + + faiss::Clustering kmeans(d, k, cp); + + faiss::TimeoutCallback::reset(0.010); + EXPECT_THROW(kmeans.train(n, vecs.data(), *index), faiss::FaissException); + delete index; +} diff --git a/thirdparty/faiss/tests/test_callback_py.py b/thirdparty/faiss/tests/test_callback_py.py new file mode 100644 index 000000000..0ec176dd8 --- /dev/null +++ b/thirdparty/faiss/tests/test_callback_py.py @@ -0,0 +1,32 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import unittest +import numpy as np +import faiss + + +class TestCallbackPy(unittest.TestCase): + def setUp(self) -> None: + super().setUp() + + def test_timeout(self) -> None: + n = 1000 + k = 100 + d = 128 + niter = 1_000_000_000 + + x = np.random.rand(n, d).astype('float32') + index = faiss.IndexFlat(d) + + cp = faiss.ClusteringParameters() + cp.niter = niter + cp.verbose = False + + kmeans = faiss.Clustering(d, k, cp) + + with self.assertRaises(RuntimeError): + with faiss.TimeoutGuard(0.010): + kmeans.train(x, index) diff --git a/thirdparty/faiss/tests/test_clustering.py b/thirdparty/faiss/tests/test_clustering.py index 2b81fc3e3..b1afc8523 100644 --- a/thirdparty/faiss/tests/test_clustering.py +++ b/thirdparty/faiss/tests/test_clustering.py @@ -110,9 +110,6 @@ def test_weighted(self): cdis2_first = cdis2[:5].sum() cdis2_last = cdis2[5:].sum() - print(cdis1_first, cdis1_last) - print(cdis2_first, cdis2_last) - # with the new clustering, the last should be much (*2) closer # to their centroids self.assertGreater(cdis1_last, cdis1_first * 2) diff --git a/thirdparty/faiss/tests/test_common_ivf_empty_index.cpp b/thirdparty/faiss/tests/test_common_ivf_empty_index.cpp new file mode 100644 index 000000000..a3e33031b --- /dev/null +++ b/thirdparty/faiss/tests/test_common_ivf_empty_index.cpp @@ -0,0 +1,144 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* This demonstrates how to query several independent IVF indexes with a trained + *index in common. This avoids to duplicate the coarse quantizer and metadata + *in memory. 
+ **/ + +namespace { + +int d = 64; + +} // namespace + +std::vector get_random_vectors(size_t n, int seed) { + std::vector x(n * d); + faiss::rand_smooth_vectors(n, d, x.data(), seed); + seed++; + return x; +} + +/** InvetedLists implementation that dispatches the search to an InvertedList + * object that is passed in at query time */ + +struct DispatchingInvertedLists : faiss::ReadOnlyInvertedLists { + DispatchingInvertedLists(size_t nlist, size_t code_size) + : faiss::ReadOnlyInvertedLists(nlist, code_size) { + use_iterator = true; + } + + faiss::InvertedListsIterator* get_iterator( + size_t list_no, + void* inverted_list_context = nullptr) const override { + assert(inverted_list_context); + auto il = + static_cast(inverted_list_context); + return il->get_iterator(list_no); + } + + using idx_t = faiss::idx_t; + + size_t list_size(size_t list_no) const override { + FAISS_THROW_MSG("use iterator interface"); + } + const uint8_t* get_codes(size_t list_no) const override { + FAISS_THROW_MSG("use iterator interface"); + } + const idx_t* get_ids(size_t list_no) const override { + FAISS_THROW_MSG("use iterator interface"); + } +}; + +TEST(COMMON, test_common_trained_index) { + int N = 3; // number of independent indexes + int nt = 500; // training vectors + int nb = 200; // nb database vectors per index + int nq = 10; // nb queries performed on each index + int k = 4; // restults requested per query + + // construct and build an "empty index": a trained index that does not + // itself hold any data + std::unique_ptr empty_index(dynamic_cast( + faiss::index_factory(d, "IVF32,PQ8np"))); + auto xt = get_random_vectors(nt, 123); + empty_index->train(nt, xt.data()); + empty_index->nprobe = 4; + + // reference run: build one index for each set of db / queries and record + // results + std::vector> ref_I(N); + + for (int i = 0; i < N; i++) { + // clone the empty index + std::unique_ptr index( + faiss::clone_index(empty_index.get())); + auto xb = get_random_vectors(nb, 1234 + 
i); + auto xq = get_random_vectors(nq, 12345 + i); + // add vectors and perform a search + index->add(nb, xb.data()); + std::vector D(k * nq); + std::vector I(k * nq); + index->search(nq, xq.data(), k, D.data(), I.data()); + // record result as reference + ref_I[i] = I; + } + + // build a set of inverted lists for each independent index + std::vector sub_invlists; + + for (int i = 0; i < N; i++) { + // swap in other inverted lists + sub_invlists.emplace_back(empty_index->nlist, empty_index->code_size); + faiss::InvertedLists* invlists = &sub_invlists.back(); + + // replace_invlists swaps in a new InvertedLists for an existing index + empty_index->replace_invlists(invlists, false); + empty_index->reset(); // reset id counter to 0 + // populate inverted lists + auto xb = get_random_vectors(nb, 1234 + i); + empty_index->add(nb, xb.data()); + } + + // perform search dispatching to the sub-invlists. At search time, we don't + // use replace_invlists because that would wreak havoc in a multithreaded + // context + DispatchingInvertedLists di(empty_index->nlist, empty_index->code_size); + empty_index->replace_invlists(&di, false); + + std::vector> new_I(N); + + // run searches in the independent indexes but with a common empty_index +#pragma omp parallel for + for (int i = 0; i < N; i++) { + auto xq = get_random_vectors(nq, 12345 + i); + std::vector D(k * nq); + std::vector I(k * nq); + + // here we set to what sub-index the queries should be directed + faiss::SearchParametersIVF params; + params.nprobe = empty_index->nprobe; + params.inverted_list_context = &sub_invlists[i]; + + empty_index->search(nq, xq.data(), k, D.data(), I.data(), ¶ms); + new_I[i] = I; + } + + // compare with reference reslt + for (int i = 0; i < N; i++) { + ASSERT_EQ(ref_I[i], new_I[i]); + } +} diff --git a/thirdparty/faiss/tests/test_contrib.py b/thirdparty/faiss/tests/test_contrib.py index 0e7cbbfb0..05a2c4ac8 100644 --- a/thirdparty/faiss/tests/test_contrib.py +++ 
b/thirdparty/faiss/tests/test_contrib.py @@ -147,7 +147,6 @@ def test_query_iterator(self, metric=faiss.METRIC_L2): xb = ds.get_database() D, I = faiss.knn(xq, xb, 10, metric=metric) threshold = float(D[:, -1].mean()) - print(threshold) index = faiss.IndexFlat(32, metric) index.add(xb) @@ -251,7 +250,6 @@ def test_precision_recall(self): Inew = np.hstack(Inew) precision, recall = evaluation.range_PR(lims_ref, Iref, lims_new, Inew) - print(precision, recall) self.assertEqual(precision, 0.6) self.assertEqual(recall, 0.6) diff --git a/thirdparty/faiss/tests/test_contrib_with_scipy.py b/thirdparty/faiss/tests/test_contrib_with_scipy.py index cb81bb623..4f89e2fc1 100644 --- a/thirdparty/faiss/tests/test_contrib_with_scipy.py +++ b/thirdparty/faiss/tests/test_contrib_with_scipy.py @@ -44,7 +44,6 @@ def test_sparse_routines(self): faiss.normalize_L2(xt) mask = np.abs(xt) > 0.045 - # print("fraction:", mask.sum() / mask.size) # around 10% non-zeros xt[np.logical_not(mask)] = 0 centroids = ds.get_queries() @@ -72,7 +71,6 @@ def test_sparse_kmeans(self): faiss.normalize_L2(xt) mask = np.abs(xt) > 0.045 - # print("fraction:", mask.sum() / mask.size) # around 10% non-zeros xt[np.logical_not(mask)] = 0 km = faiss.Kmeans(ds.d, 50) diff --git a/thirdparty/faiss/tests/test_extra_distances.py b/thirdparty/faiss/tests/test_extra_distances.py index a474dd6ba..fcaf4d383 100644 --- a/thirdparty/faiss/tests/test_extra_distances.py +++ b/thirdparty/faiss/tests/test_extra_distances.py @@ -94,6 +94,33 @@ def test_jaccard(self): new_dis = faiss.pairwise_distances(xq, yb, faiss.METRIC_Jaccard) self.assertTrue(np.allclose(ref_dis, new_dis)) + def test_nan_euclidean(self): + xq, yb = self.make_example() + ref_dis = np.array([ + [scipy.spatial.distance.sqeuclidean(x, y) for y in yb] + for x in xq + ]) + new_dis = faiss.pairwise_distances(xq, yb, faiss.METRIC_NaNEuclidean) + self.assertTrue(np.allclose(ref_dis, new_dis)) + + x = [[3, np.nan, np.nan, 6]] + q = [[1, np.nan, np.nan, 5]] + dis = [(4 
/ 2 * ((3 - 1)**2 + (6 - 5)**2))] + new_dis = faiss.pairwise_distances(x, q, faiss.METRIC_NaNEuclidean) + self.assertTrue(np.allclose(new_dis, dis)) + + x = [[np.nan] * 4] + q = [[np.nan] * 4] + new_dis = faiss.pairwise_distances(x, q, faiss.METRIC_NaNEuclidean) + self.assertTrue(np.isnan(new_dis[0])) + + def test_abs_inner_product(self): + xq, yb = self.make_example() + dis = faiss.pairwise_distances(xq, yb, faiss.METRIC_ABS_INNER_PRODUCT) + + gt_dis = np.abs(xq @ yb.T) + np.testing.assert_allclose(dis, gt_dis, atol=1e-5) + class TestKNN(unittest.TestCase): """ test that the knn search gives the same as distance matrix + argmin """ diff --git a/thirdparty/faiss/tests/test_fast_scan.py b/thirdparty/faiss/tests/test_fast_scan.py index b061ee3af..cfe9636fe 100644 --- a/thirdparty/faiss/tests/test_fast_scan.py +++ b/thirdparty/faiss/tests/test_fast_scan.py @@ -34,7 +34,6 @@ def test_PQ4_accuracy(self): nq = Iref.shape[0] recall_at_1 = (Iref[:, 0] == Ia[:, 0]).sum() / nq assert recall_at_1 > 0.6 - # print(f'recall@1 = {recall_at_1:.3f}') # This is an experiment to see if we can catch performance @@ -498,7 +497,6 @@ def subtest_accuracy(self, aq, st, implem, metric_type='L2'): recall_ref = (Iref == gt).sum() / nq recall = (Ia == gt).sum() / nq - print(aq, st, implem, metric_type, recall_ref, recall) assert abs(recall_ref - recall) < 0.05 def xx_test_accuracy(self): @@ -531,7 +529,6 @@ def subtest_from_idxaq(self, implem, metric): nq = Iref.shape[0] recall_ref = (Iref == gt).sum() / nq recall1 = (I1 == gt).sum() / nq - print(recall_ref, recall1) assert abs(recall_ref - recall1) < 0.05 def xx_test_from_idxaq(self): diff --git a/thirdparty/faiss/tests/test_graph_based.py b/thirdparty/faiss/tests/test_graph_based.py index dd4212d71..c769e03ad 100644 --- a/thirdparty/faiss/tests/test_graph_based.py +++ b/thirdparty/faiss/tests/test_graph_based.py @@ -133,6 +133,42 @@ def test_ndis_stats(self): Dhnsw, Ihnsw = index.search(self.xq, 1) self.assertGreater(stats.ndis, 
len(self.xq) * index.hnsw.efSearch) + def test_io_no_storage(self): + d = self.xq.shape[1] + index = faiss.IndexHNSWFlat(d, 16) + index.add(self.xb) + + Dref, Iref = index.search(self.xq, 5) + + # test writing without storage + index2 = faiss.deserialize_index( + faiss.serialize_index(index, faiss.IO_FLAG_SKIP_STORAGE) + ) + self.assertEqual(index2.storage, None) + self.assertRaises( + RuntimeError, + index2.search, self.xb, 1) + + # make sure we can store an index with empty storage + index4 = faiss.deserialize_index( + faiss.serialize_index(index2)) + + # add storage afterwards + index.storage = faiss.clone_index(index.storage) + index.own_fields = True + + Dnew, Inew = index.search(self.xq, 5) + np.testing.assert_array_equal(Dnew, Dref) + np.testing.assert_array_equal(Inew, Iref) + + if False: + # test reading without storage + # not implemented because it is hard to skip over an index + index3 = faiss.deserialize_index( + faiss.serialize_index(index), faiss.IO_FLAG_SKIP_STORAGE + ) + self.assertEquals(index3.storage, None) + class TestNSG(unittest.TestCase): @@ -209,7 +245,6 @@ def subtest_add(self, build_type, thresh, metric=faiss.METRIC_L2): Dnsg, Insg = index.search(self.xq, 1) recalls = (Iref == Insg).sum() - print('metric: {}, nb equal: {}'.format(metrics[metric], recalls)) self.assertGreaterEqual(recalls, thresh) self.subtest_connectivity(index, self.xb.shape[0]) self.subtest_io_and_clone(index, Dnsg, Insg) @@ -230,7 +265,6 @@ def subtest_build(self, knn_graph, thresh, metric=faiss.METRIC_L2): Dnsg, Insg = index.search(self.xq, 1) recalls = (Iref == Insg).sum() - print('metric: {}, nb equal: {}'.format(metrics[metric], recalls)) self.assertGreaterEqual(recalls, thresh) self.subtest_connectivity(index, self.xb.shape[0]) @@ -286,7 +320,6 @@ def test_reset(self): index.add(self.xb) Dnsg, Insg = index.search(self.xq, 1) recalls = (Iref == Insg).sum() - print('metric: {}, nb equal: {}'.format(metrics[metric], recalls)) self.assertGreaterEqual(recalls, 475) 
self.subtest_connectivity(index, self.xb.shape[0]) @@ -294,7 +327,6 @@ def test_reset(self): index.add(self.xb) Dnsg, Insg = index.search(self.xq, 1) recalls = (Iref == Insg).sum() - print('metric: {}, nb equal: {}'.format(metrics[metric], recalls)) self.assertGreaterEqual(recalls, 475) self.subtest_connectivity(index, self.xb.shape[0]) @@ -335,7 +367,6 @@ def test_nsg_pq(self): # test accuracy recalls = (Iref == I).sum() - print("IndexNSGPQ", recalls) self.assertGreaterEqual(recalls, 190) # 193 # test I/O @@ -361,7 +392,6 @@ def test_nsg_sq(self): # test accuracy recalls = (Iref == I).sum() - print("IndexNSGSQ", recalls) self.assertGreaterEqual(recalls, 405) # 411 # test I/O @@ -395,7 +425,6 @@ def test_nndescentflat(self): # test accuracy recalls = (Iref == I).sum() - print("IndexNNDescentFlat", recalls) self.assertGreaterEqual(recalls, 450) # 462 # do some IO tests diff --git a/thirdparty/faiss/tests/test_index.py b/thirdparty/faiss/tests/test_index.py index f46c6a94b..43db906e4 100644 --- a/thirdparty/faiss/tests/test_index.py +++ b/thirdparty/faiss/tests/test_index.py @@ -327,7 +327,7 @@ def test_4variants_ivf(self): D, I = index.search(xq, 10) nok['flat'] = (I[:, 0] == I_ref[:, 0]).sum() - for qname in "QT_4bit QT_4bit_uniform QT_8bit QT_8bit_uniform QT_fp16".split(): + for qname in "QT_4bit QT_4bit_uniform QT_8bit QT_8bit_uniform QT_fp16 QT_bf16".split(): qtype = getattr(faiss.ScalarQuantizer, qname) index = faiss.IndexIVFScalarQuantizer(quantizer, d, ncent, qtype, faiss.METRIC_L2) @@ -338,7 +338,6 @@ def test_4variants_ivf(self): D, I = index.search(xq, 10) nok[qname] = (I[:, 0] == I_ref[:, 0]).sum() - print(nok, nq) self.assertGreaterEqual(nok['flat'], nq * 0.6) # The tests below are a bit fragile, it happens that the @@ -350,6 +349,7 @@ def test_4variants_ivf(self): self.assertGreaterEqual(nok['QT_8bit'], nok['QT_8bit_uniform']) self.assertGreaterEqual(nok['QT_4bit'], nok['QT_4bit_uniform']) self.assertGreaterEqual(nok['QT_fp16'], nok['QT_8bit']) + 
self.assertGreaterEqual(nok['QT_bf16'], nok['QT_8bit']) def test_4variants(self): d = 32 @@ -365,7 +365,7 @@ def test_4variants(self): nok = {} - for qname in "QT_4bit QT_4bit_uniform QT_8bit QT_8bit_uniform QT_fp16".split(): + for qname in "QT_4bit QT_4bit_uniform QT_8bit QT_8bit_uniform QT_fp16 QT_bf16".split(): qtype = getattr(faiss.ScalarQuantizer, qname) index = faiss.IndexScalarQuantizer(d, qtype, faiss.METRIC_L2) index.train(xt) @@ -373,13 +373,12 @@ def test_4variants(self): D, I = index.search(xq, 10) nok[qname] = (I[:, 0] == I_ref[:, 0]).sum() - print(nok, nq) - self.assertGreaterEqual(nok['QT_8bit'], nq * 0.9) self.assertGreaterEqual(nok['QT_8bit'], nok['QT_4bit']) self.assertGreaterEqual(nok['QT_8bit'], nok['QT_8bit_uniform']) self.assertGreaterEqual(nok['QT_4bit'], nok['QT_4bit_uniform']) self.assertGreaterEqual(nok['QT_fp16'], nok['QT_8bit']) + self.assertGreaterEqual(nok['QT_bf16'], nq * 0.9) class TestRangeSearch(unittest.TestCase): @@ -442,7 +441,6 @@ def norm1(x): recons_err = np.mean(norm1(R_flat - xb[I_flat])) - print('Reconstruction error = %.3f' % recons_err) if eps is not None: self.assertLessEqual(recons_err, eps) @@ -638,7 +636,6 @@ def test_reconstuct_after_add(self): # should not raise an exception index.reconstruct(5) - print(index.ntotal) index.reconstruct(150) diff --git a/thirdparty/faiss/tests/test_index_accuracy.py b/thirdparty/faiss/tests/test_index_accuracy.py index 3f7bfbd30..2c5cf7b90 100644 --- a/thirdparty/faiss/tests/test_index_accuracy.py +++ b/thirdparty/faiss/tests/test_index_accuracy.py @@ -56,7 +56,6 @@ def test_ivf_kmeans(self): Dref, Iref = ivfk.search(ev.xq, 100) ivfk.parallel_mode = 1 Dnew, Inew = ivfk.search(ev.xq, 100) - print((Iref != Inew).sum(), Iref.size) assert (Iref != Inew).sum() < Iref.size / 5000.0 assert np.all(Dref == Dnew) @@ -136,8 +135,6 @@ def test_polysemous(self): res = ev.launch("Polysemous ht=%d" % index.polysemous_ht, index) e_polysemous = ev.evalres(res) - print(e_baseline, e_polysemous, 
index.polysemous_ht) - print(stats.n_hamming_pass, stats.ncode) # The randu dataset is difficult, so we are not too picky on # the results. Here we assert that we have < 10 % loss when # computing full PQ on fewer than 20% of the data. @@ -248,7 +245,6 @@ def subtest(self, mt): index.nprobe = 4 # hopefully more robust than 1 D, I = index.search(xq, 10) ninter = faiss.eval_intersection(I, gt_I) - print("(%d, %s): %d, " % (mt, repr(qname), ninter)) assert abs(ninter - self.ref_results[(mt, qname)]) <= 10 if qname == "6bit": @@ -264,7 +260,6 @@ def subtest(self, mt): radius = float(D[:, -1].max()) else: radius = float(D[:, -1].min()) - # print("radius", radius) lims, D3, I3 = index.range_search(xq, radius) ntot = ndiff = 0 @@ -278,14 +273,11 @@ def subtest(self, mt): Iref = set(I2[i, mask]) ndiff += len(Inew ^ Iref) ntot += len(Iref) - # print("ndiff %d / %d" % (ndiff, ntot)) assert ndiff < ntot * 0.01 for pm in 1, 2: - # print("parallel_mode=%d" % pm) index.parallel_mode = pm lims4, D4, I4 = index.range_search(xq, radius) - # print("sizes", lims4[1:] - lims4[:-1]) for qno in range(len(lims) - 1): Iref = I3[lims[qno]: lims[qno + 1]] Inew = I4[lims4[qno]: lims4[qno + 1]] @@ -320,7 +312,7 @@ def test_parallel_mode(self): class TestSQByte(unittest.TestCase): - def subtest_8bit_direct(self, metric_type, d): + def subtest_8bit_direct(self, metric_type, d, quantizer_type): xt, xb, xq = get_dataset_2(d, 500, 1000, 30) # rescale everything to get integer @@ -332,16 +324,28 @@ def rescale(x): x[x > 255] = 255 return x - xt = rescale(xt) - xb = rescale(xb) - xq = rescale(xq) + def rescale_signed(x): + x = np.floor((x - tmin) * 256 / (tmax - tmin)) + x[x < 0] = 0 + x[x > 255] = 255 + x -= 128 + return x + + if quantizer_type == faiss.ScalarQuantizer.QT_8bit_direct_signed: + xt = rescale_signed(xt) + xb = rescale_signed(xb) + xq = rescale_signed(xq) + else: + xt = rescale(xt) + xb = rescale(xb) + xq = rescale(xq) gt_index = faiss.IndexFlat(d, metric_type) gt_index.add(xb) Dref, 
Iref = gt_index.search(xq, 10) index = faiss.IndexScalarQuantizer( - d, faiss.ScalarQuantizer.QT_8bit_direct, metric_type + d, quantizer_type, metric_type ) index.add(xb) D, I = index.search(xq, 10) @@ -361,7 +365,7 @@ def rescale(x): Dref, Iref = gt_index.search(xq, 10) index = faiss.IndexIVFScalarQuantizer( - quantizer, d, nlist, faiss.ScalarQuantizer.QT_8bit_direct, + quantizer, d, nlist, quantizer_type, metric_type ) index.nprobe = 4 @@ -374,9 +378,10 @@ def rescale(x): assert np.all(D == Dref) def test_8bit_direct(self): - for d in 13, 16, 24: - for metric_type in faiss.METRIC_L2, faiss.METRIC_INNER_PRODUCT: - self.subtest_8bit_direct(metric_type, d) + for quantizer in faiss.ScalarQuantizer.QT_8bit_direct, faiss.ScalarQuantizer.QT_8bit_direct_signed: + for d in 13, 16, 24: + for metric_type in faiss.METRIC_L2, faiss.METRIC_INNER_PRODUCT: + self.subtest_8bit_direct(metric_type, d, quantizer) class TestNNDescent(unittest.TestCase): @@ -485,7 +490,6 @@ def subtest(self, mt): D, I = index.search(xq, 10) ninter = faiss.eval_intersection(I, gt_I) - print("(%d, %s): %d, " % (mt, by_residual, ninter)) assert abs(ninter - self.ref_results[mt, by_residual]) <= 3 @@ -499,10 +503,6 @@ def subtest(self, mt): index.polysemous_ht = 20 D, I = index.search(xq, 10) ninter = faiss.eval_intersection(I, gt_I) - print( - "(%d, %s, %d): %d, " - % (mt, by_residual, index.polysemous_ht, ninter) - ) # polysemous behaves bizarrely on ARM assert ( @@ -516,7 +516,6 @@ def subtest(self, mt): radius = float(D[:, -1].max()) else: radius = float(D[:, -1].min()) - print("radius", radius) lims, D3, I3 = index.range_search(xq, radius) ntot = ndiff = 0 @@ -530,7 +529,6 @@ def subtest(self, mt): Iref = set(I2[i, mask]) ndiff += len(Inew ^ Iref) ntot += len(Iref) - print("ndiff %d / %d" % (ndiff, ntot)) assert ndiff < ntot * 0.02 def test_IVFPQ_non8bit(self): @@ -555,7 +553,6 @@ def test_IVFPQ_non8bit(self): D, I = index.search(xq, 10) ninter[v] = faiss.eval_intersection(I, gt_I) - print("ninter=", 
ninter) # this should be the case but we don't observe # that... Probavly too few test points # assert ninter['2x8'] > ninter['8x2'] @@ -623,9 +620,6 @@ def test_OPQ(self): res = ev.launch("OPQ", index) e_opq = ev.evalres(res) - print("e_pq=%s" % e_pq) - print("e_opq=%s" % e_opq) - # verify that OPQ better than PQ for r in 1, 10, 100: assert e_opq[r] > e_pq[r] @@ -656,7 +650,6 @@ def test_OIVFPQ(self): # verify same on OIVFPQ for r in 1, 10, 100: - print(e_oivfpq[r], e_ivfpq[r]) assert e_oivfpq[r] >= e_ivfpq[r] @@ -758,9 +751,6 @@ def test_sh(self): ninter = faiss.eval_intersection(I, gt_I) key = (nbit, tt, period) - print("(%d, %s, %g): %d, " % (nbit, repr(tt), period, - ninter)) - print(abs(ninter - self.ref_results[key])) assert abs(ninter - self.ref_results[key]) <= 14 @@ -799,7 +789,6 @@ def do_test(self, metric): # check that with refinement, the recall@10 is the same as # the original recall@100 recall2 = (I2 == Iref[:, :1]).sum() - # print("recalls", recall1, recall2) self.assertEqual(recall1, recall2) def test_IP(self): diff --git a/thirdparty/faiss/tests/test_index_binary.py b/thirdparty/faiss/tests/test_index_binary.py index b505e0ba1..7820cb662 100644 --- a/thirdparty/faiss/tests/test_index_binary.py +++ b/thirdparty/faiss/tests/test_index_binary.py @@ -100,6 +100,9 @@ def test_flat(self): index.add(self.xb) D, I = index.search(self.xq, 3) + I2 = index.assign(x=self.xq, k=3, labels=None) + assert np.all(I == I2) + for i in range(nq): for j, dj in zip(I[i], D[i]): ref_dis = binary_dis(self.xq[i], self.xb[j]) @@ -139,7 +142,6 @@ def test_range_search(self): self.assertTrue(set(range_res) <= set(I[i])) nt2 += 1 # in case of equality we have a problem with ties - print('nb tests', nt1, nt2) # nb tests is actually low... 
self.assertTrue(nt1 > 19 and nt2 > 19) @@ -284,8 +286,6 @@ def test_ivf_nprobe(self): ref_index.add(xb) ref_D, ref_I = ref_index.search(xq, k) - print(D[0], ref_D[0]) - print(I[0], ref_I[0]) assert np.all(D == ref_D) # assert np.all(I == ref_I) # id may be different diff --git a/thirdparty/faiss/tests/test_index_composite.py b/thirdparty/faiss/tests/test_index_composite.py index a760c0cf0..8d9b441ad 100644 --- a/thirdparty/faiss/tests/test_index_composite.py +++ b/thirdparty/faiss/tests/test_index_composite.py @@ -168,8 +168,6 @@ def test_remove_id_map_2(self): index.remove_ids(remove_set) index.add_with_ids(X[5:, :], idx[5:]) - print (index.search(X, 1)) - for i in range(10): _, searchres = index.search(X[i:i + 1, :], 1) if idx[i] in remove_set: @@ -954,7 +952,6 @@ def do_test(self, factory_string): index.nprobe = 10 Dref, Iref = index.search(ds.get_queries(), 10) - #print(index.search_and_return_codes) D, I, codes = index.search_and_return_codes( ds.get_queries(), 10, include_listnos=True) diff --git a/thirdparty/faiss/tests/test_io.py b/thirdparty/faiss/tests/test_io.py index dc8ac3dcf..99dfe6084 100644 --- a/thirdparty/faiss/tests/test_io.py +++ b/thirdparty/faiss/tests/test_io.py @@ -102,7 +102,6 @@ def test_buf_read(self): reader = faiss.BufferedIOReader(reader, bsz) y = np.zeros_like(x) - print('nbytes=', y.nbytes) reader(faiss.swig_ptr(y), y.nbytes, 1) np.testing.assert_array_equal(x, y) diff --git a/thirdparty/faiss/tests/test_ivf_index.cpp b/thirdparty/faiss/tests/test_ivf_index.cpp index 28e572e39..21ed0abdc 100644 --- a/thirdparty/faiss/tests/test_ivf_index.cpp +++ b/thirdparty/faiss/tests/test_ivf_index.cpp @@ -6,12 +6,14 @@ */ #include +#include #include #include #include #include #include #include +#include #include diff --git a/thirdparty/faiss/tests/test_ivflib.py b/thirdparty/faiss/tests/test_ivflib.py index f19c3da45..0a3fb8c87 100644 --- a/thirdparty/faiss/tests/test_ivflib.py +++ b/thirdparty/faiss/tests/test_ivflib.py @@ -125,7 +125,6 @@ def 
test_range_search_with_parameters(self): Dpre, _ = index.search(xq, 15) radius = float(np.median(Dpre[:, -1])) - print("Radius=", radius) stats = faiss.cvar.indexIVF_stats stats.reset() Lref, Dref, Iref = index.range_search(xq, radius) diff --git a/thirdparty/faiss/tests/test_local_search_quantizer.py b/thirdparty/faiss/tests/test_local_search_quantizer.py index 22231358e..797592981 100644 --- a/thirdparty/faiss/tests/test_local_search_quantizer.py +++ b/thirdparty/faiss/tests/test_local_search_quantizer.py @@ -196,7 +196,6 @@ def test_update_codebooks_with_double(self): err_float = eval_codec(lsq, xb) # 6533.377 vs 25457.99 - print(err_double, err_float) self.assertLess(err_double, err_float) def test_compute_binary_terms(self): @@ -314,7 +313,7 @@ def test_icm_encode(self): n, 1) - # do icm encoding without pre-computed unary and binary terms in Python + # do icm encoding without pre-computed unary and bianry terms in Python codebooks = faiss.vector_float_to_array(lsq.codebooks) codebooks = codebooks.reshape(M, K, d).copy() ref_codes = icm_encode_ref(x, codebooks, codes) @@ -348,7 +347,6 @@ def test_training(self): pq.train(xt) err_pq = eval_codec(pq, xb) - print(err_lsq, err_pq) self.assertLess(err_lsq, err_pq) @@ -463,7 +461,6 @@ def eval_index_accuracy(self, factory_key): index.nprobe = nprobe D, I = index.search(ds.get_queries(), 10) inter = faiss.eval_intersection(I, ds.get_groundtruth(10)) - # print("nprobe=", nprobe, "inter=", inter) inters.append(inter) inters = np.array(inters) @@ -528,7 +525,6 @@ def test_codec(self): pq.train(xt) err_pq = eval_codec(pq, xb) - print(err_plsq, err_pq) self.assertLess(err_plsq, err_pq) def test_with_lsq(self): @@ -549,7 +545,6 @@ def test_with_lsq(self): lsq.train(xt) err_lsq = eval_codec(lsq, xb) - print(err_plsq, err_lsq) self.assertEqual(err_plsq, err_lsq) def test_lut(self): @@ -664,7 +659,6 @@ def test_index_accuracy2(self): """check that the error is in the same ballpark as LSQ.""" inter1 = 
self.eval_index_accuracy("IVF32,PLSQ2x2x5_Nqint8") inter2 = self.eval_index_accuracy("IVF32,LSQ4x5_Nqint8") - # print(inter1, inter2) # 381 vs 374 self.assertGreaterEqual(inter1 * 1.1, inter2) def test_factory(self): diff --git a/thirdparty/faiss/tests/test_merge_index.py b/thirdparty/faiss/tests/test_merge_index.py index 4417f57fe..bdcc813f1 100644 --- a/thirdparty/faiss/tests/test_merge_index.py +++ b/thirdparty/faiss/tests/test_merge_index.py @@ -72,7 +72,6 @@ def do_test_merge(self, index_type): index.merge_from(indexes[i], index.ntotal) _D, I = index.search(xq, k) - print(I[:5, :6]) ndiff = (I != Iref).sum() print('%d / %d differences' % (ndiff, nq * k)) diff --git a/thirdparty/faiss/tests/test_meta_index.py b/thirdparty/faiss/tests/test_meta_index.py index d53cad48f..d0896e8ba 100644 --- a/thirdparty/faiss/tests/test_meta_index.py +++ b/thirdparty/faiss/tests/test_meta_index.py @@ -82,10 +82,8 @@ def test_shards(self): k = 32 ref_index = faiss.IndexFlatL2(d) - print('ref search') ref_index.add(xb) _Dref, Iref = ref_index.search(xq, k) - print(Iref[:5, :6]) shard_index = faiss.IndexShards(d) shard_index_2 = faiss.IndexShards(d, True, False) @@ -109,7 +107,6 @@ def test_shards(self): for test_no in range(3): with_threads = test_no == 1 - print('shard search test_no = %d' % test_no) if with_threads: remember_nt = faiss.omp_get_max_threads() faiss.omp_set_num_threads(1) @@ -122,14 +119,10 @@ def test_shards(self): else: _D, I = shard_index_2.search(xq, k) - print(I[:5, :6]) - if with_threads: faiss.omp_set_num_threads(remember_nt) ndiff = (I != Iref).sum() - - print('%d / %d differences' % (ndiff, nq * k)) assert (ndiff < nq * k / 1000.) 
def test_shards_ivf(self): diff --git a/thirdparty/faiss/tests/test_partition.py b/thirdparty/faiss/tests/test_partition.py index 02de7e8c2..fd41eabe1 100644 --- a/thirdparty/faiss/tests/test_partition.py +++ b/thirdparty/faiss/tests/test_partition.py @@ -49,7 +49,6 @@ def do_partition(self, n, q, maxval=None, seed=None): if seed is None: for i in range(50): self.do_partition(n, q, maxval, i + 1234) - # print("seed=", seed) rs = np.random.RandomState(seed) if maxval is None: vals = rs.rand(n).astype('float32') @@ -95,7 +94,6 @@ def do_partition(self, n, q, maxval=None, seed=None): if seed is None: for i in range(50): self.do_partition(n, q, maxval, i + 1234) - # print("seed=", seed) rs = np.random.RandomState(seed) if maxval is None: vals = rs.rand(n).astype('float32') @@ -148,7 +146,6 @@ def do_partition(self, n, q, maxval=65536, seed=None): for i in range(50): self.do_partition(n, q, maxval, i + 1234) - # print("seed=", seed) rs = np.random.RandomState(seed) vals = rs.randint(maxval, size=n).astype('uint16') ids = (rs.permutation(n) + 12345).astype('int64') @@ -160,7 +157,6 @@ def do_partition(self, n, q, maxval=65536, seed=None): tab_a = faiss.AlignedTableUint16() faiss.copy_array_to_AlignedTable(vals, tab_a) - # print("tab a type", tab_a.get()) if type(q) == int: faiss.CMax_uint16_partition_fuzzy( tab_a.get(), sp(ids), n, q, q, None) @@ -196,7 +192,6 @@ def do_partition(self, n, q, maxval=65536, seed=None): if seed is None: for i in range(50): self.do_partition(n, q, maxval, i + 1234) - # print("seed=", seed) rs = np.random.RandomState(seed) vals = rs.randint(maxval, size=n).astype('uint16') ids = (rs.permutation(n) + 12345).astype('int64') @@ -209,7 +204,6 @@ def do_partition(self, n, q, maxval=65536, seed=None): vals_inv = (65535 - vals).astype('uint16') faiss.copy_array_to_AlignedTable(vals_inv, tab_a) - # print("tab a type", tab_a.get()) if type(q) == int: faiss.CMin_uint16_partition_fuzzy( tab_a.get(), sp(ids), n, q, q, None) diff --git 
a/thirdparty/faiss/tests/test_product_quantizer.py b/thirdparty/faiss/tests/test_product_quantizer.py index 1cdee7f14..f531cab2a 100644 --- a/thirdparty/faiss/tests/test_product_quantizer.py +++ b/thirdparty/faiss/tests/test_product_quantizer.py @@ -26,7 +26,6 @@ def test_pq(self): x2 = pq.decode(codes) diff = ((x - x2)**2).sum() - # print("diff=", diff) # diff= 4418.0562 self.assertGreater(5000, diff) @@ -71,7 +70,6 @@ def do_test_codec(self, nbit): def test_codec(self): for i in range(16): - print("Testing nbits=%d" % (i + 1)) self.do_test_codec(i + 1) diff --git a/thirdparty/faiss/tests/test_residual_quantizer.py b/thirdparty/faiss/tests/test_residual_quantizer.py index e37ee3efe..f4381607e 100644 --- a/thirdparty/faiss/tests/test_residual_quantizer.py +++ b/thirdparty/faiss/tests/test_residual_quantizer.py @@ -211,7 +211,6 @@ def test_training(self): # in practice RQ is often better than PQ but it does not the case here, so just check # that we are within some factor. - # print(err_pq, err_rq) self.assertLess(err_rq, err_pq * 1.2) def test_beam_size(self): @@ -321,10 +320,8 @@ def retrain_AQ_codebook(index, xt): x_decoded = index.sa_decode(codes_packed) MSE = ((xt - x_decoded) ** 2).sum() / n - # print(f"Initial MSE on training set: {MSE:g}") codes = unpack_codes(index.rq, codes_packed) - # print("ref codes", codes[0]) codebook_offsets = faiss.vector_to_array(rq.codebook_offsets) # build sparse code matrix (represented as a dense matrix) @@ -343,7 +340,6 @@ def retrain_AQ_codebook(index, xt): B, residuals, rank, singvals = scipy.linalg.lstsq(C, xt, ) MSE = ((C @ B - xt) ** 2).sum() / n - # print(f"MSE after retrainining: {MSE:g}") # replace codebook # faiss.copy_array_to_vector(B.astype('float32').ravel(), index.rq.codebooks) @@ -503,7 +499,6 @@ def test_reestimate_codebook_2(self): xt_decoded = ir.sa_decode(ir.sa_encode(xt)) err_after_refined = ((xt - xt_decoded) ** 2).sum() - # print(err_before, err_after_refined) # ref run 7474.98 / 7006.1777 
self.assertGreater(err_before, err_after_refined * 1.06) @@ -781,7 +776,6 @@ def test_search_L2(self): else: inter_2 = faiss.eval_intersection(I2, gt) self.assertGreaterEqual(inter_ref, inter_2) - # print(st, inter_ref, inter_2) ########################################################### @@ -814,7 +808,6 @@ def do_test_accuracy(self, by_residual, st): index.nprobe = nprobe D, I = index.search(ds.get_queries(), 10) inter = faiss.eval_intersection(I, ds.get_groundtruth(10)) - # print(st, "nprobe=", nprobe, "inter=", inter) inters.append(inter) # do a little I/O test @@ -909,18 +902,13 @@ def do_test_accuracy_IP(self, by_residual): D, I = index.search(ds.get_queries(), 10) index.rq.search_type = faiss.AdditiveQuantizer.ST_LUT_nonorm D2, I2 = index.search(ds.get_queries(), 10) - # print(D[:5] - D2[:5]) - # print(I[:5]) np.testing.assert_array_almost_equal(D, D2, decimal=5) # there are many ties because the codes are so short self.assertLess((I != I2).sum(), I.size * 0.1) # D2, I2 = index2.search(ds.get_queries(), 10) - # print(D[:5]) - # print(D2[:5]) inter = faiss.eval_intersection(I, ds.get_groundtruth(10)) - # print("nprobe=", nprobe, "inter=", inter) inters.append(inter) self.assertTrue(np.all(inters[1:4] >= inters[:3])) @@ -979,8 +967,6 @@ def beam_search_encode_step_tab(codes, L, distances, codebook_cross_prods_i, for b in range(beam_size): dotprods[i, b, :] += cb[codes[i, b, j]] - # print("dps", dotprods[:3, :2, :4]) - new_distances += 2 * dotprods cent_distances = new_distances @@ -1166,7 +1152,6 @@ def test_codec(self): pq.train(xt) err_pq = eval_codec(pq, xb) - # print(err_prq, err_pq) self.assertLess(err_prq, err_pq) def test_with_rq(self): @@ -1187,7 +1172,6 @@ def test_with_rq(self): rq.train(xt) err_rq = eval_codec(rq, xb) - # print(err_prq, err_rq) self.assertEqual(err_prq, err_rq) @@ -1271,7 +1255,6 @@ def test_index_accuracy2(self): """check that the error is in the same ballpark as RQ.""" inter1 = self.eval_index_accuracy("IVF100,PRQ2x2x5_Nqint8") 
inter2 = self.eval_index_accuracy("IVF100,RQ4x5_Nqint8") - # print(inter1, inter2) # 392 vs 374 self.assertGreaterEqual(inter1 * 1.1, inter2) def test_factory(self): diff --git a/thirdparty/faiss/tests/test_rowwise_minmax.py b/thirdparty/faiss/tests/test_rowwise_minmax.py index dbd14de38..53e6c00b1 100644 --- a/thirdparty/faiss/tests/test_rowwise_minmax.py +++ b/thirdparty/faiss/tests/test_rowwise_minmax.py @@ -45,7 +45,6 @@ def compare_train_vs_train_inplace(self, factory_key): # make sure that the reconstruction error is not crazy reconstruction_err = ((x - decoded) ** 2).sum() - print(reconstruction_err) self.assertLess(reconstruction_err, 0.6) diff --git a/thirdparty/faiss/tests/test_search_params.py b/thirdparty/faiss/tests/test_search_params.py index 22b845c2e..886ffc0c6 100644 --- a/thirdparty/faiss/tests/test_search_params.py +++ b/thirdparty/faiss/tests/test_search_params.py @@ -465,7 +465,6 @@ def test_12_92(self): sp = faiss.swig_ptr selr.find_sorted_ids_bounds( len(ids), sp(ids), sp(j01[:1]), sp(j01[1:])) - print(j01) assert j01[0] >= j01[1] diff --git a/thirdparty/faiss/tests/test_standalone_codec.py b/thirdparty/faiss/tests/test_standalone_codec.py index 7fdcf6849..391b88b9d 100644 --- a/thirdparty/faiss/tests/test_standalone_codec.py +++ b/thirdparty/faiss/tests/test_standalone_codec.py @@ -151,7 +151,6 @@ def compare_accuracy(self, lowac, highac, max_errs=(1e10, 1e10)): err = ((x - x2) ** 2).sum() errs.append(err) - print(errs) self.assertGreater(errs[0], errs[1]) self.assertGreater(max_errs[0], errs[0]) @@ -174,6 +173,9 @@ def test_SQ2(self): def test_SQ3(self): self.compare_accuracy('SQ8', 'SQfp16') + def test_SQ4(self): + self.compare_accuracy('SQ8', 'SQbf16') + def test_PQ(self): self.compare_accuracy('PQ6x8np', 'PQ8x8np') @@ -214,7 +216,6 @@ def test_repeats(self): code = repeats.encode(swig_ptr(vec)) vec2 = np.zeros(dim, dtype='float32') repeats.decode(code, swig_ptr(vec2)) - # print(vec2) assert np.all(vec == vec2) def 
test_ZnSphereCodec_encode_centroid(self): @@ -222,7 +223,6 @@ def test_ZnSphereCodec_encode_centroid(self): r2 = 5 ref_codec = faiss.ZnSphereCodec(dim, r2) codec = faiss.ZnSphereCodecRec(dim, r2) - # print(ref_codec.nv, codec.nv) assert ref_codec.nv == codec.nv s = set() for i in range(ref_codec.nv): @@ -237,7 +237,6 @@ def test_ZnSphereCodecRec(self): dim = 16 r2 = 6 codec = faiss.ZnSphereCodecRec(dim, r2) - # print("nv=", codec.nv) for i in range(codec.nv): c = np.zeros(dim, dtype='float32') codec.decode(i, swig_ptr(c)) @@ -300,15 +299,10 @@ def test_rw(self): for i in range(nbyte): self.assertTrue(((bignum >> (i * 8)) & 255) == bs[i]) - #for i in range(nbyte): - # print(bin(bs[i] + 256)[3:], end=' ') - # print() - br = faiss.BitstringReader(swig_ptr(bs), nbyte) for nbit, xref in ctrl: xnew = br.read(nbit) - # print('nbit %d xref %x xnew %x' % (nbit, xref, xnew)) self.assertTrue(xnew == xref) def test_arrays(self): diff --git a/thirdparty/faiss/tutorial/cpp/1-Flat.cpp b/thirdparty/faiss/tutorial/cpp/1-Flat.cpp index 819e41957..147fa89bc 100644 --- a/thirdparty/faiss/tutorial/cpp/1-Flat.cpp +++ b/thirdparty/faiss/tutorial/cpp/1-Flat.cpp @@ -83,10 +83,10 @@ int main() { printf("\n"); } - printf("I (5 last results)=\n"); + printf("D (5 last results)=\n"); for (int i = nq - 5; i < nq; i++) { for (int j = 0; j < k; j++) - printf("%5zd ", I[i * k + j]); + printf("%5f ", D[i * k + j]); printf("\n"); } diff --git a/thirdparty/faiss/tutorial/cpp/2-IVFFlat.cpp b/thirdparty/faiss/tutorial/cpp/2-IVFFlat.cpp index febd5be04..86530ae98 100644 --- a/thirdparty/faiss/tutorial/cpp/2-IVFFlat.cpp +++ b/thirdparty/faiss/tutorial/cpp/2-IVFFlat.cpp @@ -61,13 +61,10 @@ int main() { printf("\n"); } - index.nprobe = 10; - index.search(nq, xq, k, D, I); - - printf("I=\n"); + printf("D=\n"); for (int i = nq - 5; i < nq; i++) { for (int j = 0; j < k; j++) - printf("%5zd ", I[i * k + j]); + printf("%5f ", D[i * k + j]); printf("\n"); } diff --git a/thirdparty/faiss/tutorial/cpp/6-HNSW.cpp 
b/thirdparty/faiss/tutorial/cpp/6-HNSW.cpp new file mode 100644 index 000000000..9bd8cd3fa --- /dev/null +++ b/thirdparty/faiss/tutorial/cpp/6-HNSW.cpp @@ -0,0 +1,73 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include + +#include + +using idx_t = faiss::idx_t; + +int main() { + int d = 64; // dimension + int nb = 100000; // database size + int nq = 10000; // nb of queries + + std::mt19937 rng; + std::uniform_real_distribution<> distrib; + + float* xb = new float[d * nb]; + float* xq = new float[d * nq]; + + for (int i = 0; i < nb; i++) { + for (int j = 0; j < d; j++) + xb[d * i + j] = distrib(rng); + xb[d * i] += i / 1000.; + } + + for (int i = 0; i < nq; i++) { + for (int j = 0; j < d; j++) + xq[d * i + j] = distrib(rng); + xq[d * i] += i / 1000.; + } + + int k = 4; + + faiss::IndexHNSWFlat index(d, 32); + index.add(nb, xb); + + { // search xq + idx_t* I = new idx_t[k * nq]; + float* D = new float[k * nq]; + + index.search(nq, xq, k, D, I); + + printf("I=\n"); + for (int i = nq - 5; i < nq; i++) { + for (int j = 0; j < k; j++) + printf("%5zd ", I[i * k + j]); + printf("\n"); + } + + printf("D=\n"); + for (int i = nq - 5; i < nq; i++) { + for (int j = 0; j < k; j++) + printf("%5f ", D[i * k + j]); + printf("\n"); + } + + delete[] I; + delete[] D; + } + + delete[] xb; + delete[] xq; + + return 0; +} diff --git a/thirdparty/faiss/tutorial/cpp/7-PQFastScan.cpp b/thirdparty/faiss/tutorial/cpp/7-PQFastScan.cpp new file mode 100644 index 000000000..4cdfea052 --- /dev/null +++ b/thirdparty/faiss/tutorial/cpp/7-PQFastScan.cpp @@ -0,0 +1,75 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+#include <cassert> +#include <cstdio> +#include <cstdlib> +#include <random> + +#include <faiss/IndexPQFastScan.h>
+#include <cassert> +#include <cstdio> +#include <cstdlib> +#include <random> + +#include <faiss/IndexPQFastScan.h> +#include <faiss/IndexRefine.h>
+ * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include + +#include +#include +#include +using idx_t = faiss::idx_t; + +int main() { + int d = 64; // dimension + int nb = 100000; // database size + int nq = 10000; // nb of queries + + std::mt19937 rng; + std::uniform_real_distribution<> distrib; + + float* xb = new float[(int)(d * nb)]; + float* xq = new float[(int)(d * nq)]; + + for (int i = 0; i < nb; i++) { + for (int j = 0; j < d; j++) { + xb[d * i + j] = distrib(rng); + } + xb[d * i] += i / 1000.; + } + + for (int i = 0; i < nq; i++) { + for (int j = 0; j < d; j++) { + xq[d * i + j] = distrib(rng); + } + xq[d * i] += i / 1000.; + } + + // Constructing the refine PQ index with SQfp16 with index factory + faiss::Index* index_fp16; + index_fp16 = faiss::index_factory( + d, "PQ32x4fs,Refine(SQfp16)", faiss::METRIC_L2); + index_fp16->train(nb, xb); + index_fp16->add(nb, xb); + + // Constructing the refine PQ index with SQ8 + faiss::Index* index_sq8; + index_sq8 = + faiss::index_factory(d, "PQ32x4fs,Refine(SQ8)", faiss::METRIC_L2); + index_sq8->train(nb, xb); + index_sq8->add(nb, xb); + + int k = 10; + { // search xq + idx_t* I_fp16 = new idx_t[(int)(k * nq)]; + float* D_fp16 = new float[(int)(k * nq)]; + idx_t* I_sq8 = new idx_t[(int)(k * nq)]; + float* D_sq8 = new float[(int)(k * nq)]; + + // Parameterization on k factor while doing search for index refinement + float k_factor = 3; + faiss::IndexRefineSearchParameters* params = + new faiss::IndexRefineSearchParameters(); + params->k_factor = k_factor; + + // Perform index search using different index refinement + index_fp16->search(nq, xq, k, D_fp16, I_fp16, params); + index_sq8->search(nq, xq, k, D_sq8, I_sq8, params); + + printf("I_fp16=\n"); + for (int i = nq - 5; i < nq; i++) { + for (int j = 0; j < k; j++) { + printf("%5zd ", I_fp16[i * k + j]); + } + printf("\n"); + } + + 
printf("I_sq8=\n"); + for (int i = nq - 5; i < nq; i++) { + for (int j = 0; j < k; j++) { + printf("%5zd ", I_sq8[i * k + j]); + } + printf("\n"); + } + + delete[] I_fp16; + delete[] D_fp16; + delete[] I_sq8; + delete[] D_sq8; + delete params; + + delete index_fp16; + delete index_sq8; + } + + delete[] xb; + delete[] xq; + + return 0; +} diff --git a/thirdparty/faiss/tutorial/cpp/CMakeLists.txt b/thirdparty/faiss/tutorial/cpp/CMakeLists.txt index 7361b33a0..f964b3dda 100644 --- a/thirdparty/faiss/tutorial/cpp/CMakeLists.txt +++ b/thirdparty/faiss/tutorial/cpp/CMakeLists.txt @@ -18,3 +18,15 @@ target_link_libraries(4-GPU PRIVATE faiss) add_executable(5-Multiple-GPUs EXCLUDE_FROM_ALL 5-Multiple-GPUs.cpp) target_link_libraries(5-Multiple-GPUs PRIVATE faiss) + +add_executable(6-HNSW EXCLUDE_FROM_ALL 6-HNSW.cpp) +target_link_libraries(6-HNSW PRIVATE faiss) + +add_executable(7-PQFastScan EXCLUDE_FROM_ALL 7-PQFastScan.cpp) +target_link_libraries(7-PQFastScan PRIVATE faiss) + +add_executable(8-PQFastScanRefine EXCLUDE_FROM_ALL 8-PQFastScanRefine.cpp) +target_link_libraries(8-PQFastScanRefine PRIVATE faiss) + +add_executable(9-RefineComparison EXCLUDE_FROM_ALL 9-RefineComparison.cpp) +target_link_libraries(9-RefineComparison PRIVATE faiss) diff --git a/thirdparty/faiss/tutorial/python/7-PQFastScan.py b/thirdparty/faiss/tutorial/python/7-PQFastScan.py new file mode 100644 index 000000000..34d7a34ac --- /dev/null +++ b/thirdparty/faiss/tutorial/python/7-PQFastScan.py @@ -0,0 +1,35 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import faiss +import numpy as np + +d = 64 # dimension +nb = 100000 # database size +nq = 10000 # nb of queries +np.random.seed(1234) # make reproducible +xb = np.random.random((nb, d)).astype('float32') # 64-dim *nb queries +xb[:, 0] += np.arange(nb) / 1000. 
+xq = np.random.random((nq, d)).astype('float32') +xq[:, 0] += np.arange(nq) / 1000. + +m = 8 # 8 specifies that the number of sub-vector is 8 +k = 4 # number of dimension in etracted vector +n_bit = 4 # 4 specifies that each sub-vector is encoded as 4 bits +bbs = 32 # build block size ( bbs % 32 == 0 ) for PQ +index = faiss.IndexPQFastScan(d, m, n_bit, faiss.METRIC_L2, bbs) +# construct FastScan Index + +assert not index.is_trained +index.train(xb) # Train vectors data index within mockup database +assert index.is_trained + +index.add(xb) +D, I = index.search(xb[:5], k) # sanity check +print(I) +print(D) +index.nprobe = 10 # make comparable with experiment above +D, I = index.search(xq, k) # search +print(I[-5:]) # neighbors of the 5 last queries diff --git a/thirdparty/faiss/tutorial/python/8-PQFastScanRefine.py b/thirdparty/faiss/tutorial/python/8-PQFastScanRefine.py new file mode 100644 index 000000000..115a036fa --- /dev/null +++ b/thirdparty/faiss/tutorial/python/8-PQFastScanRefine.py @@ -0,0 +1,38 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import faiss +import numpy as np + +d = 64 # dimension +nb = 100000 # database size +nq = 10000 # nb of queries +np.random.seed(1234) # make reproducible +xb = np.random.random((nb, d)).astype('float32') # 64-dim *nb queries +xb[:, 0] += np.arange(nb) / 1000. +xq = np.random.random((nq, d)).astype('float32') +xq[:, 0] += np.arange(nq) / 1000. 
+ +m = 8 # 8 specifies that the number of sub-vector is 8 +k = 4 # number of dimension in etracted vector +n_bit = 4 # 4 specifies that each sub-vector is encoded as 4 bits +bbs = 32 # build block size ( bbs % 32 == 0 ) for PQ + +index = faiss.IndexPQFastScan(d, m, n_bit, faiss.METRIC_L2) +index_refine = faiss.IndexRefineFlat(index) +# construct FastScan and run index refinement + +assert not index_refine.is_trained +index_refine.train(xb) # Train vectors data index within mockup database +assert index_refine.is_trained + +index_refine.add(xb) +params = faiss.IndexRefineSearchParameters(k_factor=3) +D, I = index_refine.search(xq[:5], 10, params=params) +print(I) +print(D) +index.nprobe = 10 # make comparable with experiment above +D, I = index.search(xq[:5], k) # search +print(I[-5:]) diff --git a/thirdparty/faiss/tutorial/python/9-RefineComparison.py b/thirdparty/faiss/tutorial/python/9-RefineComparison.py new file mode 100644 index 000000000..6fa69f33d --- /dev/null +++ b/thirdparty/faiss/tutorial/python/9-RefineComparison.py @@ -0,0 +1,42 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import faiss + +from faiss.contrib.evaluation import knn_intersection_measure +from faiss.contrib import datasets + +# 64-dim vectors, 50000 vectors in the training, 100000 in database, +# 10000 in queries, dtype ('float32') +ds = datasets.SyntheticDataset(64, 50000, 100000, 10000) +d = 64 # dimension + +# Constructing the refine PQ index with SQfp16 with index factory +index_fp16 = faiss.index_factory(d, 'PQ32x4fs,Refine(SQfp16)') +index_fp16.train(ds.get_train()) +index_fp16.add(ds.get_database()) + +# Constructing the refine PQ index with SQ8 +index_sq8 = faiss.index_factory(d, 'PQ32x4fs,Refine(SQ8)') +index_sq8.train(ds.get_train()) +index_sq8.add(ds.get_database()) + +# Parameterization on k factor while doing search for index refinement +k_factor = 3.0 +params = faiss.IndexRefineSearchParameters(k_factor=k_factor) + +# Perform index search using different index refinement +D_fp16, I_fp16 = index_fp16.search(ds.get_queries(), 100, params=params) +D_sq8, I_sq8 = index_sq8.search(ds.get_queries(), 100, params=params) + +# Calculating knn intersection measure for different index types on refinement +KIM_fp16 = knn_intersection_measure(I_fp16, ds.get_groundtruth()) +KIM_sq8 = knn_intersection_measure(I_sq8, ds.get_groundtruth()) + +# KNN intersection measure accuracy shows that choosing SQ8 impacts accuracy +assert (KIM_fp16 > KIM_sq8) + +print(I_sq8[:5]) +print(I_fp16[:5])