From 27ceb320faa8d88f16c9242dec99ddb9922951d1 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Mon, 26 Aug 2024 15:20:53 +0000 Subject: [PATCH 01/22] perf_tests --- CMakeLists.txt | 2 + perf_tests/CMakeLists.txt | 31 +++++++++++++ perf_tests/bench_6bit_codec.cpp | 81 +++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 perf_tests/CMakeLists.txt create mode 100644 perf_tests/bench_6bit_codec.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f1b30fa35..a7c1034987 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,6 +104,8 @@ endif() add_subdirectory(demos) add_subdirectory(benchs) add_subdirectory(tutorial/cpp) +add_subdirectory(perf_tests) + # CTest must be included in the top level to enable `make test` target. include(CTest) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt new file mode 100644 index 0000000000..8740631c05 --- /dev/null +++ b/perf_tests/CMakeLists.txt @@ -0,0 +1,31 @@ +project(faiss_perf_tests) + + +include(DownloadProject) +download_project(PROJ googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG master + ${UPDATE_DISCONNECTED_IF_AVAILABLE} +) +add_subdirectory(${googlebenchmark_SOURCE_DIR} ${googlebenchmark_BINARY_DIR}) +include_directories("${googlebenchmark_SOURCE_DIR}/include") + +find_package(GoogleBenchmark REQUIRED) + +file(GLOB_RECURSE ALL_BENCH_CPP *.cpp) + +foreach(ONE_BENCH_CPP ${ALL_BENCH_CPP}) + + get_filename_component(ONE_BENCH_EXEC ${ONE_BENCH_CPP} NAME_WE) + + # Avoid name collision + set(TARGET_NAME Bench_${ONE_BENCH_EXEC}) + + add_executable(${TARGET_NAME} ${ONE_BENCH_CPP}) + set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME ${ONE_BENCH_EXEC}) + target_link_libraries(${TARGET_NAME} + ${CMAKE_THREAD_LIBS_INIT}) + + # If you want to run benchmarks with the "make test" command, uncomment me + add_test(${TARGET_NAME} ${ONE_BENCH_EXEC}) +endforeach() diff --git a/perf_tests/bench_6bit_codec.cpp b/perf_tests/bench_6bit_codec.cpp new 
file mode 100644 index 0000000000..b4ac0b04b6 --- /dev/null +++ b/perf_tests/bench_6bit_codec.cpp @@ -0,0 +1,81 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +#include +#include +#include +#include +#include + +using namespace faiss; + +static void bench(benchmark::State& state) { + int d = 128; + int n = 2000; + + std::vector x(d * n); + + float_rand(x.data(), d * n, 12345); + + // make sure it's idempotent + ScalarQuantizer sq(d, ScalarQuantizer::QT_6bit); + + omp_set_num_threads(1); + + sq.train(n, x.data()); + + size_t code_size = sq.code_size; + state.counters["code_size"] = sq.code_size; + + // encode + std::vector codes(code_size * n); + sq.compute_codes(x.data(), codes.data(), n); + + // decode + std::vector x2(d * n); + sq.decode(codes.data(), x2.data(), n); + + state.counters["sql2_recons_error"] = + fvec_L2sqr(x.data(), x2.data(), n * d) / n; + + // encode again + std::vector codes2(code_size * n); + sq.compute_codes(x2.data(), codes2.data(), n); + + size_t ndiff = 0; + for (size_t i = 0; i < codes.size(); i++) { + if (codes[i] != codes2[i]) + ndiff++; + } + + state.counters["ndiff_for_idempotence"] = ndiff; + + state.counters["code_size_two"] = codes.size(); + + std::unique_ptr dc( + sq.get_distance_computer()); + dc->codes = codes.data(); + dc->code_size = sq.code_size; + state.counters["code_size_three"] = dc->code_size; + + for (auto _ : state) { + float sum_dis = 0; + for (int i = 0; i < n; i++) { + dc->set_query(&x[i * d]); + for (int j = 0; j < n; j++) { + benchmark::DoNotOptimize(sum_dis += (*dc)(j)); + } + } + } +} +// I think maybe n and d should be input arguments +// for things to really make sense, idk. 
+BENCHMARK(bench)->Iterations(20); +BENCHMARK_MAIN(); From 23d204aff1a120a3b6c717eaad8387068e879aad Mon Sep 17 00:00:00 2001 From: mengdilin Date: Mon, 26 Aug 2024 15:39:01 +0000 Subject: [PATCH 02/22] working cmake producing build --- perf_tests/CMakeLists.txt | 75 ++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 8740631c05..52ebcb9f18 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -1,31 +1,64 @@ project(faiss_perf_tests) -include(DownloadProject) -download_project(PROJ googlebenchmark - GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG master - ${UPDATE_DISCONNECTED_IF_AVAILABLE} -) -add_subdirectory(${googlebenchmark_SOURCE_DIR} ${googlebenchmark_BINARY_DIR}) -include_directories("${googlebenchmark_SOURCE_DIR}/include") +include(FetchContent) +FetchContent_Declare(googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG main) # need main for benchmark::benchmark -find_package(GoogleBenchmark REQUIRED) +set(BENCHMARK_ENABLE_TESTING OFF) +FetchContent_MakeAvailable( + googlebenchmark) -file(GLOB_RECURSE ALL_BENCH_CPP *.cpp) +find_package(Threads REQUIRED) -foreach(ONE_BENCH_CPP ${ALL_BENCH_CPP}) +add_executable(faiss_perf_tests bench_6bit_codec.cpp) - get_filename_component(ONE_BENCH_EXEC ${ONE_BENCH_CPP} NAME_WE) +if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") + target_link_libraries(faiss_perf_tests PRIVATE faiss) +endif() - # Avoid name collision - set(TARGET_NAME Bench_${ONE_BENCH_EXEC}) +if(FAISS_OPT_LEVEL STREQUAL "avx2") + if(NOT WIN32) + target_compile_options(faiss_perf_tests PRIVATE $<$:-mavx2 -mfma>) + else() + target_compile_options(faiss_perf_tests PRIVATE $<$:/arch:AVX2>) + endif() + target_link_libraries(faiss_perf_tests PRIVATE faiss_avx2) +endif() - add_executable(${TARGET_NAME} ${ONE_BENCH_CPP}) 
- set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME ${ONE_BENCH_EXEC}) - target_link_libraries(${TARGET_NAME} - ${CMAKE_THREAD_LIBS_INIT}) +if(FAISS_OPT_LEVEL STREQUAL "avx512") + if(NOT WIN32) + target_compile_options(faiss_perf_tests PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) + else() + target_compile_options(faiss_perf_tests PRIVATE $<$:/arch:AVX512>) + endif() + target_link_libraries(faiss_perf_tests PRIVATE faiss_avx512) +endif() - # If you want to run benchmarks with the "make test" command, uncomment me - add_test(${TARGET_NAME} ${ONE_BENCH_EXEC}) -endforeach() +if(FAISS_OPT_LEVEL STREQUAL "sve") + if(NOT WIN32) + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(faiss_perf_tests PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(faiss_perf_tests PRIVATE $<$,$>:-march=armv8-a+sve>) + endif() + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(faiss_perf_tests PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(faiss_perf_tests PRIVATE $<$,$>:-march=armv8-a+sve>) + endif() + else() + # TODO: support Windows + endif() + target_link_libraries(faiss_perf_tests PRIVATE faiss_sve) 
+endif() +target_link_libraries(faiss_perf_tests PRIVATE benchmark::benchmark) \ No newline at end of file From ee73068353e3db11ed50f2e257d658f9a5719eaf Mon Sep 17 00:00:00 2001 From: mengdilin Date: Mon, 26 Aug 2024 10:32:55 -0700 Subject: [PATCH 03/22] bench --- perf_tests/CMakeLists.txt | 98 +++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 52ebcb9f18..667a49f579 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -11,54 +11,70 @@ FetchContent_MakeAvailable( googlebenchmark) find_package(Threads REQUIRED) +find_package(OpenMP REQUIRED) -add_executable(faiss_perf_tests bench_6bit_codec.cpp) +file(GLOB files "${PROJECT_SOURCE_DIR}/*.cpp") +include(CMakePrintHelpers) -if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") - target_link_libraries(faiss_perf_tests PRIVATE faiss) -endif() +cmake_print_variables(files) -if(FAISS_OPT_LEVEL STREQUAL "avx2") - if(NOT WIN32) - target_compile_options(faiss_perf_tests PRIVATE $<$:-mavx2 -mfma>) - else() - target_compile_options(faiss_perf_tests PRIVATE $<$:/arch:AVX2>) +cmake_print_variables(PROJECT_SOURCE_DIR) + +foreach(ONE_BENCH_CPP ${files}) + get_filename_component(ONE_BENCH_EXEC ${ONE_BENCH_CPP} NAME_WE) + set(TARGET_NAME bench_${ONE_BENCH_EXEC}) + cmake_print_variables(ONE_BENCH_EXEC) + + add_executable(${TARGET_NAME} ${ONE_BENCH_CPP}) + + if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") + target_link_libraries(${TARGET_NAME} PRIVATE faiss) endif() - target_link_libraries(faiss_perf_tests PRIVATE faiss_avx2) -endif() -if(FAISS_OPT_LEVEL STREQUAL "avx512") - if(NOT WIN32) - target_compile_options(faiss_perf_tests PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) - else() - 
target_compile_options(faiss_perf_tests PRIVATE $<$:/arch:AVX512>) + if(FAISS_OPT_LEVEL STREQUAL "avx2") + if(NOT WIN32) + target_compile_options(${TARGET_NAME} PRIVATE $<$:-mavx2 -mfma>) + else() + target_compile_options(${TARGET_NAME} PRIVATE $<$:/arch:AVX2>) + endif() + target_link_libraries(${TARGET_NAME} PRIVATE faiss_avx2) endif() - target_link_libraries(faiss_perf_tests PRIVATE faiss_avx512) -endif() -if(FAISS_OPT_LEVEL STREQUAL "sve") - if(NOT WIN32) - if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") - # Do nothing, expect SVE to be enabled by -march=native - elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") - # Add +sve - target_compile_options(faiss_perf_tests PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) - elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") - # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(faiss_perf_tests PRIVATE $<$,$>:-march=armv8-a+sve>) + if(FAISS_OPT_LEVEL STREQUAL "avx512") + if(NOT WIN32) + target_compile_options(${TARGET_NAME} PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) + else() + target_compile_options(${TARGET_NAME} PRIVATE $<$:/arch:AVX512>) endif() - if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") - # Do nothing, expect SVE to be enabled by -march=native - elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") - # Add +sve - target_compile_options(faiss_perf_tests PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) - elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") - # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(faiss_perf_tests PRIVATE $<$,$>:-march=armv8-a+sve>) + target_link_libraries(${TARGET_NAME} PRIVATE faiss_avx512) + endif() + + 
if(FAISS_OPT_LEVEL STREQUAL "sve") + if(NOT WIN32) + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(${TARGET_NAME} PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(${TARGET_NAME} PRIVATE $<$,$>:-march=armv8-a+sve>) + endif() + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(${TARGET_NAME} PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(${TARGET_NAME} PRIVATE $<$,$>:-march=armv8-a+sve>) + endif() + else() + # TODO: support Windows endif() - else() - # TODO: support Windows + target_link_libraries(${TARGET_NAME} PRIVATE faiss_sve) endif() - target_link_libraries(faiss_perf_tests PRIVATE faiss_sve) -endif() -target_link_libraries(faiss_perf_tests PRIVATE benchmark::benchmark) \ No newline at end of file + + +target_link_libraries(${TARGET_NAME} PRIVATE OpenMP::OpenMP_CXX benchmark::benchmark) +endforeach() From aeef2b22668720e647023e7e57d1614868887e78 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Mon, 26 Aug 2024 10:50:21 -0700 Subject: [PATCH 04/22] add to cmake --- .github/actions/build_cmake/action.yml | 5 ++ perf_tests/CMakeLists.txt | 98 +++++++++++--------------- 2 files changed, 46 insertions(+), 57 deletions(-) diff --git 
a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 1cc7818b10..2328972865 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -143,6 +143,11 @@ runs: run: | export GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/" make -C build test + - name: C++ perf benchmarks + shell: bash + run: | + make -C build -j faiss_perf_tests + cd ./build/perf_tests && ./faiss_perf_tests - name: Install Python extension shell: bash working-directory: build/faiss/python diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 667a49f579..b877f9419c 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -11,70 +11,54 @@ FetchContent_MakeAvailable( googlebenchmark) find_package(Threads REQUIRED) -find_package(OpenMP REQUIRED) -file(GLOB files "${PROJECT_SOURCE_DIR}/*.cpp") -include(CMakePrintHelpers) +add_executable(faiss_perf_tests bench_6bit_codec.cpp) -cmake_print_variables(files) +if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") + target_link_libraries(faiss_perf_tests PRIVATE faiss) +endif() -cmake_print_variables(PROJECT_SOURCE_DIR) - -foreach(ONE_BENCH_CPP ${files}) - get_filename_component(ONE_BENCH_EXEC ${ONE_BENCH_CPP} NAME_WE) - set(TARGET_NAME bench_${ONE_BENCH_EXEC}) - cmake_print_variables(ONE_BENCH_EXEC) - - add_executable(${TARGET_NAME} ${ONE_BENCH_CPP}) - - if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") - target_link_libraries(${TARGET_NAME} PRIVATE faiss) +if(FAISS_OPT_LEVEL STREQUAL "avx2") + if(NOT WIN32) + target_compile_options(faiss_perf_tests PRIVATE $<$:-mavx2 -mfma>) + else() + target_compile_options(faiss_perf_tests PRIVATE $<$:/arch:AVX2>) endif() + target_link_libraries(faiss_perf_tests PRIVATE faiss_avx2) +endif() - if(FAISS_OPT_LEVEL STREQUAL "avx2") - if(NOT WIN32) - 
target_compile_options(${TARGET_NAME} PRIVATE $<$:-mavx2 -mfma>) - else() - target_compile_options(${TARGET_NAME} PRIVATE $<$:/arch:AVX2>) - endif() - target_link_libraries(${TARGET_NAME} PRIVATE faiss_avx2) +if(FAISS_OPT_LEVEL STREQUAL "avx512") + if(NOT WIN32) + target_compile_options(faiss_perf_tests PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) + else() + target_compile_options(faiss_perf_tests PRIVATE $<$:/arch:AVX512>) endif() + target_link_libraries(faiss_perf_tests PRIVATE faiss_avx512) +endif() - if(FAISS_OPT_LEVEL STREQUAL "avx512") - if(NOT WIN32) - target_compile_options(${TARGET_NAME} PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) - else() - target_compile_options(${TARGET_NAME} PRIVATE $<$:/arch:AVX512>) +if(FAISS_OPT_LEVEL STREQUAL "sve") + if(NOT WIN32) + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(faiss_perf_tests PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(faiss_perf_tests PRIVATE $<$,$>:-march=armv8-a+sve>) endif() - target_link_libraries(${TARGET_NAME} PRIVATE faiss_avx512) - endif() - - if(FAISS_OPT_LEVEL STREQUAL "sve") - if(NOT WIN32) - if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") - # Do nothing, expect SVE to be enabled by -march=native - elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") - # Add +sve - target_compile_options(${TARGET_NAME} PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) - elseif(NOT "${CMAKE_CXX_FLAGS} 
${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") - # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(${TARGET_NAME} PRIVATE $<$,$>:-march=armv8-a+sve>) - endif() - if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") - # Do nothing, expect SVE to be enabled by -march=native - elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") - # Add +sve - target_compile_options(${TARGET_NAME} PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) - elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") - # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(${TARGET_NAME} PRIVATE $<$,$>:-march=armv8-a+sve>) - endif() - else() - # TODO: support Windows + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(faiss_perf_tests PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(faiss_perf_tests PRIVATE $<$,$>:-march=armv8-a+sve>) endif() - target_link_libraries(${TARGET_NAME} PRIVATE faiss_sve) + else() + # TODO: support Windows endif() - - -target_link_libraries(${TARGET_NAME} PRIVATE OpenMP::OpenMP_CXX benchmark::benchmark) -endforeach() + target_link_libraries(faiss_perf_tests PRIVATE faiss_sve) +endif() +target_link_libraries(faiss_perf_tests PRIVATE benchmark::benchmark) From 77d77a18c2109dd7ed8ac5428a9bebbcddf60e9e Mon Sep 17 00:00:00 2001 From: mengdilin Date: Mon, 26 Aug 2024 10:58:40 -0700 Subject: [PATCH 05/22] fix cmake --- perf_tests/CMakeLists.txt | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index b877f9419c..4630359235 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -11,6 +11,7 @@ FetchContent_MakeAvailable( googlebenchmark) find_package(Threads REQUIRED) +find_package(OpenMP REQUIRED) add_executable(faiss_perf_tests bench_6bit_codec.cpp) @@ -61,4 +62,4 @@ if(FAISS_OPT_LEVEL STREQUAL "sve") endif() target_link_libraries(faiss_perf_tests PRIVATE faiss_sve) endif() -target_link_libraries(faiss_perf_tests PRIVATE benchmark::benchmark) +target_link_libraries(faiss_perf_tests PRIVATE OpenMP::OpenMP_CXX benchmark::benchmark) From 1e82c642a0f195ff7a3f5936790c0a1ec98031d3 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 10:00:08 -0700 Subject: [PATCH 06/22] move files --- perf_tests/bench_hnsw.py | 196 ++++++++++++++++++ .../bench_no_multithreading_rcq_search.cpp | 65 ++++++ .../bench_scalar_quantizer_accuracy.cpp | 88 ++++++++ perf_tests/bench_scalar_quantizer_decode.cpp | 71 +++++++ ...pp => bench_scalar_quantizer_distance.cpp} | 64 +++--- perf_tests/bench_scalar_quantizer_encode.cpp | 65 ++++++ perf_tests/utils.cpp | 20 ++ perf_tests/utils.h | 11 + 8 files changed, 547 insertions(+), 33 deletions(-) create mode 100644 perf_tests/bench_hnsw.py create mode 100644 perf_tests/bench_no_multithreading_rcq_search.cpp create mode 100644 perf_tests/bench_scalar_quantizer_accuracy.cpp create mode 100644 perf_tests/bench_scalar_quantizer_decode.cpp rename perf_tests/{bench_6bit_codec.cpp => bench_scalar_quantizer_distance.cpp} (55%) create mode 100644 perf_tests/bench_scalar_quantizer_encode.cpp create mode 100644 perf_tests/utils.cpp create mode 100644 perf_tests/utils.h diff --git a/perf_tests/bench_hnsw.py b/perf_tests/bench_hnsw.py new file mode 100644 index 0000000000..ceace5f2b7 --- /dev/null +++ b/perf_tests/bench_hnsw.py @@ -0,0 +1,196 @@ +import argparse +import resource +import time +from contextlib import 
contextmanager +from dataclasses import dataclass +from typing import Dict, Generator, List, Optional + +import faiss # @manual=//faiss/python:pyfaiss +import numpy as np +from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib + Dataset, + SyntheticDataset, +) + +US_IN_S = 1_000_000 + + +@dataclass +class PerfCounters: + wall_time_s: float = 0.0 + user_time_s: float = 0.0 + system_time_s: float = 0.0 + + +@contextmanager +def timed_execution() -> Generator[PerfCounters, None, None]: + pcounters = PerfCounters() + wall_time_start = time.perf_counter() + rusage_start = resource.getrusage(resource.RUSAGE_SELF) + yield pcounters + wall_time_end = time.perf_counter() + rusage_end = resource.getrusage(resource.RUSAGE_SELF) + pcounters.wall_time_s = wall_time_end - wall_time_start + pcounters.user_time_s = rusage_end.ru_utime - rusage_start.ru_utime + pcounters.system_time_s = rusage_end.ru_stime - rusage_start.ru_stime + + +def is_perf_counter(key: str) -> bool: + return key.endswith("_time_us") + + +def accumulate_perf_counter( + phase: str, + t: PerfCounters, + counters: Dict[str, int] +): + counters[f"{phase}_wall_time_us"] = int(t.wall_time_s * US_IN_S) + counters[f"{phase}_user_time_us"] = int(t.user_time_s * US_IN_S) + counters[f"{phase}_system_time_us"] = int(t.system_time_s * US_IN_S) + + +def run_on_dataset( + ds: Dataset, + M: int, + num_threads: int, + num_add_iterations: int, + num_search_iterations: int, + efSearch: int = 16, + efConstruction: int = 40, + search_bounded_queue: bool = True, +) -> Dict[str, int]: + xq = ds.get_queries() + xb = ds.get_database() + + nb, d = xb.shape + nq, d = xq.shape + + k = 10 + # pyre-ignore[16]: Module `faiss` has no attribute `omp_set_num_threads`. 
+ faiss.omp_set_num_threads(num_threads) + index = faiss.IndexHNSWFlat(d, M) + index.hnsw.efConstruction = efConstruction # default + with timed_execution() as t: + for _ in range(num_add_iterations): + index.add(xb) + counters = {} + accumulate_perf_counter("add", t, counters) + counters["nb"] = nb + counters["num_add_iterations"] = num_add_iterations + + index.hnsw.efSearch = efSearch + index.hnsw.search_bounded_queue = search_bounded_queue + with timed_execution() as t: + for _ in range(num_search_iterations): + D, I = index.search(xq, k) + accumulate_perf_counter("search", t, counters) + counters["nq"] = nq + counters["efSearch"] = efSearch + counters["efConstruction"] = efConstruction + counters["M"] = M + counters["d"] = d + counters["num_search_iterations"] = num_search_iterations + + return counters + + +def run( + d: int, + nb: int, + nq: int, + M: int, + num_threads: int, + num_add_iterations: int = 1, + num_search_iterations: int = 1, + efSearch: int = 16, + efConstruction: int = 40, + search_bounded_queue: bool = True, +) -> Dict[str, int]: + ds = SyntheticDataset(d=d, nb=nb, nt=0, nq=nq, metric="L2", seed=1338) + return run_on_dataset( + ds, + M=M, + num_add_iterations=num_add_iterations, + num_search_iterations=num_search_iterations, + num_threads=num_threads, + efSearch=efSearch, + efConstruction=efConstruction, + search_bounded_queue=search_bounded_queue, + ) + + +def _accumulate_counters( + element: Dict[str, int], accu: Optional[Dict[str, List[int]]] = None +) -> Dict[str, List[int]]: + if accu is None: + accu = {key: [value] for key, value in element.items()} + return accu + else: + assert accu.keys() <= element.keys(), ( + "Accu keys must be a subset of element keys: " + f"{accu.keys()} not a subset of {element.keys()}" + ) + for key in accu.keys(): + accu[key].append(element[key]) + return accu + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark HNSW") + parser.add_argument("-M", "--M", type=int, required=True) + 
parser.add_argument("-t", "--num-threads", type=int, required=True) + parser.add_argument("-w", "--warm-up-iterations", type=int, default=0) + parser.add_argument("-i", "--num-search-iterations", type=int, default=20) + parser.add_argument("-a", "--num-add-iterations", type=int, default=20)  # "-i" is already taken by --num-search-iterations + parser.add_argument("-r", "--num-repetitions", type=int, default=20) + parser.add_argument("-s", "--ef-search", type=int, default=16) + parser.add_argument("-c", "--ef-construction", type=int, default=40) + parser.add_argument("-b", "--search-bounded-queue", action="store_true") + + parser.add_argument("-n", "--nb", type=int, default=5000) + parser.add_argument("-q", "--nq", type=int, default=500) + parser.add_argument("-d", "--d", type=int, default=128) + args = parser.parse_args() + + if args.warm_up_iterations > 0: + print(f"Warming up for {args.warm_up_iterations} iterations...") + # warm-up + run( + num_search_iterations=args.warm_up_iterations, + num_add_iterations=args.warm_up_iterations, + d=args.d, + nb=args.nb, + nq=args.nq, + M=args.M, + num_threads=args.num_threads, + efSearch=args.ef_search, + efConstruction=args.ef_construction, + search_bounded_queue=args.search_bounded_queue, + ) + print( + f"Running benchmark with dataset(nb={args.nb}, nq={args.nq}, " + f"d={args.d}), M={args.M}, num_threads={args.num_threads}, " + f"efSearch={args.ef_search}, efConstruction={args.ef_construction}" + ) + result = None + for _ in range(args.num_repetitions): + counters = run( + num_search_iterations=args.num_search_iterations, + num_add_iterations=args.num_add_iterations, + d=args.d, + nb=args.nb, + nq=args.nq, + M=args.M, + num_threads=args.num_threads, + efSearch=args.ef_search, + efConstruction=args.ef_construction, + search_bounded_queue=args.search_bounded_queue, + ) + result = _accumulate_counters(counters, result) + assert result is not None + for counter, values in result.items(): + if is_perf_counter(counter): + print( + "%s t=%.3f us (± %.4f)" % + (counter, 
np.mean(values), np.std(values)) + ) diff --git a/perf_tests/bench_no_multithreading_rcq_search.cpp b/perf_tests/bench_no_multithreading_rcq_search.cpp new file mode 100644 index 0000000000..ff2c8eab34 --- /dev/null +++ b/perf_tests/bench_no_multithreading_rcq_search.cpp @@ -0,0 +1,65 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include // @manual=//faiss:faiss_no_multithreading +#include // @manual=//faiss:faiss_no_multithreading + +using namespace faiss; +DEFINE_uint32(iterations, 20, "iterations"); +DEFINE_uint32(nprobe, 1, "nprobe"); +DEFINE_uint32(batch_size, 1, "batch_size"); +DEFINE_double(beam_factor, 4.0, "beam factor"); + +static void bench_search( + benchmark::State& state, + int batch_size, + int nprobe, + float beam_factor) { + int d = 512; + int nt = 2 << 15; + std::vector xt(d * nt); + + float_rand(xt.data(), d * nt, 12345); + ResidualCoarseQuantizer rq(d, {16, 8}); + rq.verbose = false; + rq.train(nt, xt.data()); + + std::vector xq(d * batch_size); + float_rand(xq.data(), d * batch_size, 12345); + + std::vector distances(nprobe * batch_size); + std::vector clusterIndices(nprobe * batch_size); + SearchParametersResidualCoarseQuantizer param; + param.beam_factor = beam_factor; + for (auto _ : state) { + rq.search( + batch_size, + xq.data(), + nprobe, + distances.data(), + clusterIndices.data(), + &param); + } +} + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + gflags::AllowCommandLineReparsing(); + gflags::ParseCommandLineFlags(&argc, &argv, true); + int iterations = FLAGS_iterations; + int nprobe = FLAGS_nprobe; + float beam_factor = FLAGS_beam_factor; + int batch_size = FLAGS_batch_size; + benchmark::RegisterBenchmark( + "search", bench_search, batch_size, nprobe, beam_factor) + ->Iterations(iterations); + benchmark::RunSpecifiedBenchmarks(); + 
benchmark::Shutdown(); +} diff --git a/perf_tests/bench_scalar_quantizer_accuracy.cpp b/perf_tests/bench_scalar_quantizer_accuracy.cpp new file mode 100644 index 0000000000..b13b9114b8 --- /dev/null +++ b/perf_tests/bench_scalar_quantizer_accuracy.cpp @@ -0,0 +1,88 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace faiss; +DEFINE_uint32(d, 128, "dimension"); +DEFINE_uint32(n, 2000, "number of vectors"); +DEFINE_uint32(iterations, 20, "iterations"); + +static void bench_reconstruction_error( + benchmark::State& state, + ScalarQuantizer::QuantizerType type, + int d, + int n) { + std::vector x(d * n); + + float_rand(x.data(), d * n, 12345); + + // make sure it's idempotent + ScalarQuantizer sq(d, type); + + sq.train(n, x.data()); + + size_t code_size = sq.code_size; + state.counters["code_size"] = sq.code_size; + + // encode + std::vector codes(code_size * n); + sq.compute_codes(x.data(), codes.data(), n); + + // decode + std::vector x2(d * n); + sq.decode(codes.data(), x2.data(), n); + + state.counters["sql2_recons_error"] = + fvec_L2sqr(x.data(), x2.data(), n * d) / n; + + // encode again + std::vector codes2(code_size * n); + sq.compute_codes(x2.data(), codes2.data(), n); + + size_t ndiff = 0; + for (size_t i = 0; i < codes.size(); i++) { + if (codes[i] != codes2[i]) + ndiff++; + } + + state.counters["ndiff_for_idempotence"] = ndiff; + + state.counters["code_size_two"] = codes.size(); +} + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + gflags::AllowCommandLineReparsing(); + gflags::ParseCommandLineFlags(&argc, &argv, true); + int iterations = FLAGS_iterations; + int d = FLAGS_d; + int n = FLAGS_n; + auto benchs = ::perf_tests::sq_types(); + + for (auto& [bench_name, quantizer_type] : 
benchs) { + benchmark::RegisterBenchmark( + bench_name.c_str(), + bench_reconstruction_error, + quantizer_type, + d, + n) + ->Iterations(iterations); + } + + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} diff --git a/perf_tests/bench_scalar_quantizer_decode.cpp b/perf_tests/bench_scalar_quantizer_decode.cpp new file mode 100644 index 0000000000..fc9d520557 --- /dev/null +++ b/perf_tests/bench_scalar_quantizer_decode.cpp @@ -0,0 +1,71 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace faiss; +DEFINE_uint32(d, 128, "dimension"); +DEFINE_uint32(n, 2000, "number of vectors"); +DEFINE_uint32(iterations, 20, "iterations"); + +static void bench_decode( + benchmark::State& state, + ScalarQuantizer::QuantizerType type, + int d, + int n) { + std::vector x(d * n); + + float_rand(x.data(), d * n, 12345); + + // make sure it's idempotent + ScalarQuantizer sq(d, type); + + omp_set_num_threads(1); + + sq.train(n, x.data()); + + size_t code_size = sq.code_size; + state.counters["code_size"] = sq.code_size; + + // encode + std::vector codes(code_size * n); + sq.compute_codes(x.data(), codes.data(), n); + std::vector x2(d * n); + + for (auto _ : state) { + // decode + sq.decode(codes.data(), x2.data(), n); + } +} + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + gflags::AllowCommandLineReparsing(); + gflags::ParseCommandLineFlags(&argc, &argv, true); + int iterations = FLAGS_iterations; + int d = FLAGS_d; + int n = FLAGS_n; + auto benchs = ::perf_tests::sq_types(); + + for (auto& [bench_name, quantizer_type] : benchs) { + benchmark::RegisterBenchmark( + bench_name.c_str(), bench_decode, quantizer_type, d, n) + ->Iterations(iterations); + } + + benchmark::RunSpecifiedBenchmarks(); + 
benchmark::Shutdown(); +} diff --git a/perf_tests/bench_6bit_codec.cpp b/perf_tests/bench_scalar_quantizer_distance.cpp similarity index 55% rename from perf_tests/bench_6bit_codec.cpp rename to perf_tests/bench_scalar_quantizer_distance.cpp index b4ac0b04b6..d0d1d9a474 100644 --- a/perf_tests/bench_6bit_codec.cpp +++ b/perf_tests/bench_scalar_quantizer_distance.cpp @@ -5,27 +5,33 @@ * LICENSE file in the root directory of this source tree. */ +#include #include #include +#include #include #include -#include +#include #include #include using namespace faiss; - -static void bench(benchmark::State& state) { - int d = 128; - int n = 2000; - +DEFINE_uint32(d, 128, "dimension"); +DEFINE_uint32(n, 2000, "dimension"); +DEFINE_uint32(iterations, 20, "iterations"); + +static void bench_distance( + benchmark::State& state, + ScalarQuantizer::QuantizerType type, + int n, + int d) { std::vector x(d * n); float_rand(x.data(), d * n, 12345); // make sure it's idempotent - ScalarQuantizer sq(d, ScalarQuantizer::QT_6bit); + ScalarQuantizer sq(d, type); omp_set_num_threads(1); @@ -38,32 +44,10 @@ static void bench(benchmark::State& state) { std::vector codes(code_size * n); sq.compute_codes(x.data(), codes.data(), n); - // decode - std::vector x2(d * n); - sq.decode(codes.data(), x2.data(), n); - - state.counters["sql2_recons_error"] = - fvec_L2sqr(x.data(), x2.data(), n * d) / n; - - // encode again - std::vector codes2(code_size * n); - sq.compute_codes(x2.data(), codes2.data(), n); - - size_t ndiff = 0; - for (size_t i = 0; i < codes.size(); i++) { - if (codes[i] != codes2[i]) - ndiff++; - } - - state.counters["ndiff_for_idempotence"] = ndiff; - - state.counters["code_size_two"] = codes.size(); - std::unique_ptr dc( sq.get_distance_computer()); dc->codes = codes.data(); dc->code_size = sq.code_size; - state.counters["code_size_three"] = dc->code_size; for (auto _ : state) { float sum_dis = 0; @@ -75,7 +59,21 @@ static void bench(benchmark::State& state) { } } } -// I think maybe 
n and d should be input arguments -// for things to really make sense, idk. -BENCHMARK(bench)->Iterations(20); -BENCHMARK_MAIN(); + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + gflags::AllowCommandLineReparsing(); + gflags::ParseCommandLineFlags(&argc, &argv, true); + int iterations = FLAGS_iterations; + int d = FLAGS_d; + int n = FLAGS_n; + auto benchs = ::perf_tests::sq_types(); + + for (auto& [bench_name, quantizer_type] : benchs) { + benchmark::RegisterBenchmark( + bench_name.c_str(), bench_distance, quantizer_type, d, n) + ->Iterations(iterations); + } + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} diff --git a/perf_tests/bench_scalar_quantizer_encode.cpp b/perf_tests/bench_scalar_quantizer_encode.cpp new file mode 100644 index 0000000000..40c95dabb4 --- /dev/null +++ b/perf_tests/bench_scalar_quantizer_encode.cpp @@ -0,0 +1,65 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace faiss; +DEFINE_uint32(d, 128, "dimension"); +DEFINE_uint32(n, 2000, "dimension"); +DEFINE_uint32(iterations, 20, "iterations"); + +static void bench_encode( + benchmark::State& state, + ScalarQuantizer::QuantizerType type, + int d, + int n) { + std::vector x(d * n); + + float_rand(x.data(), d * n, 12345); + ScalarQuantizer sq(d, type); + + omp_set_num_threads(1); + size_t code_size = sq.code_size; + + sq.train(n, x.data()); + state.counters["code_size"] = sq.code_size; + std::vector codes(code_size * n); + + for (auto _ : state) { + // encode + sq.compute_codes(x.data(), codes.data(), n); + } +} + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + gflags::AllowCommandLineReparsing(); + gflags::ParseCommandLineFlags(&argc, &argv, true); + int iterations = FLAGS_iterations; + int d = FLAGS_d; + int n = FLAGS_n; + auto benchs = ::perf_tests::sq_types(); + + for (auto& [bench_name, quantizer_type] : benchs) { + benchmark::RegisterBenchmark( + bench_name.c_str(), bench_encode, quantizer_type, d, n) + ->Iterations(iterations); + } + + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} diff --git a/perf_tests/utils.cpp b/perf_tests/utils.cpp new file mode 100644 index 0000000000..3e6c33220d --- /dev/null +++ b/perf_tests/utils.cpp @@ -0,0 +1,20 @@ +#include +namespace faiss::perf_tests { +std::map sq_types() { + static std::map + sq_types = { + {"QT_8bit", faiss::ScalarQuantizer::QT_8bit}, + {"QT_4bit", faiss::ScalarQuantizer::QT_4bit}, + {"QT_8bit_uniform", + faiss::ScalarQuantizer::QT_8bit_uniform}, + {"QT_4bit_uniform", + faiss::ScalarQuantizer::QT_4bit_uniform}, + {"QT_fp16", faiss::ScalarQuantizer::QT_fp16}, + {"QT_8bit_direct", faiss::ScalarQuantizer::QT_8bit_direct}, + {"QT_6bit", faiss::ScalarQuantizer::QT_6bit}, + {"QT_bf16", faiss::ScalarQuantizer::QT_bf16}, + {"QT_8bit_direct_signed", + 
faiss::ScalarQuantizer::QT_8bit_direct_signed}}; + return sq_types; +} +} // namespace faiss::perf_tests diff --git a/perf_tests/utils.h b/perf_tests/utils.h new file mode 100644 index 0000000000..e3065b9d4c --- /dev/null +++ b/perf_tests/utils.h @@ -0,0 +1,11 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +#pragma once +#include +#include + +namespace faiss::perf_tests { + +std::map sq_types(); + +} // namespace faiss::perf_tests From a74bdb9f4314db2fdd50904761d8ca12c934da2f Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 10:00:31 -0700 Subject: [PATCH 07/22] remove files --- faiss/perf_tests/bench_hnsw.py | 196 ------------------ .../bench_no_multithreading_rcq_search.cpp | 65 ------ .../bench_scalar_quantizer_accuracy.cpp | 88 -------- .../bench_scalar_quantizer_decode.cpp | 71 ------- .../bench_scalar_quantizer_distance.cpp | 79 ------- .../bench_scalar_quantizer_encode.cpp | 65 ------ faiss/perf_tests/utils.cpp | 20 -- faiss/perf_tests/utils.h | 11 - 8 files changed, 595 deletions(-) delete mode 100644 faiss/perf_tests/bench_hnsw.py delete mode 100644 faiss/perf_tests/bench_no_multithreading_rcq_search.cpp delete mode 100644 faiss/perf_tests/bench_scalar_quantizer_accuracy.cpp delete mode 100644 faiss/perf_tests/bench_scalar_quantizer_decode.cpp delete mode 100644 faiss/perf_tests/bench_scalar_quantizer_distance.cpp delete mode 100644 faiss/perf_tests/bench_scalar_quantizer_encode.cpp delete mode 100644 faiss/perf_tests/utils.cpp delete mode 100644 faiss/perf_tests/utils.h diff --git a/faiss/perf_tests/bench_hnsw.py b/faiss/perf_tests/bench_hnsw.py deleted file mode 100644 index ceace5f2b7..0000000000 --- a/faiss/perf_tests/bench_hnsw.py +++ /dev/null @@ -1,196 +0,0 @@ -import argparse -import resource -import time -from contextlib import contextmanager -from dataclasses import dataclass -from typing import Dict, Generator, List, Optional - -import faiss # @manual=//faiss/python:pyfaiss -import numpy as np -from 
faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib - Dataset, - SyntheticDataset, -) - -US_IN_S = 1_000_000 - - -@dataclass -class PerfCounters: - wall_time_s: float = 0.0 - user_time_s: float = 0.0 - system_time_s: float = 0.0 - - -@contextmanager -def timed_execution() -> Generator[PerfCounters, None, None]: - pcounters = PerfCounters() - wall_time_start = time.perf_counter() - rusage_start = resource.getrusage(resource.RUSAGE_SELF) - yield pcounters - wall_time_end = time.perf_counter() - rusage_end = resource.getrusage(resource.RUSAGE_SELF) - pcounters.wall_time_s = wall_time_end - wall_time_start - pcounters.user_time_s = rusage_end.ru_utime - rusage_start.ru_utime - pcounters.system_time_s = rusage_end.ru_stime - rusage_start.ru_stime - - -def is_perf_counter(key: str) -> bool: - return key.endswith("_time_us") - - -def accumulate_perf_counter( - phase: str, - t: PerfCounters, - counters: Dict[str, int] -): - counters[f"{phase}_wall_time_us"] = int(t.wall_time_s * US_IN_S) - counters[f"{phase}_user_time_us"] = int(t.user_time_s * US_IN_S) - counters[f"{phase}_system_time_us"] = int(t.system_time_s * US_IN_S) - - -def run_on_dataset( - ds: Dataset, - M: int, - num_threads: int, - num_add_iterations: int, - num_search_iterations: int, - efSearch: int = 16, - efConstruction: int = 40, - search_bounded_queue: bool = True, -) -> Dict[str, int]: - xq = ds.get_queries() - xb = ds.get_database() - - nb, d = xb.shape - nq, d = xq.shape - - k = 10 - # pyre-ignore[16]: Module `faiss` has no attribute `omp_set_num_threads`. 
- faiss.omp_set_num_threads(num_threads) - index = faiss.IndexHNSWFlat(d, M) - index.hnsw.efConstruction = efConstruction # default - with timed_execution() as t: - for _ in range(num_add_iterations): - index.add(xb) - counters = {} - accumulate_perf_counter("add", t, counters) - counters["nb"] = nb - counters["num_add_iterations"] = num_add_iterations - - index.hnsw.efSearch = efSearch - index.hnsw.search_bounded_queue = search_bounded_queue - with timed_execution() as t: - for _ in range(num_search_iterations): - D, I = index.search(xq, k) - accumulate_perf_counter("search", t, counters) - counters["nq"] = nq - counters["efSearch"] = efSearch - counters["efConstruction"] = efConstruction - counters["M"] = M - counters["d"] = d - counters["num_search_iterations"] = num_search_iterations - - return counters - - -def run( - d: int, - nb: int, - nq: int, - M: int, - num_threads: int, - num_add_iterations: int = 1, - num_search_iterations: int = 1, - efSearch: int = 16, - efConstruction: int = 40, - search_bounded_queue: bool = True, -) -> Dict[str, int]: - ds = SyntheticDataset(d=d, nb=nb, nt=0, nq=nq, metric="L2", seed=1338) - return run_on_dataset( - ds, - M=M, - num_add_iterations=num_add_iterations, - num_search_iterations=num_search_iterations, - num_threads=num_threads, - efSearch=efSearch, - efConstruction=efConstruction, - search_bounded_queue=search_bounded_queue, - ) - - -def _accumulate_counters( - element: Dict[str, int], accu: Optional[Dict[str, List[int]]] = None -) -> Dict[str, List[int]]: - if accu is None: - accu = {key: [value] for key, value in element.items()} - return accu - else: - assert accu.keys() <= element.keys(), ( - "Accu keys must be a subset of element keys: " - f"{accu.keys()} not a subset of {element.keys()}" - ) - for key in accu.keys(): - accu[key].append(element[key]) - return accu - - -def main(): - parser = argparse.ArgumentParser(description="Benchmark HNSW") - parser.add_argument("-M", "--M", type=int, required=True) - 
parser.add_argument("-t", "--num-threads", type=int, required=True) - parser.add_argument("-w", "--warm-up-iterations", type=int, default=0) - parser.add_argument("-i", "--num-search-iterations", type=int, default=20) - parser.add_argument("-i", "--num-add-iterations", type=int, default=20) - parser.add_argument("-r", "--num-repetitions", type=int, default=20) - parser.add_argument("-s", "--ef-search", type=int, default=16) - parser.add_argument("-c", "--ef-construction", type=int, default=40) - parser.add_argument("-b", "--search-bounded-queue", action="store_true") - - parser.add_argument("-n", "--nb", type=int, default=5000) - parser.add_argument("-q", "--nq", type=int, default=500) - parser.add_argument("-d", "--d", type=int, default=128) - args = parser.parse_args() - - if args.warm_up_iterations > 0: - print(f"Warming up for {args.warm_up_iterations} iterations...") - # warm-up - run( - num_search_iterations=args.warm_up_iterations, - num_add_iterations=args.warm_up_iterations, - d=args.d, - nb=args.nb, - nq=args.nq, - M=args.M, - num_threads=args.num_threads, - efSearch=args.ef_search, - efConstruction=args.ef_construction, - search_bounded_queue=args.search_bounded_queue, - ) - print( - f"Running benchmark with dataset(nb={args.nb}, nq={args.nq}, " - f"d={args.d}), M={args.M}, num_threads={args.num_threads}, " - f"efSearch={args.ef_search}, efConstruction={args.ef_construction}" - ) - result = None - for _ in range(args.num_repetitions): - counters = run( - num_search_iterations=args.num_search_iterations, - num_add_iterations=args.num_add_iterations, - d=args.d, - nb=args.nb, - nq=args.nq, - M=args.M, - num_threads=args.num_threads, - efSearch=args.ef_search, - efConstruction=args.ef_construction, - search_bounded_queue=args.search_bounded_queue, - ) - result = _accumulate_counters(counters, result) - assert result is not None - for counter, values in result.items(): - if is_perf_counter(counter): - print( - "%s t=%.3f us (± %.4f)" % - (counter, 
np.mean(values), np.std(values)) - ) diff --git a/faiss/perf_tests/bench_no_multithreading_rcq_search.cpp b/faiss/perf_tests/bench_no_multithreading_rcq_search.cpp deleted file mode 100644 index ff2c8eab34..0000000000 --- a/faiss/perf_tests/bench_no_multithreading_rcq_search.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include - -#include -#include // @manual=//faiss:faiss_no_multithreading -#include // @manual=//faiss:faiss_no_multithreading - -using namespace faiss; -DEFINE_uint32(iterations, 20, "iterations"); -DEFINE_uint32(nprobe, 1, "nprobe"); -DEFINE_uint32(batch_size, 1, "batch_size"); -DEFINE_double(beam_factor, 4.0, "beam factor"); - -static void bench_search( - benchmark::State& state, - int batch_size, - int nprobe, - float beam_factor) { - int d = 512; - int nt = 2 << 15; - std::vector xt(d * nt); - - float_rand(xt.data(), d * nt, 12345); - ResidualCoarseQuantizer rq(d, {16, 8}); - rq.verbose = false; - rq.train(nt, xt.data()); - - std::vector xq(d * batch_size); - float_rand(xq.data(), d * batch_size, 12345); - - std::vector distances(nprobe * batch_size); - std::vector clusterIndices(nprobe * batch_size); - SearchParametersResidualCoarseQuantizer param; - param.beam_factor = beam_factor; - for (auto _ : state) { - rq.search( - batch_size, - xq.data(), - nprobe, - distances.data(), - clusterIndices.data(), - ¶m); - } -} - -int main(int argc, char** argv) { - benchmark::Initialize(&argc, argv); - gflags::AllowCommandLineReparsing(); - gflags::ParseCommandLineFlags(&argc, &argv, true); - int iterations = FLAGS_iterations; - int nprobe = FLAGS_nprobe; - float beam_factor = FLAGS_beam_factor; - int batch_size = FLAGS_batch_size; - benchmark::RegisterBenchmark( - "search", bench_search, batch_size, nprobe, beam_factor) - ->Iterations(iterations); - 
benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); -} diff --git a/faiss/perf_tests/bench_scalar_quantizer_accuracy.cpp b/faiss/perf_tests/bench_scalar_quantizer_accuracy.cpp deleted file mode 100644 index b13b9114b8..0000000000 --- a/faiss/perf_tests/bench_scalar_quantizer_accuracy.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace faiss; -DEFINE_uint32(d, 128, "dimension"); -DEFINE_uint32(n, 2000, "dimension"); -DEFINE_uint32(iterations, 20, "iterations"); - -static void bench_reconstruction_error( - benchmark::State& state, - ScalarQuantizer::QuantizerType type, - int d, - int n) { - std::vector x(d * n); - - float_rand(x.data(), d * n, 12345); - - // make sure it's idempotent - ScalarQuantizer sq(d, type); - - sq.train(n, x.data()); - - size_t code_size = sq.code_size; - state.counters["code_size"] = sq.code_size; - - // encode - std::vector codes(code_size * n); - sq.compute_codes(x.data(), codes.data(), n); - - // decode - std::vector x2(d * n); - sq.decode(codes.data(), x2.data(), n); - - state.counters["sql2_recons_error"] = - fvec_L2sqr(x.data(), x2.data(), n * d) / n; - - // encode again - std::vector codes2(code_size * n); - sq.compute_codes(x2.data(), codes2.data(), n); - - size_t ndiff = 0; - for (size_t i = 0; i < codes.size(); i++) { - if (codes[i] != codes2[i]) - ndiff++; - } - - state.counters["ndiff_for_idempotence"] = ndiff; - - state.counters["code_size_two"] = codes.size(); -} - -int main(int argc, char** argv) { - benchmark::Initialize(&argc, argv); - gflags::AllowCommandLineReparsing(); - gflags::ParseCommandLineFlags(&argc, &argv, true); - int iterations = FLAGS_iterations; - int d = FLAGS_d; - int n = FLAGS_n; - auto benchs = 
::perf_tests::sq_types(); - - for (auto& [bench_name, quantizer_type] : benchs) { - benchmark::RegisterBenchmark( - bench_name.c_str(), - bench_reconstruction_error, - quantizer_type, - d, - n) - ->Iterations(iterations); - } - - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); -} diff --git a/faiss/perf_tests/bench_scalar_quantizer_decode.cpp b/faiss/perf_tests/bench_scalar_quantizer_decode.cpp deleted file mode 100644 index fc9d520557..0000000000 --- a/faiss/perf_tests/bench_scalar_quantizer_decode.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -using namespace faiss; -DEFINE_uint32(d, 128, "dimension"); -DEFINE_uint32(n, 2000, "dimension"); -DEFINE_uint32(iterations, 20, "iterations"); - -static void bench_decode( - benchmark::State& state, - ScalarQuantizer::QuantizerType type, - int d, - int n) { - std::vector x(d * n); - - float_rand(x.data(), d * n, 12345); - - // make sure it's idempotent - ScalarQuantizer sq(d, type); - - omp_set_num_threads(1); - - sq.train(n, x.data()); - - size_t code_size = sq.code_size; - state.counters["code_size"] = sq.code_size; - - // encode - std::vector codes(code_size * n); - sq.compute_codes(x.data(), codes.data(), n); - std::vector x2(d * n); - - for (auto _ : state) { - // decode - sq.decode(codes.data(), x2.data(), n); - } -} - -int main(int argc, char** argv) { - benchmark::Initialize(&argc, argv); - gflags::AllowCommandLineReparsing(); - gflags::ParseCommandLineFlags(&argc, &argv, true); - int iterations = FLAGS_iterations; - int d = FLAGS_d; - int n = FLAGS_n; - auto benchs = ::perf_tests::sq_types(); - - for (auto& [bench_name, quantizer_type] : benchs) { - benchmark::RegisterBenchmark( - bench_name.c_str(), bench_decode, 
quantizer_type, d, n) - ->Iterations(iterations); - } - - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); -} diff --git a/faiss/perf_tests/bench_scalar_quantizer_distance.cpp b/faiss/perf_tests/bench_scalar_quantizer_distance.cpp deleted file mode 100644 index d0d1d9a474..0000000000 --- a/faiss/perf_tests/bench_scalar_quantizer_distance.cpp +++ /dev/null @@ -1,79 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace faiss; -DEFINE_uint32(d, 128, "dimension"); -DEFINE_uint32(n, 2000, "dimension"); -DEFINE_uint32(iterations, 20, "iterations"); - -static void bench_distance( - benchmark::State& state, - ScalarQuantizer::QuantizerType type, - int n, - int d) { - std::vector x(d * n); - - float_rand(x.data(), d * n, 12345); - - // make sure it's idempotent - ScalarQuantizer sq(d, type); - - omp_set_num_threads(1); - - sq.train(n, x.data()); - - size_t code_size = sq.code_size; - state.counters["code_size"] = sq.code_size; - - // encode - std::vector codes(code_size * n); - sq.compute_codes(x.data(), codes.data(), n); - - std::unique_ptr dc( - sq.get_distance_computer()); - dc->codes = codes.data(); - dc->code_size = sq.code_size; - - for (auto _ : state) { - float sum_dis = 0; - for (int i = 0; i < n; i++) { - dc->set_query(&x[i * d]); - for (int j = 0; j < n; j++) { - benchmark::DoNotOptimize(sum_dis += (*dc)(j)); - } - } - } -} - -int main(int argc, char** argv) { - benchmark::Initialize(&argc, argv); - gflags::AllowCommandLineReparsing(); - gflags::ParseCommandLineFlags(&argc, &argv, true); - int iterations = FLAGS_iterations; - int d = FLAGS_d; - int n = FLAGS_n; - auto benchs = ::perf_tests::sq_types(); - - for (auto& [bench_name, quantizer_type] : benchs) { - benchmark::RegisterBenchmark( - 
bench_name.c_str(), bench_distance, quantizer_type, d, n) - ->Iterations(iterations); - } - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); -} diff --git a/faiss/perf_tests/bench_scalar_quantizer_encode.cpp b/faiss/perf_tests/bench_scalar_quantizer_encode.cpp deleted file mode 100644 index 40c95dabb4..0000000000 --- a/faiss/perf_tests/bench_scalar_quantizer_encode.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Copyright (c) Facebook, Inc. and its affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -using namespace faiss; -DEFINE_uint32(d, 128, "dimension"); -DEFINE_uint32(n, 2000, "dimension"); -DEFINE_uint32(iterations, 20, "iterations"); - -static void bench_encode( - benchmark::State& state, - ScalarQuantizer::QuantizerType type, - int d, - int n) { - std::vector x(d * n); - - float_rand(x.data(), d * n, 12345); - ScalarQuantizer sq(d, type); - - omp_set_num_threads(1); - size_t code_size = sq.code_size; - - sq.train(n, x.data()); - state.counters["code_size"] = sq.code_size; - std::vector codes(code_size * n); - - for (auto _ : state) { - // encode - sq.compute_codes(x.data(), codes.data(), n); - } -} - -int main(int argc, char** argv) { - benchmark::Initialize(&argc, argv); - gflags::AllowCommandLineReparsing(); - gflags::ParseCommandLineFlags(&argc, &argv, true); - int iterations = FLAGS_iterations; - int d = FLAGS_d; - int n = FLAGS_n; - auto benchs = ::perf_tests::sq_types(); - - for (auto& [bench_name, quantizer_type] : benchs) { - benchmark::RegisterBenchmark( - bench_name.c_str(), bench_encode, quantizer_type, d, n) - ->Iterations(iterations); - } - - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); -} diff --git a/faiss/perf_tests/utils.cpp b/faiss/perf_tests/utils.cpp deleted file mode 100644 index 3e6c33220d..0000000000 --- 
a/faiss/perf_tests/utils.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include -namespace faiss::perf_tests { -std::map sq_types() { - static std::map - sq_types = { - {"QT_8bit", faiss::ScalarQuantizer::QT_8bit}, - {"QT_4bit", faiss::ScalarQuantizer::QT_4bit}, - {"QT_8bit_uniform", - faiss::ScalarQuantizer::QT_8bit_uniform}, - {"QT_4bit_uniform", - faiss::ScalarQuantizer::QT_4bit_uniform}, - {"QT_fp16", faiss::ScalarQuantizer::QT_fp16}, - {"QT_8bit_direct", faiss::ScalarQuantizer::QT_8bit_direct}, - {"QT_6bit", faiss::ScalarQuantizer::QT_6bit}, - {"QT_bf16", faiss::ScalarQuantizer::QT_bf16}, - {"QT_8bit_direct_signed", - faiss::ScalarQuantizer::QT_8bit_direct_signed}}; - return sq_types; -} -} // namespace faiss::perf_tests diff --git a/faiss/perf_tests/utils.h b/faiss/perf_tests/utils.h deleted file mode 100644 index e3065b9d4c..0000000000 --- a/faiss/perf_tests/utils.h +++ /dev/null @@ -1,11 +0,0 @@ -// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. - -#pragma once -#include -#include - -namespace faiss::perf_tests { - -std::map sq_types(); - -} // namespace faiss::perf_tests From be634fedb7ef47862ec95ef233aecc3c07bbc3be Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 11:22:20 -0700 Subject: [PATCH 08/22] perf tests --- .github/actions/build_cmake/action.yml | 2 +- perf_tests/CMakeLists.txt | 102 +++++++++++++++--- .../bench_scalar_quantizer_accuracy.cpp | 2 +- 3 files changed, 89 insertions(+), 17 deletions(-) diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 2328972865..8739c3c610 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -32,7 +32,7 @@ runs: conda update -y -q conda echo "$CONDA/bin" >> $GITHUB_PATH - conda install -y -q python=3.11 cmake make swig numpy scipy pytest + conda install -y -q python=3.11 cmake make swig numpy scipy pytest gflags # install base packages for ARM64 if [ "${{ runner.arch }}" = "ARM64" ]; then diff 
--git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 4630359235..77806ee9d8 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -1,6 +1,5 @@ project(faiss_perf_tests) - include(FetchContent) FetchContent_Declare(googlebenchmark GIT_REPOSITORY https://github.com/google/benchmark.git @@ -10,31 +9,39 @@ set(BENCHMARK_ENABLE_TESTING OFF) FetchContent_MakeAvailable( googlebenchmark) +set(BENCHMARK_ENABLE_TESTING OFF) + find_package(Threads REQUIRED) find_package(OpenMP REQUIRED) +find_package(gflags REQUIRED) -add_executable(faiss_perf_tests bench_6bit_codec.cpp) +add_library(faiss_perf_tests_utils + utils.cpp +) +# `#include ` or any other headers +target_include_directories(faiss_perf_tests_utils PRIVATE + ${PROJECT_SOURCE_DIR}/../..) if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") - target_link_libraries(faiss_perf_tests PRIVATE faiss) + target_link_libraries(faiss_perf_tests_utils PRIVATE faiss) endif() if(FAISS_OPT_LEVEL STREQUAL "avx2") if(NOT WIN32) - target_compile_options(faiss_perf_tests PRIVATE $<$:-mavx2 -mfma>) + target_compile_options(faiss_perf_tests_utils PRIVATE $<$:-mavx2 -mfma>) else() - target_compile_options(faiss_perf_tests PRIVATE $<$:/arch:AVX2>) + target_compile_options(faiss_perf_tests_utils PRIVATE $<$:/arch:AVX2>) endif() - target_link_libraries(faiss_perf_tests PRIVATE faiss_avx2) + target_link_libraries(faiss_perf_tests_utils PRIVATE faiss_avx2) endif() if(FAISS_OPT_LEVEL STREQUAL "avx512") if(NOT WIN32) - target_compile_options(faiss_perf_tests PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) + target_compile_options(faiss_perf_tests_utils PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) else() - target_compile_options(faiss_perf_tests PRIVATE $<$:/arch:AVX512>) + target_compile_options(faiss_perf_tests_utils PRIVATE $<$:/arch:AVX512>) endif() - 
target_link_libraries(faiss_perf_tests PRIVATE faiss_avx512) + target_link_libraries(faiss_perf_tests_utils PRIVATE faiss_avx512) endif() if(FAISS_OPT_LEVEL STREQUAL "sve") @@ -43,23 +50,88 @@ if(FAISS_OPT_LEVEL STREQUAL "sve") # Do nothing, expect SVE to be enabled by -march=native elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") # Add +sve - target_compile_options(faiss_perf_tests PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + target_compile_options(faiss_perf_tests_utils PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(faiss_perf_tests PRIVATE $<$,$>:-march=armv8-a+sve>) + target_compile_options(faiss_perf_tests_utils PRIVATE $<$,$>:-march=armv8-a+sve>) endif() if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") # Do nothing, expect SVE to be enabled by -march=native elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") # Add +sve - target_compile_options(faiss_perf_tests PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + target_compile_options(faiss_perf_tests_utils PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(faiss_perf_tests PRIVATE $<$,$>:-march=armv8-a+sve>) + target_compile_options(faiss_perf_tests_utils PRIVATE $<$,$>:-march=armv8-a+sve>) endif() else() # TODO: support Windows endif() - target_link_libraries(faiss_perf_tests PRIVATE faiss_sve) + target_link_libraries(faiss_perf_tests_utils PRIVATE faiss_sve) endif() -target_link_libraries(faiss_perf_tests PRIVATE OpenMP::OpenMP_CXX benchmark::benchmark) + +set(FAISS_PERF_TEST_SRC + bench_no_multithreading_rcq_search.cpp + 
bench_scalar_quantizer_accuracy.cpp + bench_scalar_quantizer_decode.cpp + bench_scalar_quantizer_distance.cpp + bench_scalar_quantizer_encode.cpp +) +foreach(bench ${FAISS_PERF_TEST_SRC}) + get_filename_component(bench_exec ${bench} NAME_WE) + + add_executable(${bench_exec} ${bench}) + + if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") + target_link_libraries(${bench_exec} PRIVATE faiss) + endif() + + if(FAISS_OPT_LEVEL STREQUAL "avx2") + if(NOT WIN32) + target_compile_options(${bench_exec} PRIVATE $<$:-mavx2 -mfma>) + else() + target_compile_options(${bench_exec} PRIVATE $<$:/arch:AVX2>) + endif() + target_link_libraries(${bench_exec} PRIVATE faiss_avx2) + endif() + + if(FAISS_OPT_LEVEL STREQUAL "avx512") + if(NOT WIN32) + target_compile_options(${bench_exec} PRIVATE $<$:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) + else() + target_compile_options(${bench_exec} PRIVATE $<$:/arch:AVX512>) + endif() + target_link_libraries(${bench_exec} PRIVATE faiss_avx512) + endif() + + if(FAISS_OPT_LEVEL STREQUAL "sve") + if(NOT WIN32) + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(${bench_exec} PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(${bench_exec} PRIVATE $<$,$>:-march=armv8-a+sve>) + endif() + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ 
]+)*)") + # Add +sve + target_compile_options(${bench_exec} PRIVATE $<$,$>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(${bench_exec} PRIVATE $<$,$>:-march=armv8-a+sve>) + endif() + else() + # TODO: support Windows + endif() + target_link_libraries(${bench_exec} PRIVATE faiss_sve) + endif() + target_link_libraries(${bench_exec} PRIVATE faiss_perf_tests_utils OpenMP::OpenMP_CXX benchmark::benchmark gflags) + # `#include ` or any other headers + target_include_directories(${bench_exec} PRIVATE + ${PROJECT_SOURCE_DIR}/../..) + +endforeach() diff --git a/perf_tests/bench_scalar_quantizer_accuracy.cpp b/perf_tests/bench_scalar_quantizer_accuracy.cpp index b13b9114b8..237e9020f5 100644 --- a/perf_tests/bench_scalar_quantizer_accuracy.cpp +++ b/perf_tests/bench_scalar_quantizer_accuracy.cpp @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include +#include "utils.h" #include #include #include From 63183c8f4b08862e6a994ce5f276627340c67c29 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 11:42:58 -0700 Subject: [PATCH 09/22] build_cmake runs perf_tests --- .github/actions/build_cmake/action.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 8739c3c610..d09851841f 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -146,8 +146,7 @@ runs: - name: C++ perf benchmarks shell: bash run: | - make -C build -j faiss_perf_tests - cd ./build/perf_tests && ./faiss_perf_tests + find ./build/perf_tests/ -executable -type f -name "bench*" -exec '{}' -v \; - name: Install Python extension shell: bash working-directory: build/faiss/python From 10f8bdeae357bd4d3ccf28a0a3a1be6ed4e9f497 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 11:47:08 -0700 Subject: [PATCH 10/22] fix format --- perf_tests/bench_scalar_quantizer_accuracy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf_tests/bench_scalar_quantizer_accuracy.cpp b/perf_tests/bench_scalar_quantizer_accuracy.cpp index 237e9020f5..b13b9114b8 100644 --- a/perf_tests/bench_scalar_quantizer_accuracy.cpp +++ b/perf_tests/bench_scalar_quantizer_accuracy.cpp @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include "utils.h" +#include #include #include #include From b8f5f09a9e954971309973503efc2b37d8ce6e8a Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 12:08:20 -0700 Subject: [PATCH 11/22] debug --- .github/actions/build_cmake/action.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index d09851841f..14478f02aa 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -152,6 +152,9 @@ runs: working-directory: build/faiss/python run: | $CONDA/bin/python setup.py install + - name: Setup tmate session + if: failure() + uses: mxschmitt/action-tmate@v3 - name: ROCm - install ROCm-enabled torch via pip if: inputs.rocm == 'ON' shell: bash From 99ad4f93aa5f54c041c223b564ba39f32c81c109 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 13:49:24 -0700 Subject: [PATCH 12/22] fix setup --- faiss/python/setup.py | 47 +++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/faiss/python/setup.py b/faiss/python/setup.py index b1ae4b1be2..aaa8621a6b 100644 --- a/faiss/python/setup.py +++ b/faiss/python/setup.py @@ -4,10 +4,12 @@ # LICENSE file in the root directory of this source tree. 
from __future__ import print_function -from setuptools import setup, find_packages + import os -import shutil import platform +import shutil + +from setuptools import find_packages, setup # make the faiss python package dir shutil.rmtree("faiss", ignore_errors=True) @@ -20,8 +22,8 @@ shutil.copyfile("extra_wrappers.py", "faiss/extra_wrappers.py") shutil.copyfile("array_conversions.py", "faiss/array_conversions.py") -ext = ".pyd" if platform.system() == 'Windows' else ".so" -prefix = "Release/" * (platform.system() == 'Windows') +ext = ".pyd" if platform.system() == "Windows" else ".so" +prefix = "Release/" * (platform.system() == "Windows") swigfaiss_generic_lib = f"{prefix}_swigfaiss{ext}" swigfaiss_avx2_lib = f"{prefix}_swigfaiss_avx2{ext}" @@ -35,10 +37,16 @@ found_callbacks = os.path.exists(callbacks_lib) found_swigfaiss_sve = os.path.exists(swigfaiss_sve_lib) -assert (found_swigfaiss_generic or found_swigfaiss_avx2 or found_swigfaiss_avx512 or found_swigfaiss_sve), \ - f"Could not find {swigfaiss_generic_lib} or " \ - f"{swigfaiss_avx2_lib} or {swigfaiss_avx512_lib} or {swigfaiss_sve_lib}. " \ +assert ( + found_swigfaiss_generic + or found_swigfaiss_avx2 + or found_swigfaiss_avx512 + or found_swigfaiss_sve +), ( + f"Could not find {swigfaiss_generic_lib} or " + f"{swigfaiss_avx2_lib} or {swigfaiss_avx512_lib} or {swigfaiss_sve_lib}. " f"Faiss may not be compiled yet." +) if found_swigfaiss_generic: print(f"Copying {swigfaiss_generic_lib}") @@ -64,7 +72,7 @@ shutil.copyfile("swigfaiss_sve.py", "faiss/swigfaiss_sve.py") shutil.copyfile(swigfaiss_sve_lib, f"faiss/_swigfaiss_sve{ext}") -long_description=""" +long_description = """ Faiss is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting @@ -73,20 +81,19 @@ are implemented on the GPU. It is developed by Facebook AI Research. 
""" setup( - name='faiss', - version='1.8.0', - description='A library for efficient similarity search and clustering of dense vectors', + name="faiss", + version="1.8.0", + description="A library for efficient similarity search and clustering of dense vectors", long_description=long_description, - url='https://github.com/facebookresearch/faiss', - author='Matthijs Douze, Jeff Johnson, Herve Jegou, Lucas Hosseini', - author_email='matthijs@meta.com', - license='MIT', - keywords='search nearest neighbors', - - install_requires=['numpy', 'packaging'], - packages=['faiss', 'faiss.contrib', 'faiss.contrib.torch'], + url="https://github.com/facebookresearch/faiss", + author="Matthijs Douze, Jeff Johnson, Herve Jegou, Lucas Hosseini", + author_email="matthijs@meta.com", + license="MIT", + keywords="search nearest neighbors", + install_requires=["numpy", "packaging"], + packages=["faiss", "faiss.contrib", "faiss.contrib.torch"], package_data={ - 'faiss': ['*.so', '*.pyd'], + "faiss": ["*.so", "*.pyd"], }, zip_safe=False, ) From 456aabf2ce890cc7f2b6acc6691f1392208c9b1a Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 14:09:56 -0700 Subject: [PATCH 13/22] gate perf_tests behind enable testing --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a7c1034987..ae75fc17f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,14 +104,13 @@ endif() add_subdirectory(demos) add_subdirectory(benchs) add_subdirectory(tutorial/cpp) -add_subdirectory(perf_tests) # CTest must be included in the top level to enable `make test` target. 
include(CTest) if(BUILD_TESTING) add_subdirectory(tests) - + add_subdirectory(perf_tests) if(FAISS_ENABLE_GPU) if(FAISS_ENABLE_ROCM) add_subdirectory(faiss/gpu-rocm/test) From 7a0a89433a009eb24f9747d50e87e1043a3452e6 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 14:19:50 -0700 Subject: [PATCH 14/22] remove tmate --- .github/actions/build_cmake/action.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 14478f02aa..d09851841f 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -152,9 +152,6 @@ runs: working-directory: build/faiss/python run: | $CONDA/bin/python setup.py install - - name: Setup tmate session - if: failure() - uses: mxschmitt/action-tmate@v3 - name: ROCm - install ROCm-enabled torch via pip if: inputs.rocm == 'ON' shell: bash From 12061af7ef00afe3162d8bb7598185743f85b6eb Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 14:39:26 -0700 Subject: [PATCH 15/22] linewrap ignore --- perf_tests/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 77806ee9d8..21050f535d 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -1,3 +1,10 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +# @lint-ignore-every LINEWRAP project(faiss_perf_tests) include(FetchContent) From bd3576e6a78af8501fd1a49bcf3cff0fc60c9424 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 15:05:59 -0700 Subject: [PATCH 16/22] omit ci when gpu is on --- .github/actions/build_cmake/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index d09851841f..7435102791 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -145,6 +145,7 @@ runs: make -C build test - name: C++ perf benchmarks shell: bash + if: inputs.gpu == 'OFF' run: | find ./build/perf_tests/ -executable -type f -name "bench*" -exec '{}' -v \; - name: Install Python extension From b3ba0800bca20ed0895641fc6bf12a8b553785bb Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 16:04:38 -0700 Subject: [PATCH 17/22] revert change to setup.py --- faiss/python/setup.py | 47 ++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/faiss/python/setup.py b/faiss/python/setup.py index aaa8621a6b..b1ae4b1be2 100644 --- a/faiss/python/setup.py +++ b/faiss/python/setup.py @@ -4,12 +4,10 @@ # LICENSE file in the root directory of this source tree. 
from __future__ import print_function - +from setuptools import setup, find_packages import os -import platform import shutil - -from setuptools import find_packages, setup +import platform # make the faiss python package dir shutil.rmtree("faiss", ignore_errors=True) @@ -22,8 +20,8 @@ shutil.copyfile("extra_wrappers.py", "faiss/extra_wrappers.py") shutil.copyfile("array_conversions.py", "faiss/array_conversions.py") -ext = ".pyd" if platform.system() == "Windows" else ".so" -prefix = "Release/" * (platform.system() == "Windows") +ext = ".pyd" if platform.system() == 'Windows' else ".so" +prefix = "Release/" * (platform.system() == 'Windows') swigfaiss_generic_lib = f"{prefix}_swigfaiss{ext}" swigfaiss_avx2_lib = f"{prefix}_swigfaiss_avx2{ext}" @@ -37,16 +35,10 @@ found_callbacks = os.path.exists(callbacks_lib) found_swigfaiss_sve = os.path.exists(swigfaiss_sve_lib) -assert ( - found_swigfaiss_generic - or found_swigfaiss_avx2 - or found_swigfaiss_avx512 - or found_swigfaiss_sve -), ( - f"Could not find {swigfaiss_generic_lib} or " - f"{swigfaiss_avx2_lib} or {swigfaiss_avx512_lib} or {swigfaiss_sve_lib}. " +assert (found_swigfaiss_generic or found_swigfaiss_avx2 or found_swigfaiss_avx512 or found_swigfaiss_sve), \ + f"Could not find {swigfaiss_generic_lib} or " \ + f"{swigfaiss_avx2_lib} or {swigfaiss_avx512_lib} or {swigfaiss_sve_lib}. " \ f"Faiss may not be compiled yet." -) if found_swigfaiss_generic: print(f"Copying {swigfaiss_generic_lib}") @@ -72,7 +64,7 @@ shutil.copyfile("swigfaiss_sve.py", "faiss/swigfaiss_sve.py") shutil.copyfile(swigfaiss_sve_lib, f"faiss/_swigfaiss_sve{ext}") -long_description = """ +long_description=""" Faiss is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting @@ -81,19 +73,20 @@ are implemented on the GPU. It is developed by Facebook AI Research. 
""" setup( - name="faiss", - version="1.8.0", - description="A library for efficient similarity search and clustering of dense vectors", + name='faiss', + version='1.8.0', + description='A library for efficient similarity search and clustering of dense vectors', long_description=long_description, - url="https://github.com/facebookresearch/faiss", - author="Matthijs Douze, Jeff Johnson, Herve Jegou, Lucas Hosseini", - author_email="matthijs@meta.com", - license="MIT", - keywords="search nearest neighbors", - install_requires=["numpy", "packaging"], - packages=["faiss", "faiss.contrib", "faiss.contrib.torch"], + url='https://github.com/facebookresearch/faiss', + author='Matthijs Douze, Jeff Johnson, Herve Jegou, Lucas Hosseini', + author_email='matthijs@meta.com', + license='MIT', + keywords='search nearest neighbors', + + install_requires=['numpy', 'packaging'], + packages=['faiss', 'faiss.contrib', 'faiss.contrib.torch'], package_data={ - "faiss": ["*.so", "*.pyd"], + 'faiss': ['*.so', '*.pyd'], }, zip_safe=False, ) From 3887e1dbffd3d1e584f71728531393211edaa885 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Fri, 20 Sep 2024 16:19:54 -0700 Subject: [PATCH 18/22] BENCHMARK_ENABLE_TESTING off --- perf_tests/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 21050f535d..7ee349b89e 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -6,17 +6,15 @@ # @lint-ignore-every LINEWRAP project(faiss_perf_tests) +set(BENCHMARK_ENABLE_TESTING OFF) include(FetchContent) FetchContent_Declare(googlebenchmark GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG main) # need main for benchmark::benchmark - -set(BENCHMARK_ENABLE_TESTING OFF) FetchContent_MakeAvailable( googlebenchmark) -set(BENCHMARK_ENABLE_TESTING OFF) find_package(Threads REQUIRED) find_package(OpenMP REQUIRED) From 5d9c877610fa4250b487bea47f50909e0baeda75 Mon Sep 17 00:00:00 2001 From: 
mengdilin Date: Fri, 20 Sep 2024 16:25:48 -0700 Subject: [PATCH 19/22] disable benchmarks in rocm --- .github/actions/build_cmake/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 7435102791..3fad247b6d 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -145,7 +145,7 @@ runs: make -C build test - name: C++ perf benchmarks shell: bash - if: inputs.gpu == 'OFF' + if: inputs.rocm == 'OFF' run: | find ./build/perf_tests/ -executable -type f -name "bench*" -exec '{}' -v \; - name: Install Python extension From 18da9ce6999ad8daec22b31043548b8d3f4e26f1 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Sat, 21 Sep 2024 07:01:25 -0700 Subject: [PATCH 20/22] simplify cmakelist --- perf_tests/CMakeLists.txt | 88 ++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 7ee349b89e..30ba338649 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -27,53 +27,57 @@ add_library(faiss_perf_tests_utils target_include_directories(faiss_perf_tests_utils PRIVATE ${PROJECT_SOURCE_DIR}/../..) 
-if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") - target_link_libraries(faiss_perf_tests_utils PRIVATE faiss) -endif() - -if(FAISS_OPT_LEVEL STREQUAL "avx2") - if(NOT WIN32) - target_compile_options(faiss_perf_tests_utils PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma>) - else() - target_compile_options(faiss_perf_tests_utils PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>) +function(link_to_faiss_lib target) + if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") + target_link_libraries(${target} PRIVATE faiss) endif() - target_link_libraries(faiss_perf_tests_utils PRIVATE faiss_avx2) -endif() - -if(FAISS_OPT_LEVEL STREQUAL "avx512") - if(NOT WIN32) - target_compile_options(faiss_perf_tests_utils PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) - else() - target_compile_options(faiss_perf_tests_utils PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>) + + if(FAISS_OPT_LEVEL STREQUAL "avx2") + if(NOT WIN32) + target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma>) + else() + target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>) + endif() + target_link_libraries(${target} PRIVATE faiss_avx2) endif() - target_link_libraries(faiss_perf_tests_utils PRIVATE faiss_avx512) -endif() - -if(FAISS_OPT_LEVEL STREQUAL "sve") - if(NOT WIN32) - if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") - # Do nothing, expect SVE to be enabled by -march=native - elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") - # Add +sve - target_compile_options(faiss_perf_tests_utils PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:${CMAKE_MATCH_2}+sve>) - elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") - # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(faiss_perf_tests_utils PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:-march=armv8-a+sve>) + + if(FAISS_OPT_LEVEL STREQUAL
"avx512") + if(NOT WIN32) + target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) + else() + target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>) endif() - if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") - # Do nothing, expect SVE to be enabled by -march=native - elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") - # Add +sve - target_compile_options(faiss_perf_tests_utils PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:${CMAKE_MATCH_2}+sve>) - elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") - # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(faiss_perf_tests_utils PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:-march=armv8-a+sve>) + target_link_libraries(${target} PRIVATE faiss_avx512) + endif() + + if(FAISS_OPT_LEVEL STREQUAL "sve") + if(NOT WIN32) + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(${target} PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") + # No valid -march, so specify -march=armv8-a+sve as the default + target_compile_options(${target} PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:-march=armv8-a+sve>) + endif() + if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") + # Do nothing, expect SVE to be enabled by -march=native + elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") + # Add +sve + target_compile_options(${target} PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:${CMAKE_MATCH_2}+sve>) + elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") + # No valid
-march, so specify -march=armv8-a+sve as the default + target_compile_options(${target} PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:-march=armv8-a+sve>) + endif() + else() + # TODO: support Windows endif() - else() - # TODO: support Windows + target_link_libraries(${target} tests_utils PRIVATE faiss_sve) endif() - target_link_libraries(faiss_perf_tests_utils PRIVATE faiss_sve) -endif() +endfunction() + +link_to_faiss_lib(faiss_perf_tests_utils) set(FAISS_PERF_TEST_SRC bench_no_multithreading_rcq_search.cpp From a12eb42899228f8c968bf1daa4c2f5eb54c0c7f6 Mon Sep 17 00:00:00 2001 From: mengdilin Date: Sat, 21 Sep 2024 07:02:23 -0700 Subject: [PATCH 21/22] simplify cmake 2.0 --- perf_tests/CMakeLists.txt | 52 ++------------------------------------- 1 file changed, 2 insertions(+), 50 deletions(-) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 30ba338649..aa8a3479ed 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -87,57 +87,9 @@ set(FAISS_PERF_TEST_SRC bench_scalar_quantizer_encode.cpp ) foreach(bench ${FAISS_PERF_TEST_SRC}) - get_filename_component(bench_exec ${bench} NAME_WE) - + get_filename_component(bench_exec ${bench} NAME_WE) add_executable(${bench_exec} ${bench}) - - if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "sve") - target_link_libraries(${bench_exec} PRIVATE faiss) - endif() - - if(FAISS_OPT_LEVEL STREQUAL "avx2") - if(NOT WIN32) - target_compile_options(${bench_exec} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma>) - else() - target_compile_options(${bench_exec} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>) - endif() - target_link_libraries(${bench_exec} PRIVATE faiss_avx2) - endif() - - if(FAISS_OPT_LEVEL STREQUAL "avx512") - if(NOT WIN32) - target_compile_options(${bench_exec} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mavx512f -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw>) - else() - target_compile_options(${bench_exec} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>) - endif() - target_link_libraries(${bench_exec} PRIVATE
faiss_avx512) - endif() - - if(FAISS_OPT_LEVEL STREQUAL "sve") - if(NOT WIN32) - if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=native") - # Do nothing, expect SVE to be enabled by -march=native - elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") - # Add +sve - target_compile_options(${bench_exec} PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:${CMAKE_MATCH_2}+sve>) - elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} " MATCHES "(^| )-march=armv") - # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(${bench_exec} PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:-march=armv8-a+sve>) - endif() - if("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=native") - # Do nothing, expect SVE to be enabled by -march=native - elseif("${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )(-march=armv[0-9]+(\\.[1-9]+)?-[^+ ](\\+[^+$ ]+)*)") - # Add +sve - target_compile_options(${bench_exec} PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:${CMAKE_MATCH_2}+sve>) - elseif(NOT "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} " MATCHES "(^| )-march=armv") - # No valid -march, so specify -march=armv8-a+sve as the default - target_compile_options(${bench_exec} PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:-march=armv8-a+sve>) - endif() - else() - # TODO: support Windows - endif() - target_link_libraries(${bench_exec} PRIVATE faiss_sve) - endif() + link_to_faiss_lib(${bench_exec}) target_link_libraries(${bench_exec} PRIVATE faiss_perf_tests_utils OpenMP::OpenMP_CXX benchmark::benchmark gflags) # `#include ` or any other headers target_include_directories(${bench_exec} PRIVATE From da9470702d92f139b28bf4fe69673b4e4296239a Mon Sep 17 00:00:00 2001 From: mengdilin Date: Sat, 21 Sep 2024 07:20:17 -0700 Subject: [PATCH 22/22] fix typo --- perf_tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index aa8a3479ed..b7c325dcb4 100644 --- a/perf_tests/CMakeLists.txt +++
b/perf_tests/CMakeLists.txt @@ -73,7 +73,7 @@ function(link_to_faiss_lib target) else() # TODO: support Windows endif() - target_link_libraries(${target} tests_utils PRIVATE faiss_sve) + target_link_libraries(${target} PRIVATE faiss_sve) endif() endfunction()