From 5389756cce52968b092df88602517688a853049a Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Wed, 29 May 2024 19:59:41 +0530 Subject: [PATCH 1/3] Add OpenMP guards --- faiss/IndexIDMap.cpp | 1 - faiss/IndexScalarQuantizer.cpp | 2 +- faiss/impl/ScalarQuantizer.cpp | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/faiss/IndexIDMap.cpp b/faiss/IndexIDMap.cpp index ecac7ed454..d62ff24490 100644 --- a/faiss/IndexIDMap.cpp +++ b/faiss/IndexIDMap.cpp @@ -120,7 +120,6 @@ void IndexIDMapTemplate::search( } index->search(n, x, k, distances, labels, params); idx_t* li = labels; -#pragma omp parallel for for (idx_t i = 0; i < n * k; i++) { li[i] = li[i] < 0 ? li[i] : id_map[li[i]]; } diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp index 4189bcd034..6fe92255a5 100644 --- a/faiss/IndexScalarQuantizer.cpp +++ b/faiss/IndexScalarQuantizer.cpp @@ -58,7 +58,7 @@ void IndexScalarQuantizer::search( FAISS_THROW_IF_NOT( metric_type == METRIC_L2 || metric_type == METRIC_INNER_PRODUCT); -#pragma omp parallel +#pragma omp parallel if (n > 1) { InvertedListScanner* scanner = sq.select_InvertedListScanner(metric_type, nullptr, true, sel); diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp index a3cf4c744e..d7b3f78016 100644 --- a/faiss/impl/ScalarQuantizer.cpp +++ b/faiss/impl/ScalarQuantizer.cpp @@ -601,7 +601,7 @@ void train_NonUniform( } } std::vector trained_d(2); -#pragma omp parallel for +#pragma omp parallel for if (d > 1) for (int j = 0; j < d; j++) { train_Uniform(rs, rs_arg, n, k, xt.data() + j * n, trained_d); vmin[j] = trained_d[0]; @@ -1157,7 +1157,7 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n) std::unique_ptr squant(select_quantizer()); memset(codes, 0, code_size * n); -#pragma omp parallel for +#pragma omp parallel for if (n > 1) for (int64_t i = 0; i < n; i++) squant->encode_vector(x + i * d, codes + i * code_size); } @@ -1165,7 +1165,7 @@ void 
ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n) void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const { std::unique_ptr squant(select_quantizer()); -#pragma omp parallel for +#pragma omp parallel for if (n > 1) for (int64_t i = 0; i < n; i++) squant->decode_vector(codes + i * code_size, x + i * d); } From 7f4c5874a8a7986b61b4cfa8b3c89ad4c15f102f Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Thu, 30 May 2024 20:37:14 +0530 Subject: [PATCH 2/3] add comments for openmp guards --- faiss/IndexScalarQuantizer.cpp | 3 +++ faiss/impl/ScalarQuantizer.cpp | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp index 6fe92255a5..d4442aae0a 100644 --- a/faiss/IndexScalarQuantizer.cpp +++ b/faiss/IndexScalarQuantizer.cpp @@ -58,6 +58,9 @@ void IndexScalarQuantizer::search( FAISS_THROW_IF_NOT( metric_type == METRIC_L2 || metric_type == METRIC_INNER_PRODUCT); +// adding an openMP guard here to spawn threads only if n > 1, where n is the number +// of queries in the batch. If n = 1, then the search is done in a single thread. +// This is done to avoid the overhead of spawning threads for executing sequential code. 
#pragma omp parallel if (n > 1) { InvertedListScanner* scanner = diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp index d7b3f78016..58507ecfa3 100644 --- a/faiss/impl/ScalarQuantizer.cpp +++ b/faiss/impl/ScalarQuantizer.cpp @@ -601,6 +601,8 @@ void train_NonUniform( } } std::vector trained_d(2); + // add an openMP guard here to prevent spawning threads + // when d = 1 (which would indicate sequential execution) #pragma omp parallel for if (d > 1) for (int j = 0; j < d; j++) { train_Uniform(rs, rs_arg, n, k, xt.data() + j * n, trained_d); @@ -1157,6 +1159,8 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n) std::unique_ptr squant(select_quantizer()); memset(codes, 0, code_size * n); + // add an openMP guard here to prevent spawning threads + // when n = 1 (which would indicate sequential execution) #pragma omp parallel for if (n > 1) for (int64_t i = 0; i < n; i++) squant->encode_vector(x + i * d, codes + i * code_size); @@ -1165,6 +1169,8 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n) void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const { std::unique_ptr squant(select_quantizer()); + // add an openMP guard here to prevent spawning threads + // when n = 1 (which would indicate sequential execution) #pragma omp parallel for if (n > 1) for (int64_t i = 0; i < n; i++) squant->decode_vector(codes + i * code_size, x + i * d); From 709c41d83ae1bfe2a68542c89a398547460505b8 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Thu, 30 May 2024 10:16:51 -0600 Subject: [PATCH 3/3] Adjust commentary --- faiss/IndexIDMap.cpp | 2 ++ faiss/IndexScalarQuantizer.cpp | 3 ++- faiss/impl/ScalarQuantizer.cpp | 17 +++++++++++------ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/faiss/IndexIDMap.cpp b/faiss/IndexIDMap.cpp index d62ff24490..6ce069da5e 100644 --- a/faiss/IndexIDMap.cpp +++ b/faiss/IndexIDMap.cpp @@ -120,6 +120,8 @@ void 
IndexIDMapTemplate::search( } index->search(n, x, k, distances, labels, params); idx_t* li = labels; + + // The "#pragma omp parallel for" directive was dropped here for bleve for (idx_t i = 0; i < n * k; i++) { li[i] = li[i] < 0 ? li[i] : id_map[li[i]]; } diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp index d4442aae0a..94268af688 100644 --- a/faiss/IndexScalarQuantizer.cpp +++ b/faiss/IndexScalarQuantizer.cpp @@ -58,9 +58,10 @@ void IndexScalarQuantizer::search( FAISS_THROW_IF_NOT( metric_type == METRIC_L2 || metric_type == METRIC_INNER_PRODUCT); -// adding an openMP guard here to spawn threads only if n > 1, where n is the number +// Adding an openMP guard here to spawn threads only if n > 1, where n is the number // of queries in the batch. If n = 1, then the search is done in a single thread. // This is done to avoid the overhead of spawning threads for executing sequential code. +// This is for bleve, more in MB-61930. #pragma omp parallel if (n > 1) { InvertedListScanner* scanner = diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp index 58507ecfa3..a3bf7f0c8a 100644 --- a/faiss/impl/ScalarQuantizer.cpp +++ b/faiss/impl/ScalarQuantizer.cpp @@ -601,8 +601,10 @@ void train_NonUniform( } } std::vector trained_d(2); - // add an openMP guard here to prevent spawning threads - // when d = 1 (which would indicate sequential execution) + +// Add an openMP guard here to prevent spawning threads +// when d = 1 (which would indicate sequential execution). +// This is for bleve, more in MB-61930. 
#pragma omp parallel for if (d > 1) for (int j = 0; j < d; j++) { train_Uniform(rs, rs_arg, n, k, xt.data() + j * n, trained_d); @@ -1159,8 +1161,10 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n) std::unique_ptr squant(select_quantizer()); memset(codes, 0, code_size * n); - // add an openMP guard here to prevent spawning threads - // when n = 1 (which would indicate sequential execution) + +// Add an openMP guard here to prevent spawning threads +// when n = 1 (which would indicate sequential execution). +// This is for bleve, more in MB-61930. #pragma omp parallel for if (n > 1) for (int64_t i = 0; i < n; i++) squant->encode_vector(x + i * d, codes + i * code_size); @@ -1169,8 +1173,9 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n) void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const { std::unique_ptr squant(select_quantizer()); - // add an openMP guard here to prevent spawning threads - // when n = 1 (which would indicate sequential execution) +// Add an openMP guard here to prevent spawning threads +// when n = 1 (which would indicate sequential execution). +// This is for bleve, more in MB-61930. #pragma omp parallel for if (n > 1) for (int64_t i = 0; i < n; i++) squant->decode_vector(codes + i * code_size, x + i * d);