From 5389756cce52968b092df88602517688a853049a Mon Sep 17 00:00:00 2001
From: CascadingRadium <rahul.rampure@couchbase.com>
Date: Wed, 29 May 2024 19:59:41 +0530
Subject: [PATCH 1/3] Add OpenMP guards

---
 faiss/IndexIDMap.cpp           | 1 -
 faiss/IndexScalarQuantizer.cpp | 2 +-
 faiss/impl/ScalarQuantizer.cpp | 6 +++---
 3 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/faiss/IndexIDMap.cpp b/faiss/IndexIDMap.cpp
index ecac7ed454..d62ff24490 100644
--- a/faiss/IndexIDMap.cpp
+++ b/faiss/IndexIDMap.cpp
@@ -120,7 +120,6 @@ void IndexIDMapTemplate<IndexT>::search(
     }
     index->search(n, x, k, distances, labels, params);
     idx_t* li = labels;
-#pragma omp parallel for
     for (idx_t i = 0; i < n * k; i++) {
         li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
     }
diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp
index 4189bcd034..6fe92255a5 100644
--- a/faiss/IndexScalarQuantizer.cpp
+++ b/faiss/IndexScalarQuantizer.cpp
@@ -58,7 +58,7 @@ void IndexScalarQuantizer::search(
     FAISS_THROW_IF_NOT(
             metric_type == METRIC_L2 || metric_type == METRIC_INNER_PRODUCT);
 
-#pragma omp parallel
+#pragma omp parallel if (n > 1)
     {
         InvertedListScanner* scanner =
                 sq.select_InvertedListScanner(metric_type, nullptr, true, sel);
diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp
index a3cf4c744e..d7b3f78016 100644
--- a/faiss/impl/ScalarQuantizer.cpp
+++ b/faiss/impl/ScalarQuantizer.cpp
@@ -601,7 +601,7 @@ void train_NonUniform(
             }
         }
         std::vector<float> trained_d(2);
-#pragma omp parallel for
+#pragma omp parallel for if (d > 1)
         for (int j = 0; j < d; j++) {
             train_Uniform(rs, rs_arg, n, k, xt.data() + j * n, trained_d);
             vmin[j] = trained_d[0];
@@ -1157,7 +1157,7 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
     memset(codes, 0, code_size * n);
-#pragma omp parallel for
+#pragma omp parallel for if (n > 1)
     for (int64_t i = 0; i < n; i++)
         squant->encode_vector(x + i * d, codes + i * code_size);
 }
@@ -1165,7 +1165,7 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
 void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const {
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
-#pragma omp parallel for
+#pragma omp parallel for if (n > 1)
     for (int64_t i = 0; i < n; i++)
         squant->decode_vector(codes + i * code_size, x + i * d);
 }

From 7f4c5874a8a7986b61b4cfa8b3c89ad4c15f102f Mon Sep 17 00:00:00 2001
From: CascadingRadium <rahul.rampure@couchbase.com>
Date: Thu, 30 May 2024 20:37:14 +0530
Subject: [PATCH 2/3] Add comments for OpenMP guards

---
 faiss/IndexScalarQuantizer.cpp | 3 +++
 faiss/impl/ScalarQuantizer.cpp | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp
index 6fe92255a5..d4442aae0a 100644
--- a/faiss/IndexScalarQuantizer.cpp
+++ b/faiss/IndexScalarQuantizer.cpp
@@ -58,6 +58,9 @@ void IndexScalarQuantizer::search(
     FAISS_THROW_IF_NOT(
             metric_type == METRIC_L2 || metric_type == METRIC_INNER_PRODUCT);
 
+// adding an openMP guard here to spawn threads only if n > 1, where n is the number 
+// of queries in the batch. If n = 1, then the search is done in a single thread.
+// This is done to avoid the overhead of spawning threads for executing sequential code.
 #pragma omp parallel if (n > 1)
     {
         InvertedListScanner* scanner =
diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp
index d7b3f78016..58507ecfa3 100644
--- a/faiss/impl/ScalarQuantizer.cpp
+++ b/faiss/impl/ScalarQuantizer.cpp
@@ -601,6 +601,8 @@ void train_NonUniform(
             }
         }
         std::vector<float> trained_d(2);
+        // add an openMP guard here to prevent spawning threads
+        // when d = 1 (which would indicate sequential execution) 
 #pragma omp parallel for if (d > 1)
         for (int j = 0; j < d; j++) {
             train_Uniform(rs, rs_arg, n, k, xt.data() + j * n, trained_d);
@@ -1157,6 +1159,8 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
     memset(codes, 0, code_size * n);
+    // add an openMP guard here to prevent spawning threads
+    // when n = 1 (which would indicate sequential execution) 
 #pragma omp parallel for if (n > 1)
     for (int64_t i = 0; i < n; i++)
         squant->encode_vector(x + i * d, codes + i * code_size);
@@ -1165,6 +1169,8 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
 void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const {
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
+    // add an openMP guard here to prevent spawning threads
+    // when n = 1 (which would indicate sequential execution) 
 #pragma omp parallel for if (n > 1)
     for (int64_t i = 0; i < n; i++)
         squant->decode_vector(codes + i * code_size, x + i * d);

From 709c41d83ae1bfe2a68542c89a398547460505b8 Mon Sep 17 00:00:00 2001
From: Abhinav Dangeti <abhinav@couchbase.com>
Date: Thu, 30 May 2024 10:16:51 -0600
Subject: [PATCH 3/3] Adjust commentary

---
 faiss/IndexIDMap.cpp           |  2 ++
 faiss/IndexScalarQuantizer.cpp |  3 ++-
 faiss/impl/ScalarQuantizer.cpp | 17 +++++++++++------
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/faiss/IndexIDMap.cpp b/faiss/IndexIDMap.cpp
index d62ff24490..6ce069da5e 100644
--- a/faiss/IndexIDMap.cpp
+++ b/faiss/IndexIDMap.cpp
@@ -120,6 +120,8 @@ void IndexIDMapTemplate<IndexT>::search(
     }
     index->search(n, x, k, distances, labels, params);
     idx_t* li = labels;
+
+    // Dropped the "#pragma omp parallel for" on this loop for bleve.
     for (idx_t i = 0; i < n * k; i++) {
         li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
     }
diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp
index d4442aae0a..94268af688 100644
--- a/faiss/IndexScalarQuantizer.cpp
+++ b/faiss/IndexScalarQuantizer.cpp
@@ -58,9 +58,10 @@ void IndexScalarQuantizer::search(
     FAISS_THROW_IF_NOT(
             metric_type == METRIC_L2 || metric_type == METRIC_INNER_PRODUCT);
 
-// adding an openMP guard here to spawn threads only if n > 1, where n is the number 
+// Adding an OpenMP guard here to spawn threads only if n > 1, where n is the number
 // of queries in the batch. If n = 1, then the search is done in a single thread.
 // This is done to avoid the overhead of spawning threads for executing sequential code.
+// This is for bleve, more in MB-61930.
 #pragma omp parallel if (n > 1)
     {
         InvertedListScanner* scanner =
diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp
index 58507ecfa3..a3bf7f0c8a 100644
--- a/faiss/impl/ScalarQuantizer.cpp
+++ b/faiss/impl/ScalarQuantizer.cpp
@@ -601,8 +601,10 @@ void train_NonUniform(
             }
         }
         std::vector<float> trained_d(2);
-        // add an openMP guard here to prevent spawning threads
-        // when d = 1 (which would indicate sequential execution) 
+
+// Add an OpenMP guard here to prevent spawning threads
+// when d = 1 (a single dimension means the loop runs sequentially anyway).
+// This is for bleve, more in MB-61930.
 #pragma omp parallel for if (d > 1)
         for (int j = 0; j < d; j++) {
             train_Uniform(rs, rs_arg, n, k, xt.data() + j * n, trained_d);
@@ -1159,8 +1161,10 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
     memset(codes, 0, code_size * n);
-    // add an openMP guard here to prevent spawning threads
-    // when n = 1 (which would indicate sequential execution) 
+
+// Add an OpenMP guard here to prevent spawning threads
+// when n = 1 (a single vector means the loop runs sequentially anyway).
+// This is for bleve, more in MB-61930.
 #pragma omp parallel for if (n > 1)
     for (int64_t i = 0; i < n; i++)
         squant->encode_vector(x + i * d, codes + i * code_size);
@@ -1169,8 +1173,9 @@ void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
 void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const {
     std::unique_ptr<SQuantizer> squant(select_quantizer());
 
-    // add an openMP guard here to prevent spawning threads
-    // when n = 1 (which would indicate sequential execution) 
+// Add an OpenMP guard here to prevent spawning threads
+// when n = 1 (a single vector means the loop runs sequentially anyway).
+// This is for bleve, more in MB-61930.
 #pragma omp parallel for if (n > 1)
     for (int64_t i = 0; i < n; i++)
         squant->decode_vector(codes + i * code_size, x + i * d);