diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerByteBulkBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerByteBulkBenchmark.java index 6a417bb785f13..0d52c032b18c8 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerByteBulkBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerByteBulkBenchmark.java @@ -73,13 +73,20 @@ public class VectorScorerByteBulkBenchmark { @Param({ "1024" }) public int dims; - // 128k is typically enough to not fit in L1 (core) cache for most processors; - // 1.5M is typically enough to not fit in L2 (core) cache; - // 130M is enough to not fit in L3 cache + // 128KB is typically enough to not fit in L1 (core) cache for most processors; + // 1.5MB is typically enough to not fit in L2 (core) cache; + // 130MB is enough to not fit in L3 cache @Param({ "128", "1500", "130000" }) public int numVectors; public int numVectorsToScore; + // Bulk sizes to test. 
+ // HNSW will distribute ordinal bulk sizes depending on the number of connections in the graph + // The default is 16, maximum is 512, and the bottom layer is 2x the configured setting, so 1024 is a maximum + // the most common case here is 32 + @Param({ "32", "64", "256", "1024" }) + public int bulkSize; + @Param public VectorImplementation implementation; @@ -101,7 +108,7 @@ private ScalarCosine(ByteVectorValues values) { @Override public float score(int ordinal) throws IOException { - return normalize(cosine(queryVector, values.vectorValue(ordinal))); + return normalize(ScalarOperations.cosine(queryVector, values.vectorValue(ordinal))); } private float normalize(float cosine) { @@ -131,7 +138,7 @@ private ScalarDotProduct(ByteVectorValues values) { @Override public float score(int ordinal) throws IOException { - return normalize(dotProduct(queryVector, values.vectorValue(ordinal))); + return normalize(ScalarOperations.dotProduct(queryVector, values.vectorValue(ordinal))); } private float normalize(int dotProduct) { @@ -162,7 +169,7 @@ private ScalarSquareDistance(ByteVectorValues values) { @Override public float score(int ordinal) throws IOException { - return VectorUtil.normalizeDistanceToUnitInterval(squareDistance(queryVector, values.vectorValue(ordinal))); + return VectorUtil.normalizeDistanceToUnitInterval(ScalarOperations.squareDistance(queryVector, values.vectorValue(ordinal))); } @Override @@ -179,6 +186,7 @@ public void setScoringOrdinal(int targetOrd) throws IOException { private float[] scores; private int[] ordinals; private int[] ids; + private int[] toScore; // scratch array for bulk scoring private UpdateableRandomVectorScorer scorer; private RandomVectorScorer queryScorer; @@ -224,7 +232,8 @@ void setup(VectorData vectorData) throws IOException { writeByteVectorData(dir, vectorData.vectorData); numVectorsToScore = vectorData.numVectorsToScore; - scores = new float[numVectorsToScore]; + scores = new float[bulkSize]; + 
toScore = new int[bulkSize]; ids = IntStream.range(0, numVectors).toArray(); ordinals = vectorData.ordinals; @@ -266,75 +275,67 @@ public void teardown() throws IOException { @Benchmark public float[] scoreMultipleSequential() throws IOException { - for (int v = 0; v < numVectorsToScore; v++) { - scores[v] = scorer.score(v); + int v = 0; + while (v < numVectorsToScore) { + for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) { + scores[i] = scorer.score(v); + } } return scores; } @Benchmark public float[] scoreMultipleRandom() throws IOException { - for (int v = 0; v < numVectorsToScore; v++) { - scores[v] = scorer.score(ordinals[v]); + int v = 0; + while (v < numVectorsToScore) { + for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) { + scores[i] = scorer.score(ordinals[v]); + } } return scores; } @Benchmark public float[] scoreQueryMultipleRandom() throws IOException { - for (int v = 0; v < numVectorsToScore; v++) { - scores[v] = queryScorer.score(ordinals[v]); + int v = 0; + while (v < numVectorsToScore) { + for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) { + scores[i] = queryScorer.score(ordinals[v]); + } } return scores; } @Benchmark public float[] scoreMultipleSequentialBulk() throws IOException { - scorer.bulkScore(ids, scores, ordinals.length); + for (int i = 0; i < numVectorsToScore; i += bulkSize) { + int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i); + // Copy the slice of sequential IDs to the scratch array + System.arraycopy(ids, i, toScore, 0, toScoreInThisBatch); + scorer.bulkScore(toScore, scores, toScoreInThisBatch); + } return scores; } @Benchmark public float[] scoreMultipleRandomBulk() throws IOException { - scorer.bulkScore(ordinals, scores, ordinals.length); + for (int i = 0; i < numVectorsToScore; i += bulkSize) { + int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i); + // Copy the slice of random ordinals to the scratch array + System.arraycopy(ordinals, i, toScore, 0, 
toScoreInThisBatch); + scorer.bulkScore(toScore, scores, toScoreInThisBatch); + } return scores; } @Benchmark public float[] scoreQueryMultipleRandomBulk() throws IOException { - queryScorer.bulkScore(ordinals, scores, ordinals.length); - return scores; - } - - static float cosine(byte[] a, byte[] b) { - int sum = 0; - int norm1 = 0; - int norm2 = 0; - - for (int i = 0; i < a.length; i++) { - byte elem1 = a[i]; - byte elem2 = b[i]; - sum += elem1 * elem2; - norm1 += elem1 * elem1; - norm2 += elem2 * elem2; - } - return (float) (sum / Math.sqrt((double) norm1 * (double) norm2)); - } - - static int dotProduct(byte[] a, byte[] b) { - int res = 0; - for (int i = 0; i < a.length; i++) { - res += a[i] * b[i]; + for (int i = 0; i < numVectorsToScore; i += bulkSize) { + int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i); + // Copy the slice of random ordinals to the scratch array + System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch); + queryScorer.bulkScore(toScore, scores, toScoreInThisBatch); } - return res; - } - - static int squareDistance(byte[] a, byte[] b) { - int res = 0; - for (int i = 0; i < a.length; i++) { - int d = a[i] - b[i]; - res += d * d; - } - return res; + return scores; } } diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerFloatBulkBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerFloatBulkBenchmark.java index 20ec0da8b2f97..4dfe24917841e 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerFloatBulkBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerFloatBulkBenchmark.java @@ -74,13 +74,20 @@ public class VectorScorerFloatBulkBenchmark { @Param({ "1024" }) public int dims; - // 128k is typically enough to not fit in L1 (core) cache for most processors; - // 1.5M is typically enough to not fit in L2 (core) cache; - // 130M is enough to not fit in L3 cache 
+ // 32 * 4 = 128KB is typically enough to not fit in L1 (core) cache for most processors; + // 375 * 4 = 1.5MB is typically enough to not fit in L2 (core) cache; + // 32500 * 4 = 130MB is enough to not fit in L3 cache @Param({ "32", "375", "32500" }) public int numVectors; public int numVectorsToScore; + // Bulk sizes to test. + // HNSW will distribute ordinal bulk sizes depending on the number of connections in the graph + // The default is 16, maximum is 512, and the bottom layer is 2x the configured setting, so 1024 is a maximum + // the most common case here is 32 + @Param({ "32", "64", "256", "1024" }) + public int bulkSize; + @Param public VectorImplementation implementation; @@ -144,6 +151,7 @@ public void setScoringOrdinal(int targetOrd) throws IOException { private float[] scores; private int[] ordinals; private int[] ids; + private int[] toScore; // scratch array for bulk scoring private UpdateableRandomVectorScorer scorer; private RandomVectorScorer queryScorer; @@ -193,7 +201,8 @@ void setup(VectorData vectorData) throws IOException { writeFloatVectorData(dir, vectorData.vectorData); numVectorsToScore = vectorData.numVectorsToScore; - scores = new float[numVectorsToScore]; + scores = new float[bulkSize]; + toScore = new int[bulkSize]; ids = IntStream.range(0, numVectors).toArray(); ordinals = vectorData.ordinals; @@ -234,43 +243,67 @@ public void teardown() throws IOException { @Benchmark public float[] scoreMultipleSequential() throws IOException { - for (int v = 0; v < numVectorsToScore; v++) { - scores[v] = scorer.score(v); + int v = 0; + while (v < numVectorsToScore) { + for (int i = 0; i < bulkSize && v 
< numVectorsToScore; i++, v++) { + scores[i] = scorer.score(ordinals[v]); + } } return scores; } @Benchmark public float[] scoreQueryMultipleRandom() throws IOException { - for (int v = 0; v < numVectorsToScore; v++) { - scores[v] = queryScorer.score(ordinals[v]); + int v = 0; + while (v < numVectorsToScore) { + for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) { + scores[i] = queryScorer.score(ordinals[v]); + } } return scores; } @Benchmark public float[] scoreMultipleSequentialBulk() throws IOException { - scorer.bulkScore(ids, scores, ordinals.length); + for (int i = 0; i < numVectorsToScore; i += bulkSize) { + int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i); + // Copy the slice of sequential IDs to the scratch array + System.arraycopy(ids, i, toScore, 0, toScoreInThisBatch); + scorer.bulkScore(toScore, scores, toScoreInThisBatch); + } return scores; } @Benchmark public float[] scoreMultipleRandomBulk() throws IOException { - scorer.bulkScore(ordinals, scores, ordinals.length); + for (int i = 0; i < numVectorsToScore; i += bulkSize) { + int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i); + // Copy the slice of random ordinals to the scratch array + System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch); + scorer.bulkScore(toScore, scores, toScoreInThisBatch); + } return scores; } @Benchmark public float[] scoreQueryMultipleRandomBulk() throws IOException { - queryScorer.bulkScore(ordinals, scores, ordinals.length); + for (int i = 0; i < numVectorsToScore; i += bulkSize) { + int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i); + // Copy the slice of random ordinals to the scratch array + System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch); + queryScorer.bulkScore(toScore, scores, toScoreInThisBatch); + } return scores; } } diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt7uBulkBenchmark.java 
b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt7uBulkBenchmark.java index 6f55633650533..88c9f18109bb4 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt7uBulkBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt7uBulkBenchmark.java @@ -89,7 +89,7 @@ public class VectorScorerInt7uBulkBenchmark { @Param({ "16", "32", "64", "256", "1024" }) public int bulkSize; - @Param({ "SCALAR", "LUCENE", "NATIVE" }) + @Param public VectorImplementation implementation; @Param({ "DOT_PRODUCT", "EUCLIDEAN" }) diff --git a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerByteBulkBenchmarkTests.java b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerByteBulkBenchmarkTests.java index 8f01aa429008d..b5d26206501a0 100644 --- a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerByteBulkBenchmarkTests.java +++ b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerByteBulkBenchmarkTests.java @@ -48,6 +48,7 @@ public void testSequential() throws Exception { bench.dims = dims; bench.numVectors = 1000; bench.numVectorsToScore = 200; + bench.bulkSize = 200; bench.setup(vectorData); try { @@ -78,6 +79,7 @@ public void testRandom() throws Exception { bench.dims = dims; bench.numVectors = 1000; bench.numVectorsToScore = 200; + bench.bulkSize = 200; bench.setup(vectorData); try { @@ -109,6 +111,7 @@ public void testQueryRandom() throws Exception { bench.dims = dims; bench.numVectors = 1000; bench.numVectorsToScore = 200; + bench.bulkSize = 200; bench.setup(vectorData); try { diff --git a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerFloatBulkBenchmarkTests.java b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerFloatBulkBenchmarkTests.java index 31d1d0fcb9111..8d5dc37602ba0 100644 --- 
a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerFloatBulkBenchmarkTests.java +++ b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerFloatBulkBenchmarkTests.java @@ -49,6 +49,7 @@ public void testSequential() throws Exception { bench.dims = dims; bench.numVectors = 1000; bench.numVectorsToScore = 200; + bench.bulkSize = 200; bench.setup(vectorData); try { @@ -79,6 +80,7 @@ public void testRandom() throws Exception { bench.dims = dims; bench.numVectors = 1000; bench.numVectorsToScore = 200; + bench.bulkSize = 200; bench.setup(vectorData); try { @@ -110,6 +112,7 @@ public void testQueryRandom() throws Exception { bench.dims = dims; bench.numVectors = 1000; bench.numVectorsToScore = 200; + bench.bulkSize = 200; bench.setup(vectorData); try {