Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,20 @@ public class VectorScorerByteBulkBenchmark {
@Param({ "1024" })
public int dims;

// 128k is typically enough to not fit in L1 (core) cache for most processors;
// 1.5M is typically enough to not fit in L2 (core) cache;
// 130M is enough to not fit in L3 cache
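// Sizes below assume dims = 1024 and 1 byte per dimension:
// 128 vectors * 1024 bytes = 128KB, typically enough to not fit in L1 (core) cache for most processors;
// 1500 vectors * 1024 bytes ~= 1.5MB, typically enough to not fit in L2 (core) cache;
// 130000 vectors * 1024 bytes ~= 130MB, enough to not fit in L3 cache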
@Param({ "128", "1500", "130000" })
public int numVectors;
public int numVectorsToScore;

// Bulk sizes to test.
// With HNSW, the number of ordinals scored per bulk call depends on the number of connections configured for the graph.
// The default is 16 and the maximum is 512; the bottom layer uses 2x the configured setting, so 1024 is the maximum.
// The most common case here is 32 (2x the default of 16).
@Param({ "32", "64", "256", "1024" })
public int bulkSize;

@Param
public VectorImplementation implementation;

Expand All @@ -101,7 +108,7 @@ private ScalarCosine(ByteVectorValues values) {

@Override
public float score(int ordinal) throws IOException {
return normalize(cosine(queryVector, values.vectorValue(ordinal)));
return normalize(ScalarOperations.cosine(queryVector, values.vectorValue(ordinal)));
}

private float normalize(float cosine) {
Expand Down Expand Up @@ -131,7 +138,7 @@ private ScalarDotProduct(ByteVectorValues values) {

@Override
public float score(int ordinal) throws IOException {
return normalize(dotProduct(queryVector, values.vectorValue(ordinal)));
return normalize(ScalarOperations.dotProduct(queryVector, values.vectorValue(ordinal)));
}

private float normalize(int dotProduct) {
Expand Down Expand Up @@ -162,7 +169,7 @@ private ScalarSquareDistance(ByteVectorValues values) {

@Override
public float score(int ordinal) throws IOException {
return VectorUtil.normalizeDistanceToUnitInterval(squareDistance(queryVector, values.vectorValue(ordinal)));
return VectorUtil.normalizeDistanceToUnitInterval(ScalarOperations.squareDistance(queryVector, values.vectorValue(ordinal)));
}

@Override
Expand All @@ -179,6 +186,7 @@ public void setScoringOrdinal(int targetOrd) throws IOException {
private float[] scores;
private int[] ordinals;
private int[] ids;
private int[] toScore; // scratch array for bulk scoring

private UpdateableRandomVectorScorer scorer;
private RandomVectorScorer queryScorer;
Expand Down Expand Up @@ -224,7 +232,8 @@ void setup(VectorData vectorData) throws IOException {
writeByteVectorData(dir, vectorData.vectorData);

numVectorsToScore = vectorData.numVectorsToScore;
scores = new float[numVectorsToScore];
scores = new float[bulkSize];
toScore = new int[bulkSize];
ids = IntStream.range(0, numVectors).toArray();
ordinals = vectorData.ordinals;

Expand Down Expand Up @@ -266,75 +275,67 @@ public void teardown() throws IOException {

@Benchmark
public float[] scoreMultipleSequential() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = scorer.score(v);
int v = 0;
while (v < numVectorsToScore) {
for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
scores[i] = scorer.score(v);
}
}
return scores;
}

@Benchmark
public float[] scoreMultipleRandom() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = scorer.score(ordinals[v]);
int v = 0;
while (v < numVectorsToScore) {
for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
scores[i] = scorer.score(ordinals[v]);
}
}
return scores;
}

@Benchmark
public float[] scoreQueryMultipleRandom() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = queryScorer.score(ordinals[v]);
int v = 0;
while (v < numVectorsToScore) {
for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
scores[i] = queryScorer.score(ordinals[v]);
}
}
return scores;
}

@Benchmark
public float[] scoreMultipleSequentialBulk() throws IOException {
scorer.bulkScore(ids, scores, ordinals.length);
for (int i = 0; i < numVectorsToScore; i += bulkSize) {
int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
// Copy the slice of sequential IDs to the scratch array
System.arraycopy(ids, i, toScore, 0, toScoreInThisBatch);
scorer.bulkScore(toScore, scores, toScoreInThisBatch);
}
return scores;
}

@Benchmark
public float[] scoreMultipleRandomBulk() throws IOException {
scorer.bulkScore(ordinals, scores, ordinals.length);
for (int i = 0; i < numVectorsToScore; i += bulkSize) {
int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
// Copy the slice of random ordinals to the scratch array
System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch);
scorer.bulkScore(toScore, scores, toScoreInThisBatch);
}
return scores;
}

@Benchmark
public float[] scoreQueryMultipleRandomBulk() throws IOException {
queryScorer.bulkScore(ordinals, scores, ordinals.length);
return scores;
}

static float cosine(byte[] a, byte[] b) {
int sum = 0;
int norm1 = 0;
int norm2 = 0;

for (int i = 0; i < a.length; i++) {
byte elem1 = a[i];
byte elem2 = b[i];
sum += elem1 * elem2;
norm1 += elem1 * elem1;
norm2 += elem2 * elem2;
}
return (float) (sum / Math.sqrt((double) norm1 * (double) norm2));
}

static int dotProduct(byte[] a, byte[] b) {
int res = 0;
for (int i = 0; i < a.length; i++) {
res += a[i] * b[i];
for (int i = 0; i < numVectorsToScore; i += bulkSize) {
int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
// Copy the slice of random ordinals to the scratch array
System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch);
queryScorer.bulkScore(toScore, scores, toScoreInThisBatch);
}
return res;
}

static int squareDistance(byte[] a, byte[] b) {
int res = 0;
for (int i = 0; i < a.length; i++) {
int d = a[i] - b[i];
res += d * d;
}
return res;
return scores;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,20 @@ public class VectorScorerFloatBulkBenchmark {
@Param({ "1024" })
public int dims;

// 128k is typically enough to not fit in L1 (core) cache for most processors;
// 1.5M is typically enough to not fit in L2 (core) cache;
// 130M is enough to not fit in L3 cache
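// Sizes below assume dims = 1024 and 4 bytes per float dimension:
// 32 vectors * 1024 dims * 4 bytes = 128KB, typically enough to not fit in L1 (core) cache for most processors;
// 375 vectors * 1024 dims * 4 bytes ~= 1.5MB, typically enough to not fit in L2 (core) cache;
// 32500 vectors * 1024 dims * 4 bytes ~= 130MB, enough to not fit in L3 cache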
@Param({ "32", "375", "32500" })
public int numVectors;
public int numVectorsToScore;

// Bulk sizes to test.
// With HNSW, the number of ordinals scored per bulk call depends on the number of connections configured for the graph.
// The default is 16 and the maximum is 512; the bottom layer uses 2x the configured setting, so 1024 is the maximum.
// The most common case here is 32 (2x the default of 16).
@Param({ "32", "64", "256", "1024" })
public int bulkSize;

@Param
public VectorImplementation implementation;

Expand Down Expand Up @@ -144,6 +151,7 @@ public void setScoringOrdinal(int targetOrd) throws IOException {
private float[] scores;
private int[] ordinals;
private int[] ids;
private int[] toScore; // scratch array for bulk scoring

private UpdateableRandomVectorScorer scorer;
private RandomVectorScorer queryScorer;
Expand Down Expand Up @@ -193,7 +201,8 @@ void setup(VectorData vectorData) throws IOException {
writeFloatVectorData(dir, vectorData.vectorData);

numVectorsToScore = vectorData.numVectorsToScore;
scores = new float[numVectorsToScore];
scores = new float[bulkSize];
toScore = new int[bulkSize];
ids = IntStream.range(0, numVectors).toArray();
ordinals = vectorData.ordinals;

Expand Down Expand Up @@ -234,43 +243,67 @@ public void teardown() throws IOException {

@Benchmark
public float[] scoreMultipleSequential() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = scorer.score(v);
int v = 0;
while (v < numVectorsToScore) {
for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
scores[i] = scorer.score(v);
}
}
return scores;
}

@Benchmark
public float[] scoreMultipleRandom() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = scorer.score(ordinals[v]);
int v = 0;
while (v < numVectorsToScore) {
for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
scores[i] = scorer.score(ordinals[v]);
}
}
return scores;
}

@Benchmark
public float[] scoreQueryMultipleRandom() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = queryScorer.score(ordinals[v]);
int v = 0;
while (v < numVectorsToScore) {
for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
scores[i] = queryScorer.score(ordinals[v]);
}
}
return scores;
}

@Benchmark
public float[] scoreMultipleSequentialBulk() throws IOException {
scorer.bulkScore(ids, scores, ordinals.length);
for (int i = 0; i < numVectorsToScore; i += bulkSize) {
int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
// Copy the slice of sequential IDs to the scratch array
System.arraycopy(ids, i, toScore, 0, toScoreInThisBatch);
scorer.bulkScore(toScore, scores, toScoreInThisBatch);
}
return scores;
}

@Benchmark
public float[] scoreMultipleRandomBulk() throws IOException {
scorer.bulkScore(ordinals, scores, ordinals.length);
for (int i = 0; i < numVectorsToScore; i += bulkSize) {
int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
// Copy the slice of random ordinals to the scratch array
System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch);
scorer.bulkScore(toScore, scores, toScoreInThisBatch);
}
return scores;
}

@Benchmark
public float[] scoreQueryMultipleRandomBulk() throws IOException {
queryScorer.bulkScore(ordinals, scores, ordinals.length);
for (int i = 0; i < numVectorsToScore; i += bulkSize) {
int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
// Copy the slice of random ordinals to the scratch array
System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch);
queryScorer.bulkScore(toScore, scores, toScoreInThisBatch);
}
return scores;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public class VectorScorerInt7uBulkBenchmark {
@Param({ "16", "32", "64", "256", "1024" })
public int bulkSize;

@Param({ "SCALAR", "LUCENE", "NATIVE" })
@Param
public VectorImplementation implementation;

@Param({ "DOT_PRODUCT", "EUCLIDEAN" })
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public void testSequential() throws Exception {
bench.dims = dims;
bench.numVectors = 1000;
bench.numVectorsToScore = 200;
bench.bulkSize = 200;
bench.setup(vectorData);

try {
Expand Down Expand Up @@ -78,6 +79,7 @@ public void testRandom() throws Exception {
bench.dims = dims;
bench.numVectors = 1000;
bench.numVectorsToScore = 200;
bench.bulkSize = 200;
bench.setup(vectorData);

try {
Expand Down Expand Up @@ -109,6 +111,7 @@ public void testQueryRandom() throws Exception {
bench.dims = dims;
bench.numVectors = 1000;
bench.numVectorsToScore = 200;
bench.bulkSize = 200;
bench.setup(vectorData);

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ public void testSequential() throws Exception {
bench.dims = dims;
bench.numVectors = 1000;
bench.numVectorsToScore = 200;
bench.bulkSize = 200;
bench.setup(vectorData);

try {
Expand Down Expand Up @@ -79,6 +80,7 @@ public void testRandom() throws Exception {
bench.dims = dims;
bench.numVectors = 1000;
bench.numVectorsToScore = 200;
bench.bulkSize = 200;
bench.setup(vectorData);

try {
Expand Down Expand Up @@ -110,6 +112,7 @@ public void testQueryRandom() throws Exception {
bench.dims = dims;
bench.numVectors = 1000;
bench.numVectorsToScore = 200;
bench.bulkSize = 200;
bench.setup(vectorData);

try {
Expand Down