diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java
index 91cd12e2d87b8..ed0ddce3268a4 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java
@@ -11,6 +11,7 @@
 
 import org.apache.lucene.backward_codecs.lucene99.OffHeapQuantizedByteVectorValues;
 import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer;
 import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues;
 import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues;
 import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorScorer;
@@ -134,6 +135,29 @@ static RandomVectorScorer luceneScorer(QuantizedByteVectorValues values, VectorS
         return new Lucene99ScalarQuantizedVectorScorer(null).getRandomVectorScorer(sim, values, queryVec);
     }
 
+    /**
+     * Returns the {@link RandomVectorScorerSupplier} that {@link Lucene104ScalarQuantizedVectorScorer}
+     * (constructed with a {@code null} argument) creates for {@code values} under {@code sim}.
+     */
+    static RandomVectorScorerSupplier lucene104ScoreSupplier(
+        org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues values,
+        VectorSimilarityFunction sim
+    ) throws IOException {
+        return new Lucene104ScalarQuantizedVectorScorer(null).getRandomVectorScorerSupplier(sim, values);
+    }
+
+    /**
+     * Returns the {@link RandomVectorScorer} that {@link Lucene104ScalarQuantizedVectorScorer}
+     * (constructed with a {@code null} argument) creates for {@code queryVec} against {@code values}.
+     */
+    static RandomVectorScorer lucene104Scorer(
+        org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues values,
+        VectorSimilarityFunction sim,
+        float[] queryVec
+    ) throws IOException {
+        return new Lucene104ScalarQuantizedVectorScorer(null).getRandomVectorScorer(sim, values, queryVec);
+    }
+
     static RuntimeException rethrow(Throwable t) {
         if (t instanceof Error err) {
             throw err;
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/Int4BenchmarkUtils.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/Int4BenchmarkUtils.java
new file mode 100644
index 0000000000000..deec4d9ea4201
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/Int4BenchmarkUtils.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
+import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.VectorScorer;
+import org.apache.lucene.util.VectorUtil;
+import org.apache.lucene.util.hnsw.RandomVectorScorer;
+import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
+import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.concurrent.ThreadLocalRandom;
+
+import static org.elasticsearch.benchmark.vector.scorer.ScalarOperations.applyI4Corrections;
+import static org.elasticsearch.benchmark.vector.scorer.ScalarOperations.dotProductI4SinglePacked;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.unpackNibbles;
+
+/**
+ * Helpers shared by the int4 (packed-nibble) scorer benchmarks: an in-memory vector store and
+ * plain-Java scorers that call into {@link ScalarOperations}.
+ */
+public class Int4BenchmarkUtils {
+
+    /**
+     * In-memory implementation of {@link QuantizedByteVectorValues} for int4 (PACKED_NIBBLE) benchmarks.
+     * Stores pre-quantized packed nibble vectors with synthetic corrective terms.
+     */
+    static class InMemoryInt4QuantizedByteVectorValues extends QuantizedByteVectorValues {
+
+        private final int dims;
+        private final byte[][] packedVectors;
+        private final OptimizedScalarQuantizer.QuantizationResult[] correctiveTerms;
+        private final float[] centroid;
+        private final float centroidDP;
+        private final OptimizedScalarQuantizer quantizer;
+
+        InMemoryInt4QuantizedByteVectorValues(
+            int dims,
+            byte[][] packedVectors,
+            OptimizedScalarQuantizer.QuantizationResult[] correctiveTerms,
+            float[] centroid,
+            float centroidDP
+        ) {
+            this.dims = dims;
+            this.packedVectors = packedVectors;
+            this.correctiveTerms = correctiveTerms;
+            this.centroid = centroid;
+            this.centroidDP = centroidDP;
+            this.quantizer = new OptimizedScalarQuantizer(VectorSimilarityFunction.DOT_PRODUCT);
+        }
+
+        @Override
+        public int dimension() {
+            return dims;
+        }
+
+        @Override
+        public int size() {
+            return packedVectors.length;
+        }
+
+        @Override
+        public byte[] vectorValue(int ord) throws IOException {
+            return packedVectors[ord];
+        }
+
+        @Override
+        public OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int vectorOrd) throws IOException {
+            return correctiveTerms[vectorOrd];
+        }
+
+        @Override
+        public OptimizedScalarQuantizer getQuantizer() {
+            return quantizer;
+        }
+
+        @Override
+        public Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding getScalarEncoding() {
+            return Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.PACKED_NIBBLE;
+        }
+
+        @Override
+        public float[] getCentroid() throws IOException {
+            return centroid;
+        }
+
+        @Override
+        public float getCentroidDP() throws IOException {
+            return centroidDP;
+        }
+
+        @Override
+        public VectorScorer scorer(float[] query) throws IOException {
+            return null;
+        }
+
+        @Override
+        public InMemoryInt4QuantizedByteVectorValues copy() throws IOException {
+            return new InMemoryInt4QuantizedByteVectorValues(dims, packedVectors, correctiveTerms, centroid, centroidDP);
+        }
+    }
+
+    /**
+     * Plain-Java scorer that compares stored vectors with the stored vector selected through
+     * {@link #setScoringOrdinal(int)}. That method must run before {@link #score(int)}, because it
+     * is the only place where the unpacked query and its corrective terms are assigned.
+     */
+    private static class ScalarScorer implements UpdateableRandomVectorScorer {
+        private final QuantizedByteVectorValues values;
+        private final int dims;
+        private final VectorSimilarityFunction similarityFunction;
+
+        private byte[] queryUnpacked;
+        private OptimizedScalarQuantizer.QuantizationResult queryCorrections;
+
+        ScalarScorer(QuantizedByteVectorValues values, VectorSimilarityFunction similarityFunction) {
+            this.values = values;
+            this.dims = values.dimension();
+            this.similarityFunction = similarityFunction;
+        }
+
+        @Override
+        public float score(int node) throws IOException {
+            byte[] packed = values.vectorValue(node);
+            int rawDot = dotProductI4SinglePacked(queryUnpacked, packed);
+            var nodeCorrections = values.getCorrectiveTerms(node);
+            return applyI4Corrections(rawDot, dims, nodeCorrections, queryCorrections, values.getCentroidDP(), similarityFunction);
+        }
+
+        @Override
+        public int maxOrd() {
+            return values.size();
+        }
+
+        @Override
+        public void setScoringOrdinal(int node) throws IOException {
+            byte[] packed = values.vectorValue(node);
+            queryUnpacked = unpackNibbles(packed, dims);
+            queryCorrections = values.getCorrectiveTerms(node);
+        }
+    }
+
+    /**
+     * Wraps {@code packedVectors} in an in-memory {@link QuantizedByteVectorValues}. The corrective
+     * terms, the centroid and the centroid dot product are random values drawn from
+     * {@link ThreadLocalRandom}; they are not computed from the vectors.
+     */
+    static QuantizedByteVectorValues createI4QuantizedVectorValues(int dims, byte[][] packedVectors) {
+        var random = ThreadLocalRandom.current();
+        var correctiveTerms = new OptimizedScalarQuantizer.QuantizationResult[packedVectors.length];
+        for (int i = 0; i < packedVectors.length; i++) {
+            correctiveTerms[i] = new OptimizedScalarQuantizer.QuantizationResult(
+                random.nextFloat(-1f, 1f),
+                random.nextFloat(-1f, 1f),
+                random.nextFloat(-1f, 1f),
+                random.nextInt(0, dims * 15)
+            );
+        }
+        float[] centroid = new float[dims];
+        for (int i = 0; i < dims; i++) {
+            centroid[i] = random.nextFloat();
+        }
+        float centroidDP = random.nextFloat();
+        return new InMemoryInt4QuantizedByteVectorValues(dims, packedVectors, correctiveTerms, centroid, centroidDP);
+    }
+
+    /** Creates the plain-Java vector-to-vector scorer; call {@code setScoringOrdinal} before {@code score}. */
+    static UpdateableRandomVectorScorer createI4ScalarScorer(
+        QuantizedByteVectorValues values,
+        VectorSimilarityFunction similarityFunction
+    ) {
+        return new ScalarScorer(values, similarityFunction);
+    }
+
+    /**
+     * Creates a plain-Java scorer for a float query vector. The query is copied, l2-normalized when
+     * the similarity is {@code COSINE}, and quantized once using the quantizer, centroid and query
+     * bits reported by {@code values}; the returned scorer reuses that quantized query per call.
+     */
+    static RandomVectorScorer createI4ScalarQueryScorer(
+        QuantizedByteVectorValues values,
+        VectorSimilarityFunction similarityFunction,
+        float[] queryVector
+    ) throws IOException {
+        int dims = values.dimension();
+        OptimizedScalarQuantizer quantizer = values.getQuantizer();
+        float[] centroid = values.getCentroid();
+        Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding = values.getScalarEncoding();
+
+        byte[] queryQuantized = new byte[encoding.getDiscreteDimensions(dims)];
+        float[] queryCopy = Arrays.copyOf(queryVector, queryVector.length);
+        if (similarityFunction == VectorSimilarityFunction.COSINE) {
+            VectorUtil.l2normalize(queryCopy);
+        }
+        var queryCorrections = quantizer.scalarQuantize(queryCopy, queryQuantized, encoding.getQueryBits(), centroid);
+        float centroidDP = values.getCentroidDP();
+
+        return new RandomVectorScorer.AbstractRandomVectorScorer(values) {
+            @Override
+            public float score(int node) throws IOException {
+                byte[] packed = values.vectorValue(node);
+                int rawDot = dotProductI4SinglePacked(queryQuantized, packed);
+                var nodeCorrections = values.getCorrectiveTerms(node);
+                return applyI4Corrections(rawDot, dims, nodeCorrections, queryCorrections, centroidDP, similarityFunction);
+            }
+        };
+    }
+}
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/ScalarOperations.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/ScalarOperations.java
index fe5cfdce8a399..5ef0ba5a9982a 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/ScalarOperations.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/ScalarOperations.java
@@ -9,6 +9,13 @@
 
 package org.elasticsearch.benchmark.vector.scorer;
 
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.util.VectorUtil;
+import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
+
+import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
+import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT;
+
 /**
  * Basic scalar implementations of similarity operations.
  * <p>
@@ -17,6 +24,9 @@
  */
 class ScalarOperations {
 
+    /** Reciprocal of the largest 4-bit value (15); multiplied with an interval width below. */
+    private static final float FOUR_BIT_SCALE = 1f / ((1 << 4) - 1);
+
     static float cosine(byte[] a, byte[] b) {
         int sum = 0;
         int norm1 = 0;
@@ -65,4 +75,66 @@ static int squareDistance(byte[] a, byte[] b) {
         }
         return res;
     }
+
+    /**
+     * Dot product of an unpacked int4 vector with a nibble-packed one. For each packed byte {@code i}
+     * the high nibble is multiplied by {@code unpacked[i]} and the low nibble by
+     * {@code unpacked[i + packed.length]}, so {@code unpacked} must hold at least
+     * {@code 2 * packed.length} values.
+     */
+    static int dotProductI4SinglePacked(byte[] unpacked, byte[] packed) {
+        int total = 0;
+        for (int i = 0; i < packed.length; i++) {
+            byte packedByte = packed[i];
+            byte unpacked1 = unpacked[i];
+            byte unpacked2 = unpacked[i + packed.length];
+            total += (packedByte & 0x0F) * unpacked2;
+            total += ((packedByte & 0xFF) >> 4) * unpacked1;
+        }
+        return total;
+    }
+
+    /**
+     * Turns a raw int4 dot product into a similarity score using the corrective terms of both vectors.
+     * {@code EUCLIDEAN} is returned via {@code normalizeDistanceToUnitInterval},
+     * {@code MAXIMUM_INNER_PRODUCT} via {@code scaleMaxInnerProductScore}, and every other function is
+     * clamped to [-1, 1] and returned via {@code normalizeToUnitInterval}.
+     */
+    public static float applyI4Corrections(
+        int rawDot,
+        int dims,
+        OptimizedScalarQuantizer.QuantizationResult nodeCorrections,
+        OptimizedScalarQuantizer.QuantizationResult queryCorrections,
+        float centroidDP,
+        VectorSimilarityFunction similarityFunction
+    ) {
+        float ax = nodeCorrections.lowerInterval();
+        float lx = (nodeCorrections.upperInterval() - ax) * FOUR_BIT_SCALE;
+        float ay = queryCorrections.lowerInterval();
+        float ly = (queryCorrections.upperInterval() - ay) * FOUR_BIT_SCALE;
+        float x1 = nodeCorrections.quantizedComponentSum();
+        float y1 = queryCorrections.quantizedComponentSum();
+
+        float score = ax * ay * dims + ay * lx * x1 + ax * ly * y1 + lx * ly * rawDot;
+
+        // For euclidean, we need to invert the score and apply the additional correction, which is
+        // assumed to be the squared l2norm of the centroid centered vectors.
+        if (similarityFunction == EUCLIDEAN) {
+            score = queryCorrections.additionalCorrection() + nodeCorrections.additionalCorrection() - 2 * score;
+            // Ensure that 'score' (the squared euclidean distance) is non-negative. The computed value
+            // may be negative as a result of quantization loss.
+            return VectorUtil.normalizeDistanceToUnitInterval(Math.max(score, 0f));
+        } else {
+            // For cosine and max inner product, we need to apply the additional correction, which is
+            // assumed to be the non-centered dot-product between the vector and the centroid
+            score += queryCorrections.additionalCorrection() + nodeCorrections.additionalCorrection() - centroidDP;
+            if (similarityFunction == MAXIMUM_INNER_PRODUCT) {
+                return VectorUtil.scaleMaxInnerProductScore(score);
+            }
+            // Ensure that 'score' (a normalized dot product) is in [-1,1]. The computed value may be out
+            // of bounds as a result of quantization loss.
+            score = Math.clamp(score, -1, 1);
+            return VectorUtil.normalizeToUnitInterval(score);
+        }
+    }
 }
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4Benchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4Benchmark.java
new file mode 100644
index 0000000000000..f943ec9fb08ad
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4Benchmark.java
@@ -0,0 +1,131 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.util.hnsw.RandomVectorScorer;
+import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
+import org.elasticsearch.benchmark.Utils;
+import org.elasticsearch.simdvec.VectorSimilarityType;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.io.IOException;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104ScoreSupplier;
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104Scorer;
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.supportsHeapSegments;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4QuantizedVectorValues;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarQueryScorer;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarScorer;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.packNibbles;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.randomInt4Bytes;
+
+/**
+ * Benchmark that compares int4 packed-nibble quantized vector similarity scoring:
+ * scalar vs Lucene's Lucene104ScalarQuantizedVectorScorer.
+ * Run with ./gradlew -p benchmarks run --args 'VectorScorerInt4Benchmark'
+ */
+@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 5, time = 3)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@State(Scope.Thread)
+public class VectorScorerInt4Benchmark {
+
+    static {
+        Utils.configureBenchmarkLogging();
+    }
+
+    @Param({ "96", "768", "1024" })
+    public int dims;
+    public static int numVectors = 2; // ordinal 0 is the scoring target (see setup), ordinal 1 is the vector scored
+
+    @Param({ "SCALAR", "LUCENE" })
+    public VectorImplementation implementation;
+
+    @Param({ "DOT_PRODUCT", "EUCLIDEAN" })
+    public VectorSimilarityType function;
+
+    private UpdateableRandomVectorScorer scorer;
+    private RandomVectorScorer queryScorer;
+
+    static class VectorData {
+        final QuantizedByteVectorValues values;
+        final float[] queryVector;
+
+        VectorData(int dims) {
+            byte[][] packedVectors = new byte[numVectors][];
+            ThreadLocalRandom random = ThreadLocalRandom.current();
+            for (int v = 0; v < numVectors; v++) {
+                byte[] unpacked = new byte[dims];
+                randomInt4Bytes(random, unpacked);
+                packedVectors[v] = packNibbles(unpacked);
+            }
+            values = createI4QuantizedVectorValues(dims, packedVectors);
+            queryVector = new float[dims];
+            for (int i = 0; i < dims; i++) {
+                queryVector[i] = random.nextFloat();
+            }
+        }
+    }
+
+    @Setup
+    public void setup() throws IOException {
+        setup(new VectorData(dims));
+    }
+
+    void setup(VectorData vectorData) throws IOException {
+        VectorSimilarityFunction similarityFunction = function.function();
+        var values = vectorData.values;
+
+        switch (implementation) {
+            case SCALAR:
+                scorer = createI4ScalarScorer(values, similarityFunction);
+                queryScorer = createI4ScalarQueryScorer(values, similarityFunction, vectorData.queryVector);
+                break;
+            case LUCENE:
+                scorer = lucene104ScoreSupplier(values, similarityFunction).scorer();
+                if (supportsHeapSegments()) { // otherwise queryScorer is left unassigned for LUCENE
+                    queryScorer = lucene104Scorer(values, similarityFunction, vectorData.queryVector);
+                }
+                break;
+        }
+
+        scorer.setScoringOrdinal(0); // vector 0 plays the query role for score()
+    }
+
+    @TearDown
+    public void teardown() throws IOException {} // no-op
+
+    @Benchmark
+    public float score() throws IOException {
+        return scorer.score(1);
+    }
+
+    @Benchmark
+    public float scoreQuery() throws IOException {
+        return queryScorer.score(1);
+    }
+}
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmark.java
new file mode 100644
index 0000000000000..e5331fa3cbdb1
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmark.java
@@ -0,0 +1,216 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.util.hnsw.RandomVectorScorer;
+import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
+import org.elasticsearch.benchmark.Utils;
+import org.elasticsearch.simdvec.VectorSimilarityType;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104ScoreSupplier;
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104Scorer;
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.supportsHeapSegments;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4QuantizedVectorValues;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarQueryScorer;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarScorer;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.packNibbles;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.randomInt4Bytes;
+
+/**
+ * Benchmark that compares bulk scoring of int4 packed-nibble quantized vectors:
+ * scalar vs Lucene's Lucene104ScalarQuantizedVectorScorer, across sequential
+ * and random access patterns.
+ * Run with ./gradlew -p benchmarks run --args 'VectorScorerInt4BulkBenchmark'
+ */
+@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 5, time = 3)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@State(Scope.Thread)
+public class VectorScorerInt4BulkBenchmark {
+
+    static {
+        Utils.configureBenchmarkLogging();
+    }
+
+    @Param({ "1024" })
+    public int dims;
+
+    @Param({ "128", "1500", "130000" })
+    public int numVectors;
+    public int numVectorsToScore; // no @Param: copied from VectorData in setup(VectorData)
+
+    @Param({ "16", "32", "64", "256", "1024" })
+    public int bulkSize;
+
+    @Param({ "SCALAR", "LUCENE" })
+    public VectorImplementation implementation;
+
+    @Param({ "DOT_PRODUCT", "EUCLIDEAN" })
+    public VectorSimilarityType function;
+
+    private float[] scores;
+    private int[] ordinals;
+    private int[] ids;
+    private int[] toScore;
+
+    private UpdateableRandomVectorScorer scorer;
+    private RandomVectorScorer queryScorer;
+
+    static class VectorData {
+        final int numVectorsToScore;
+        final QuantizedByteVectorValues values;
+        final int[] ordinals;
+        final int targetOrd;
+        final float[] queryVector;
+
+        VectorData(int dims, int numVectors, int numVectorsToScore) {
+            this.numVectorsToScore = numVectorsToScore;
+            byte[][] packedVectors = new byte[numVectors][];
+            ThreadLocalRandom random = ThreadLocalRandom.current();
+            for (int v = 0; v < numVectors; v++) {
+                byte[] unpacked = new byte[dims];
+                randomInt4Bytes(random, unpacked);
+                packedVectors[v] = packNibbles(unpacked);
+            }
+            values = createI4QuantizedVectorValues(dims, packedVectors);
+
+            List<Integer> list = IntStream.range(0, numVectors).boxed().collect(Collectors.toList());
+            Collections.shuffle(list, random); // random, non-repeating ordinals for the *Random benchmarks
+            ordinals = list.stream().limit(numVectorsToScore).mapToInt(Integer::intValue).toArray();
+            targetOrd = random.nextInt(numVectors);
+
+            queryVector = new float[dims];
+            for (int i = 0; i < dims; i++) {
+                queryVector[i] = random.nextFloat();
+            }
+        }
+    }
+
+    @Setup
+    public void setup() throws IOException {
+        setup(new VectorData(dims, numVectors, Math.min(numVectors, 20_000))); // score at most 20,000 vectors per call
+    }
+
+    void setup(VectorData vectorData) throws IOException {
+        VectorSimilarityFunction similarityFunction = function.function();
+        var values = vectorData.values;
+
+        numVectorsToScore = vectorData.numVectorsToScore;
+        scores = new float[bulkSize];
+        toScore = new int[bulkSize];
+        ids = IntStream.range(0, numVectors).toArray();
+        ordinals = vectorData.ordinals;
+
+        switch (implementation) {
+            case SCALAR:
+                scorer = createI4ScalarScorer(values, similarityFunction);
+                queryScorer = createI4ScalarQueryScorer(values, similarityFunction, vectorData.queryVector);
+                break;
+            case LUCENE:
+                scorer = lucene104ScoreSupplier(values, similarityFunction).scorer();
+                if (supportsHeapSegments()) { // otherwise queryScorer is left unassigned for LUCENE
+                    queryScorer = lucene104Scorer(values, similarityFunction, vectorData.queryVector);
+                }
+                break;
+        }
+
+        scorer.setScoringOrdinal(vectorData.targetOrd);
+    }
+
+    @TearDown
+    public void teardown() throws IOException {}
+
+    @Benchmark
+    public float[] scoreMultipleSequential() throws IOException {
+        int v = 0;
+        while (v < numVectorsToScore) {
+            for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
+                scores[i] = scorer.score(v);
+            }
+        }
+        return scores;
+    }
+
+    @Benchmark
+    public float[] scoreMultipleRandom() throws IOException {
+        int v = 0;
+        while (v < numVectorsToScore) {
+            for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
+                scores[i] = scorer.score(ordinals[v]);
+            }
+        }
+        return scores;
+    }
+
+    @Benchmark
+    public float[] scoreQueryMultipleRandom() throws IOException {
+        int v = 0;
+        while (v < numVectorsToScore) {
+            for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
+                scores[i] = queryScorer.score(ordinals[v]);
+            }
+        }
+        return scores;
+    }
+
+    @Benchmark
+    public float[] scoreMultipleSequentialBulk() throws IOException {
+        for (int i = 0; i < numVectorsToScore; i += bulkSize) {
+            int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
+            System.arraycopy(ids, i, toScore, 0, toScoreInThisBatch);
+            scorer.bulkScore(toScore, scores, toScoreInThisBatch);
+        }
+        return scores;
+    }
+
+    @Benchmark
+    public float[] scoreMultipleRandomBulk() throws IOException {
+        for (int i = 0; i < numVectorsToScore; i += bulkSize) {
+            int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
+            System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch);
+            scorer.bulkScore(toScore, scores, toScoreInThisBatch);
+        }
+        return scores;
+    }
+
+    @Benchmark
+    public float[] scoreQueryMultipleRandomBulk() throws IOException {
+        for (int i = 0; i < numVectorsToScore; i += bulkSize) {
+            int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
+            System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch);
+            queryScorer.bulkScore(toScore, scores, toScoreInThisBatch);
+        }
+        return scores;
+    }
+}
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmark.java
new file mode 100644
index 0000000000000..a6a222ba1f044
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmark.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+package org.elasticsearch.benchmark.vector.scorer;
+
+import org.apache.lucene.util.VectorUtil;
+import org.elasticsearch.benchmark.Utils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.packNibbles;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.randomInt4Bytes;
+
+/**
+ * Benchmark comparing raw int4 packed-nibble dot product implementations:
+ * scalar (plain loop) vs Lucene (Panama-vectorized VectorUtil).
+ * Run with ./gradlew -p benchmarks run --args 'VectorScorerInt4OperationBenchmark'
+ */
+@Fork(value = 3, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Benchmark)
+@Warmup(iterations = 3, time = 1)
+@Measurement(iterations = 5, time = 1)
+public class VectorScorerInt4OperationBenchmark {
+
+    static {
+        Utils.configureBenchmarkLogging();
+    }
+
+    public byte[] unpacked;
+    public byte[] packed;
+
+    @Param({ "2", "128", "208", "256", "300", "512", "702", "1024", "1536", "2048" })
+    public int size;
+
+    @Setup(Level.Iteration) // fresh random vectors for every iteration
+    public void init() {
+        unpacked = new byte[size];
+        randomInt4Bytes(ThreadLocalRandom.current(), unpacked);
+        packed = packNibbles(unpacked);
+    }
+
+    @Benchmark
+    public int scalar() {
+        return ScalarOperations.dotProductI4SinglePacked(unpacked, packed);
+    }
+
+    @Benchmark
+    public int lucene() {
+        return VectorUtil.int4DotProductSinglePacked(unpacked, packed);
+    }
+}
diff --git a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BenchmarkTests.java b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BenchmarkTests.java
new file mode 100644
index 0000000000000..dd632058bb962
--- /dev/null
+++ b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BenchmarkTests.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.simdvec.VectorSimilarityType;
+import org.elasticsearch.test.ESTestCase;
+import org.openjdk.jmh.annotations.Param;
+
+import java.util.Arrays;
+
+public class VectorScorerInt4BenchmarkTests extends ESTestCase {
+
+    private final double delta = 1e-3;
+    private final VectorSimilarityType function;
+    private final int dims;
+
+    public VectorScorerInt4BenchmarkTests(VectorSimilarityType function, int dims) {
+        this.function = function;
+        this.dims = dims;
+    }
+
+    public void testScores() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var data = new VectorScorerInt4Benchmark.VectorData(dims);
+
+            var scalar = new VectorScorerInt4Benchmark();
+            scalar.function = function;
+            scalar.implementation = VectorImplementation.SCALAR;
+            scalar.dims = dims;
+            scalar.setup(data);
+
+            var lucene = new VectorScorerInt4Benchmark();
+            lucene.function = function;
+            lucene.implementation = VectorImplementation.LUCENE;
+            lucene.dims = dims;
+            lucene.setup(data);
+
+            try {
+                float expected = scalar.score();
+                assertEquals("LUCENE score", expected, lucene.score(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    public void testQueryScores() throws Exception {
+        // LUCENE only gets a queryScorer when heap segments are supported; without it scoreQuery() would NPE.
+        assumeTrue("Only test with heap segments", BenchmarkUtils.supportsHeapSegments());
+        for (int i = 0; i < 100; i++) {
+            var data = new VectorScorerInt4Benchmark.VectorData(dims);
+
+            var scalar = new VectorScorerInt4Benchmark();
+            scalar.function = function;
+            scalar.implementation = VectorImplementation.SCALAR;
+            scalar.dims = dims;
+            scalar.setup(data);
+
+            var lucene = new VectorScorerInt4Benchmark();
+            lucene.function = function;
+            lucene.implementation = VectorImplementation.LUCENE;
+            lucene.dims = dims;
+            lucene.setup(data);
+
+            try {
+                float expected = scalar.scoreQuery();
+                assertEquals("LUCENE scoreQuery", expected, lucene.scoreQuery(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    /**
+     * Builds one (function, dims) parameter set for every combination of the {@code @Param} values
+     * declared on {@link VectorScorerInt4Benchmark#function} and {@link VectorScorerInt4Benchmark#dims}.
+     */
+    @ParametersFactory
+    public static Iterable<Object[]> parametersFactory() {
+        try {
+            String[] dims = VectorScorerInt4Benchmark.class.getField("dims").getAnnotationsByType(Param.class)[0].value();
+            String[] functions = VectorScorerInt4Benchmark.class.getField("function").getAnnotationsByType(Param.class)[0].value();
+            return () -> Arrays.stream(dims)
+                .map(Integer::parseInt)
+                .flatMap(d -> Arrays.stream(functions).map(f -> new Object[] { VectorSimilarityType.valueOf(f), d }))
+                .iterator();
+        } catch (NoSuchFieldException e) {
+            throw new AssertionError(e);
+        }
+    }
+}
diff --git a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmarkTests.java b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmarkTests.java
new file mode 100644
index 0000000000000..8af55d30e6a97
--- /dev/null
+++ b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmarkTests.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */ + +package org.elasticsearch.benchmark.vector.scorer; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.simdvec.VectorSimilarityType; +import org.elasticsearch.test.ESTestCase; +import org.openjdk.jmh.annotations.Param; + +import java.util.Arrays; + +public class VectorScorerInt4BulkBenchmarkTests extends ESTestCase { + + private final VectorSimilarityType function; + private final float delta = 1e-3f; + private final int dims; + + public VectorScorerInt4BulkBenchmarkTests(VectorSimilarityType function, int dims) { + this.function = function; + this.dims = dims; + } + + private VectorScorerInt4BulkBenchmark createBench(VectorImplementation impl, VectorScorerInt4BulkBenchmark.VectorData vectorData) + throws Exception { + var bench = new VectorScorerInt4BulkBenchmark(); + bench.function = function; + bench.implementation = impl; + bench.dims = dims; + bench.numVectors = 1000; + bench.numVectorsToScore = 200; + bench.bulkSize = 200; + bench.setup(vectorData); + return bench; + } + + public void testSequential() throws Exception { + for (int i = 0; i < 100; i++) { + var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200); + var scalar = createBench(VectorImplementation.SCALAR, vectorData); + var lucene = createBench(VectorImplementation.LUCENE, vectorData); + + try { + float[] expected = scalar.scoreMultipleSequential(); + assertArrayEquals("LUCENE sequential", expected, lucene.scoreMultipleSequential(), delta); + } finally { + scalar.teardown(); + lucene.teardown(); + } + } + } + + public void testRandom() throws Exception { + for (int i = 0; i < 100; i++) { + var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200); + var scalar = createBench(VectorImplementation.SCALAR, vectorData); + var lucene = createBench(VectorImplementation.LUCENE, vectorData); + + try { + float[] expected = scalar.scoreMultipleRandom(); + assertArrayEquals("LUCENE random", expected, 
lucene.scoreMultipleRandom(), delta); + } finally { + scalar.teardown(); + lucene.teardown(); + } + } + } + + public void testQueryRandom() throws Exception { + for (int i = 0; i < 100; i++) { + var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200); + var scalar = createBench(VectorImplementation.SCALAR, vectorData); + var lucene = createBench(VectorImplementation.LUCENE, vectorData); + + try { + float[] expected = scalar.scoreQueryMultipleRandom(); + assertArrayEquals("LUCENE queryRandom", expected, lucene.scoreQueryMultipleRandom(), delta); + } finally { + scalar.teardown(); + lucene.teardown(); + } + } + } + + public void testSequentialBulk() throws Exception { + for (int i = 0; i < 100; i++) { + var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200); + var scalar = createBench(VectorImplementation.SCALAR, vectorData); + var lucene = createBench(VectorImplementation.LUCENE, vectorData); + + try { + float[] expected = scalar.scoreMultipleSequentialBulk(); + assertArrayEquals("LUCENE sequentialBulk", expected, lucene.scoreMultipleSequentialBulk(), delta); + } finally { + scalar.teardown(); + lucene.teardown(); + } + } + } + + public void testRandomBulk() throws Exception { + for (int i = 0; i < 100; i++) { + var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200); + var scalar = createBench(VectorImplementation.SCALAR, vectorData); + var lucene = createBench(VectorImplementation.LUCENE, vectorData); + + try { + float[] expected = scalar.scoreMultipleRandomBulk(); + assertArrayEquals("LUCENE randomBulk", expected, lucene.scoreMultipleRandomBulk(), delta); + } finally { + scalar.teardown(); + lucene.teardown(); + } + } + } + + public void testQueryRandomBulk() throws Exception { + for (int i = 0; i < 100; i++) { + var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200); + var scalar = createBench(VectorImplementation.SCALAR, vectorData); + var lucene = 
createBench(VectorImplementation.LUCENE, vectorData); + + try { + float[] expected = scalar.scoreQueryMultipleRandomBulk(); + assertArrayEquals("LUCENE queryRandomBulk", expected, lucene.scoreQueryMultipleRandomBulk(), delta); + } finally { + scalar.teardown(); + lucene.teardown(); + } + } + } + + @ParametersFactory + public static Iterable parametersFactory() { + try { + String[] dims = VectorScorerInt4BulkBenchmark.class.getField("dims").getAnnotationsByType(Param.class)[0].value(); + String[] functions = VectorScorerInt4BulkBenchmark.class.getField("function").getAnnotationsByType(Param.class)[0].value(); + return () -> Arrays.stream(dims) + .map(Integer::parseInt) + .flatMap(d -> Arrays.stream(functions).map(f -> new Object[] { VectorSimilarityType.valueOf(f), d })) + .iterator(); + } catch (NoSuchFieldException e) { + throw new AssertionError(e); + } + } +} diff --git a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmarkTests.java b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmarkTests.java new file mode 100644 index 0000000000000..d2aa18f3f27cb --- /dev/null +++ b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmarkTests.java @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.benchmark.vector.scorer; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.test.ESTestCase; +import org.openjdk.jmh.annotations.Param; + +import java.util.Arrays; + +public class VectorScorerInt4OperationBenchmarkTests extends ESTestCase { + + private final int size; + + public VectorScorerInt4OperationBenchmarkTests(int size) { + this.size = size; + } + + public void test() { + for (int i = 0; i < 100; i++) { + var bench = new VectorScorerInt4OperationBenchmark(); + bench.size = size; + bench.init(); + + int expected = bench.scalar(); + assertEquals(expected, bench.lucene()); + } + } + + @ParametersFactory + public static Iterable parametersFactory() { + try { + String[] sizes = VectorScorerInt4OperationBenchmark.class.getField("size").getAnnotationsByType(Param.class)[0].value(); + return () -> Arrays.stream(sizes).map(Integer::parseInt).map(s -> new Object[] { s }).iterator(); + } catch (NoSuchFieldException e) { + throw new AssertionError(e); + } + } +} diff --git a/libs/simdvec/src/testFixtures/java/org/elasticsearch/simdvec/internal/vectorization/VectorScorerTestUtils.java b/libs/simdvec/src/testFixtures/java/org/elasticsearch/simdvec/internal/vectorization/VectorScorerTestUtils.java index 196bd31045881..5b6b52a102c0a 100644 --- a/libs/simdvec/src/testFixtures/java/org/elasticsearch/simdvec/internal/vectorization/VectorScorerTestUtils.java +++ b/libs/simdvec/src/testFixtures/java/org/elasticsearch/simdvec/internal/vectorization/VectorScorerTestUtils.java @@ -210,4 +210,56 @@ public static void randomVector(Random random, float[] vector, VectorSimilarityF VectorUtil.l2normalize(vector); } } + + public static void randomInt4Bytes(Random random, byte[] bytes) { + for (int i = 0, len = bytes.length; i < len;) { + bytes[i++] = (byte) random.nextInt(0, 0x10); + } + } + + /** + * Packs unpacked int4 values (one value per byte) into Lucene nibble-packed format (two values per 
byte) + * written by {@code Lucene104ScalarQuantizedVectorsWriter} (ScalarEncoding#PACKED_NIBBLE format). + *

+ * The unpacked input comes from {@link OptimizedScalarQuantizer#scalarQuantize}, which quantizes a float + * vector into one byte per element in natural order: unpacked = [v0, v1, v2, ..., v_{N-1}] where N = dims. + *

+ * The packed format pairs elements that are packedLength ({@param unpacked} length / 2) apart. For example, + * with dims=8, unpacked.length is 8 and packedLength is 4: + * - {@code packed[0] = (v0 << 4) | v4} + * - {@code packed[1] = (v1 << 4) | v5} + * - {@code packed[2] = (v2 << 4) | v6} + * - {@code packed[3] = (v3 << 4) | v7} + *

+ * Or, visually, + * UNPACKED (8 bytes, natural vector order, one 4-bit value per byte): + * index: 0 1 2 3 4 5 6 7 + * [v0] [v1] [v2] [v3] [v4] [v5] [v6] [v7] + * PACKED (4 bytes, on disk, two 4-bit values per byte): + * index: 0 1 2 3 + * [v0 | v4] [v1 | v5] [v2 | v6] [v3 | v7] + * hi lo hi lo hi lo hi lo + * 7..4 3..0 7..4 3..0 7..4 3..0 7..4 3..0 + */ + public static byte[] packNibbles(byte[] unpacked) { + int packedLength = unpacked.length / 2; + byte[] packed = new byte[packedLength]; + for (int i = 0; i < packedLength; i++) { + packed[i] = (byte) ((unpacked[i] << 4) | (unpacked[i + packedLength] & 0x0F)); + } + return packed; + } + + /** + * Unpacks "nibble-packed" int4 values (two values per byte) into a byte[] (one value per byte) + */ + public static byte[] unpackNibbles(byte[] packed, int dims) { + byte[] unpacked = new byte[dims]; + int packedLen = packed.length; + for (int i = 0; i < packedLen; i++) { + unpacked[i] = (byte) ((packed[i] & 0xFF) >> 4); + unpacked[i + packedLen] = (byte) (packed[i] & 0x0F); + } + return unpacked; + } }