Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
541124d
Add int4 vector scoring benchmarks
ldematte Mar 12, 2026
73bc9af
Add native int4 vector scoring implementation
ldematte Mar 12, 2026
63d0870
Extract shared int4 correction formulas
ldematte Mar 12, 2026
731064d
Add int4 vector scorer factory tests
ldematte Mar 12, 2026
ec7a834
Move int4 correction formulas into ScalarOperations
ldematte Mar 12, 2026
c94f733
Cleanup
ldematte Mar 12, 2026
aefb340
More cleanup
ldematte Mar 12, 2026
31b3531
More renaming
ldematte Mar 12, 2026
8d57563
Removing duplicated classes + single correction function
ldematte Mar 13, 2026
b06d112
Merge remote-tracking branch 'upstream/main' into native/vec-i4
ldematte Mar 13, 2026
56271a3
Merge branch 'native/vec-i4' into native/vec-i4-impl
ldematte Mar 13, 2026
6ae48de
Small fix -- explicit cast to long to force <R> to be Long
ldematte Mar 13, 2026
4723a36
Refactoring: extract common ScorerImpl, fix heap-segment, improve seg…
ldematte Mar 13, 2026
e364e10
Add tests for bulk scoring, NIO filesystem. Move common function to A…
ldematte Mar 13, 2026
383b5d0
Add benchmarks for native; refactor benchmark code to share utilities…
ldematte Mar 13, 2026
6c94b3f
Revert native Int4 scorer integration in codec
ldematte Mar 13, 2026
b6eb707
Refactor: expose Int4 test utilities via a test fixture in libs/native
ldematte Mar 13, 2026
d0da896
Introduce lower level i4 distance functions tests
ldematte Mar 13, 2026
51691bd
Small test fix
ldematte Mar 13, 2026
e54ed73
Merge remote-tracking branch 'upstream/main' into native/vec-i4-impl
ldematte Mar 16, 2026
acf0522
Fix missing header
ldematte Mar 16, 2026
02d458d
Publish vec binaries + update version
ldematte Mar 16, 2026
176bcb7
Merge branch 'main' into native/vec-i4-impl
ldematte Mar 16, 2026
f2f7727
Merge branch 'main' into native/vec-i4-impl
ldematte Mar 16, 2026
ed81926
Merge branch 'main' into native/vec-i4-impl
ldematte Mar 17, 2026
a7b8ca7
PR comments
ldematte Mar 17, 2026
b4d6519
Merge remote-tracking branch 'upstream/main' into native/vec-i4-impl
ldematte Mar 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dependencies {
api(project(':x-pack:plugin:analytics'))
api(project(':x-pack:plugin:logsdb'))
implementation project(path: ':libs:native')
implementation(testFixtures(project(':libs:native')))
implementation project(path: ':libs:simdvec')
implementation (testFixtures(project(path: ':libs:simdvec')))
implementation project(path: ':libs:swisshash')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.VectorScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
Expand All @@ -23,87 +25,20 @@
import java.util.concurrent.ThreadLocalRandom;

import static org.elasticsearch.benchmark.vector.scorer.ScalarOperations.applyI4Corrections;
import static org.elasticsearch.benchmark.vector.scorer.ScalarOperations.dotProductI4SinglePacked;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.unpackNibbles;
import static org.elasticsearch.nativeaccess.Int4TestUtils.dotProductI4SinglePacked;
import static org.elasticsearch.nativeaccess.Int4TestUtils.unpackNibbles;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.writePackedVectorWithCorrection;

public class Int4BenchmarkUtils {

/**
* In-memory implementation of {@link QuantizedByteVectorValues} for int4 (PACKED_NIBBLE) benchmarks.
* Stores pre-quantized packed nibble vectors with synthetic corrective terms.
*/
static class InMemoryInt4QuantizedByteVectorValues extends QuantizedByteVectorValues {

private final int dims;
private final byte[][] packedVectors;
private final OptimizedScalarQuantizer.QuantizationResult[] correctiveTerms;
private final float[] centroid;
private final float centroidDP;
private final OptimizedScalarQuantizer quantizer;

InMemoryInt4QuantizedByteVectorValues(
int dims,
byte[][] packedVectors,
OptimizedScalarQuantizer.QuantizationResult[] correctiveTerms,
float[] centroid,
float centroidDP
) {
this.dims = dims;
this.packedVectors = packedVectors;
this.correctiveTerms = correctiveTerms;
this.centroid = centroid;
this.centroidDP = centroidDP;
this.quantizer = new OptimizedScalarQuantizer(VectorSimilarityFunction.DOT_PRODUCT);
}

@Override
public int dimension() {
return dims;
}

@Override
public int size() {
return packedVectors.length;
}

@Override
public byte[] vectorValue(int ord) throws IOException {
return packedVectors[ord];
}

@Override
public OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int vectorOrd) throws IOException {
return correctiveTerms[vectorOrd];
}

@Override
public OptimizedScalarQuantizer getQuantizer() {
return quantizer;
}

@Override
public Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding getScalarEncoding() {
return Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.PACKED_NIBBLE;
}

@Override
public float[] getCentroid() throws IOException {
return centroid;
}

@Override
public float getCentroidDP() throws IOException {
return centroidDP;
}
static final String VECTOR_DATA_FILE = "int4-vector.data";

@Override
public VectorScorer scorer(float[] query) throws IOException {
return null;
}

@Override
public InMemoryInt4QuantizedByteVectorValues copy() throws IOException {
return new InMemoryInt4QuantizedByteVectorValues(dims, packedVectors, correctiveTerms, centroid, centroidDP);
/**
 * Serializes the packed int4 vectors and their per-vector corrective terms into a
 * single file ({@code VECTOR_DATA_FILE}) inside the given directory. Vector i is
 * written immediately followed by corrections[i], via writePackedVectorWithCorrection.
 */
static void writeI4VectorData(Directory dir, byte[][] packedVectors, OptimizedScalarQuantizer.QuantizationResult[] corrections)
    throws IOException {
    try (IndexOutput output = dir.createOutput(VECTOR_DATA_FILE, IOContext.DEFAULT)) {
        int count = packedVectors.length;
        for (int ord = 0; ord < count; ord++) {
            writePackedVectorWithCorrection(output, packedVectors[ord], corrections[ord]);
        }
    }
}

Expand Down Expand Up @@ -142,25 +77,6 @@ public void setScoringOrdinal(int node) throws IOException {
}
}

static QuantizedByteVectorValues createI4QuantizedVectorValues(int dims, byte[][] packedVectors) {
var random = ThreadLocalRandom.current();
var correctiveTerms = new OptimizedScalarQuantizer.QuantizationResult[packedVectors.length];
for (int i = 0; i < packedVectors.length; i++) {
correctiveTerms[i] = new OptimizedScalarQuantizer.QuantizationResult(
random.nextFloat(-1f, 1f),
random.nextFloat(-1f, 1f),
random.nextFloat(-1f, 1f),
random.nextInt(0, dims * 15)
);
}
float[] centroid = new float[dims];
for (int i = 0; i < dims; i++) {
centroid[i] = random.nextFloat();
}
float centroidDP = random.nextFloat();
return new InMemoryInt4QuantizedByteVectorValues(dims, packedVectors, correctiveTerms, centroid, centroidDP);
}

static UpdateableRandomVectorScorer createI4ScalarScorer(
QuantizedByteVectorValues values,
VectorSimilarityFunction similarityFunction
Expand All @@ -179,7 +95,7 @@ static RandomVectorScorer createI4ScalarQueryScorer(
Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding = values.getScalarEncoding();

byte[] queryQuantized = new byte[encoding.getDiscreteDimensions(dims)];
float[] queryCopy = Arrays.copyOf(queryVector, queryVector.length);
float[] queryCopy = queryVector.clone();
if (similarityFunction == VectorSimilarityFunction.COSINE) {
VectorUtil.l2normalize(queryCopy);
}
Expand All @@ -196,4 +112,50 @@ public float score(int node) throws IOException {
}
};
}

/**
 * Builds synthetic per-vector corrective terms for int4 benchmark data.
 * The three interval/correction floats are drawn uniformly from [-1, 1); the
 * quantized component sum is drawn from [0, dims * 15) — 15 being the maximum
 * value a 4-bit component can take.
 */
static OptimizedScalarQuantizer.QuantizationResult[] generateCorrectiveTerms(int dims, int numVectors) {
    var rnd = ThreadLocalRandom.current();
    var terms = new OptimizedScalarQuantizer.QuantizationResult[numVectors];
    for (int ord = 0; ord < terms.length; ord++) {
        float lowerInterval = rnd.nextFloat(-1f, 1f);
        float upperInterval = rnd.nextFloat(-1f, 1f);
        float additionalCorrection = rnd.nextFloat(-1f, 1f);
        int componentSum = rnd.nextInt(0, dims * 15);
        terms[ord] = new OptimizedScalarQuantizer.QuantizationResult(lowerInterval, upperInterval, additionalCorrection, componentSum);
    }
    return terms;
}

/**
 * Creates a random centroid vector of the given dimensionality, with each
 * component drawn uniformly from [0, 1).
 */
static float[] generateCentroid(int dims) {
    var rnd = ThreadLocalRandom.current();
    float[] result = new float[dims];
    for (int component = 0; component < result.length; component++) {
        result[component] = rnd.nextFloat();
    }
    return result;
}

/**
 * Quantizes a float query vector for use with the native Int4 scorer.
 * Returns the unpacked quantized bytes (one byte per dimension, 0-15 range)
 * together with the corrective terms produced by the quantizer.
 */
static QuantizedQuery quantizeQuery(QuantizedByteVectorValues values, VectorSimilarityFunction sim, float[] queryVector)
    throws IOException {
    int dims = values.dimension();
    var quantizer = values.getQuantizer();
    var centroid = values.getCentroid();
    var encoding = values.getScalarEncoding();

    // Cosine similarity requires a unit-length query before quantization.
    float[] normalized = queryVector.clone();
    if (sim == VectorSimilarityFunction.COSINE) {
        VectorUtil.l2normalize(normalized);
    }
    // The scratch buffer is sized by the encoding's discrete dimensions, which
    // may exceed dims; only the first dims bytes are the unpacked query.
    byte[] scratch = new byte[encoding.getDiscreteDimensions(dims)];
    var corrections = quantizer.scalarQuantize(normalized, scratch, encoding.getQueryBits(), centroid);
    return new QuantizedQuery(Arrays.copyOf(scratch, dims), corrections);
}

record QuantizedQuery(byte[] unpackedQuery, OptimizedScalarQuantizer.QuantizationResult corrections) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,6 @@ static int squareDistance(byte[] a, byte[] b) {
return res;
}

/**
 * Dot product between an unpacked int4 vector (one value per byte) and a
 * single-packed vector: packed byte i pairs its high nibble with unpacked[i]
 * and its low nibble with unpacked[i + packed.length].
 */
static int dotProductI4SinglePacked(byte[] unpacked, byte[] packed) {
    int sum = 0;
    int half = packed.length;
    for (int i = 0; i < half; i++) {
        // Mask to 0xFF before shifting so the sign bit does not smear into the high nibble.
        int hi = (packed[i] & 0xFF) >> 4;
        int lo = packed[i] & 0x0F;
        sum += hi * unpacked[i] + lo * unpacked[i + half];
    }
    return sum;
}

public static float applyI4Corrections(
int rawDot,
int dims,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@

import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
import org.elasticsearch.benchmark.Utils;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.simdvec.VectorSimilarityType;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
Expand All @@ -29,21 +35,30 @@
import org.openjdk.jmh.annotations.Warmup;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.getScorerFactoryOrDie;
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104ScoreSupplier;
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104Scorer;
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.supportsHeapSegments;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4QuantizedVectorValues;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.VECTOR_DATA_FILE;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarQueryScorer;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarScorer;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.packNibbles;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.generateCentroid;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.generateCorrectiveTerms;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.quantizeQuery;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.writeI4VectorData;
import static org.elasticsearch.nativeaccess.Int4TestUtils.packNibbles;
import static org.elasticsearch.simdvec.ESVectorUtil.dotProduct;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.createDenseInt4VectorValues;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.randomInt4Bytes;

/**
* Benchmark that compares int4 packed-nibble quantized vector similarity scoring:
* scalar vs Lucene's Lucene104ScalarQuantizedVectorScorer.
* scalar vs Lucene's Lucene104ScalarQuantizedVectorScorer vs native.
* Run with ./gradlew -p benchmarks run --args 'VectorScorerInt4Benchmark'
*/
@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
Expand All @@ -62,28 +77,37 @@ public class VectorScorerInt4Benchmark {
public int dims;
public static int numVectors = 2;

@Param({ "SCALAR", "LUCENE" })
@Param
public VectorImplementation implementation;

@Param({ "DOT_PRODUCT", "EUCLIDEAN" })
public VectorSimilarityType function;

private Path path;
private Directory dir;
private IndexInput in;

private UpdateableRandomVectorScorer scorer;
private RandomVectorScorer queryScorer;

static class VectorData {
final QuantizedByteVectorValues values;
final byte[][] packedVectors;
final OptimizedScalarQuantizer.QuantizationResult[] corrections;
final float[] centroid;
final float centroidDp;
final float[] queryVector;

VectorData(int dims) {
byte[][] packedVectors = new byte[numVectors][];
packedVectors = new byte[numVectors][];
ThreadLocalRandom random = ThreadLocalRandom.current();
for (int v = 0; v < numVectors; v++) {
byte[] unpacked = new byte[dims];
randomInt4Bytes(random, unpacked);
packedVectors[v] = packNibbles(unpacked);
}
values = createI4QuantizedVectorValues(dims, packedVectors);
corrections = generateCorrectiveTerms(dims, numVectors);
centroid = generateCentroid(dims);
centroidDp = dotProduct(centroid, centroid);
queryVector = new float[dims];
for (int i = 0; i < dims; i++) {
queryVector[i] = random.nextFloat();
Expand All @@ -98,7 +122,20 @@ public void setup() throws IOException {

void setup(VectorData vectorData) throws IOException {
VectorSimilarityFunction similarityFunction = function.function();
var values = vectorData.values;

path = Files.createTempDirectory("Int4ScorerBenchmark");
dir = new MMapDirectory(path);
writeI4VectorData(dir, vectorData.packedVectors, vectorData.corrections);
in = dir.openInput(VECTOR_DATA_FILE, IOContext.DEFAULT);

QuantizedByteVectorValues values = createDenseInt4VectorValues(
dims,
numVectors,
vectorData.centroid,
vectorData.centroidDp,
in,
similarityFunction
);

switch (implementation) {
case SCALAR:
Expand All @@ -111,13 +148,32 @@ void setup(VectorData vectorData) throws IOException {
queryScorer = lucene104Scorer(values, similarityFunction, vectorData.queryVector);
}
break;
case NATIVE:
var factory = getScorerFactoryOrDie();
scorer = factory.getInt4VectorScorerSupplier(function, in, values).orElseThrow().scorer();
if (supportsHeapSegments()) {
var qQuery = quantizeQuery(values, similarityFunction, vectorData.queryVector);
queryScorer = factory.getInt4VectorScorer(
similarityFunction,
values,
qQuery.unpackedQuery(),
qQuery.corrections().lowerInterval(),
qQuery.corrections().upperInterval(),
qQuery.corrections().additionalCorrection(),
qQuery.corrections().quantizedComponentSum()
).orElseThrow();
}
break;
}

scorer.setScoringOrdinal(0);
}

@TearDown
// Close the index input and directory opened in setup(), then delete the
// temporary benchmark directory so repeated runs do not accumulate files.
public void teardown() throws IOException {
IOUtils.close(in, dir);
IOUtils.rm(path);
}

@Benchmark
public float score() throws IOException {
Expand Down
Loading
Loading