Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
541124d
Add int4 vector scoring benchmarks
ldematte Mar 12, 2026
73bc9af
Add native int4 vector scoring implementation
ldematte Mar 12, 2026
63d0870
Extract shared int4 correction formulas
ldematte Mar 12, 2026
731064d
Add int4 vector scorer factory tests
ldematte Mar 12, 2026
ec7a834
Move int4 correction formulas into ScalarOperations
ldematte Mar 12, 2026
c94f733
Cleanup
ldematte Mar 12, 2026
aefb340
More cleanup
ldematte Mar 12, 2026
31b3531
More renaming
ldematte Mar 12, 2026
8d57563
Removing duplicated classes + single correction function
ldematte Mar 13, 2026
b06d112
Merge remote-tracking branch 'upstream/main' into native/vec-i4
ldematte Mar 13, 2026
56271a3
Merge branch 'native/vec-i4' into native/vec-i4-impl
ldematte Mar 13, 2026
6ae48de
Small fix -- explicit cast to long to force <R> to be Long
ldematte Mar 13, 2026
4723a36
Refactoring: extract common ScorerImpl, fix heap-segment, improve seg…
ldematte Mar 13, 2026
e364e10
Add tests for bulk scoring, NIO filesystem. Move common function to A…
ldematte Mar 13, 2026
383b5d0
Add benchmarks for native; refactor benchmark code to share utilities…
ldematte Mar 13, 2026
6c94b3f
Revert native Int4 scorer integration in codec
ldematte Mar 13, 2026
b6eb707
Refactor: expose Int4 test utilities via a test fixture in libs/native
ldematte Mar 13, 2026
d0da896
Introduce lower level i4 distance functions tests
ldematte Mar 13, 2026
51691bd
Small test fix
ldematte Mar 13, 2026
e54ed73
Merge remote-tracking branch 'upstream/main' into native/vec-i4-impl
ldematte Mar 16, 2026
acf0522
Fix missing header
ldematte Mar 16, 2026
02d458d
Publish vec binaries + update version
ldematte Mar 16, 2026
176bcb7
Merge branch 'main' into native/vec-i4-impl
ldematte Mar 16, 2026
f2f7727
Merge branch 'main' into native/vec-i4-impl
ldematte Mar 16, 2026
ed81926
Merge branch 'main' into native/vec-i4-impl
ldematte Mar 17, 2026
a7b8ca7
PR comments
ldematte Mar 17, 2026
b4d6519
Merge remote-tracking branch 'upstream/main' into native/vec-i4-impl
ldematte Mar 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dependencies {
api(project(':x-pack:plugin:analytics'))
api(project(':x-pack:plugin:logsdb'))
implementation project(path: ':libs:native')
implementation(testFixtures(project(':libs:native')))
implementation project(path: ':libs:simdvec')
implementation (testFixtures(project(path: ':libs:simdvec')))
implementation project(path: ':libs:swisshash')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.VectorScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
Expand All @@ -23,87 +25,20 @@
import java.util.concurrent.ThreadLocalRandom;

import static org.elasticsearch.benchmark.vector.scorer.ScalarOperations.applyI4Corrections;
import static org.elasticsearch.benchmark.vector.scorer.ScalarOperations.dotProductI4SinglePacked;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.unpackNibbles;
import static org.elasticsearch.nativeaccess.Int4TestUtils.dotProductI4SinglePacked;
import static org.elasticsearch.nativeaccess.Int4TestUtils.unpackNibbles;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.writePackedVectorWithCorrection;

public class Int4BenchmarkUtils {

/**
* In-memory implementation of {@link QuantizedByteVectorValues} for int4 (PACKED_NIBBLE) benchmarks.
* Stores pre-quantized packed nibble vectors with synthetic corrective terms.
*/
static class InMemoryInt4QuantizedByteVectorValues extends QuantizedByteVectorValues {

private final int dims;
private final byte[][] packedVectors;
private final OptimizedScalarQuantizer.QuantizationResult[] correctiveTerms;
private final float[] centroid;
private final float centroidDP;
private final OptimizedScalarQuantizer quantizer;

InMemoryInt4QuantizedByteVectorValues(
int dims,
byte[][] packedVectors,
OptimizedScalarQuantizer.QuantizationResult[] correctiveTerms,
float[] centroid,
float centroidDP
) {
this.dims = dims;
this.packedVectors = packedVectors;
this.correctiveTerms = correctiveTerms;
this.centroid = centroid;
this.centroidDP = centroidDP;
this.quantizer = new OptimizedScalarQuantizer(VectorSimilarityFunction.DOT_PRODUCT);
}

@Override
public int dimension() {
return dims;
}

@Override
public int size() {
return packedVectors.length;
}

@Override
public byte[] vectorValue(int ord) throws IOException {
return packedVectors[ord];
}

@Override
public OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int vectorOrd) throws IOException {
return correctiveTerms[vectorOrd];
}

@Override
public OptimizedScalarQuantizer getQuantizer() {
return quantizer;
}

@Override
public Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding getScalarEncoding() {
return Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.PACKED_NIBBLE;
}

@Override
public float[] getCentroid() throws IOException {
return centroid;
}

@Override
public float getCentroidDP() throws IOException {
return centroidDP;
}
static final String VECTOR_DATA_FILE = "int4-vector.data";

@Override
public VectorScorer scorer(float[] query) throws IOException {
return null;
}

@Override
public InMemoryInt4QuantizedByteVectorValues copy() throws IOException {
return new InMemoryInt4QuantizedByteVectorValues(dims, packedVectors, correctiveTerms, centroid, centroidDP);
/**
 * Serializes the packed int4 vectors and their per-vector corrective terms into a
 * single file ({@code VECTOR_DATA_FILE}) inside the given directory. Vector i is
 * written immediately followed by corrections[i], via writePackedVectorWithCorrection.
 */
static void writeI4VectorData(Directory dir, byte[][] packedVectors, OptimizedScalarQuantizer.QuantizationResult[] corrections)
    throws IOException {
    try (IndexOutput output = dir.createOutput(VECTOR_DATA_FILE, IOContext.DEFAULT)) {
        int count = packedVectors.length;
        for (int ord = 0; ord < count; ord++) {
            writePackedVectorWithCorrection(output, packedVectors[ord], corrections[ord]);
        }
    }
}

Expand Down Expand Up @@ -142,25 +77,6 @@ public void setScoringOrdinal(int node) throws IOException {
}
}

static QuantizedByteVectorValues createI4QuantizedVectorValues(int dims, byte[][] packedVectors) {
var random = ThreadLocalRandom.current();
var correctiveTerms = new OptimizedScalarQuantizer.QuantizationResult[packedVectors.length];
for (int i = 0; i < packedVectors.length; i++) {
correctiveTerms[i] = new OptimizedScalarQuantizer.QuantizationResult(
random.nextFloat(-1f, 1f),
random.nextFloat(-1f, 1f),
random.nextFloat(-1f, 1f),
random.nextInt(0, dims * 15)
);
}
float[] centroid = new float[dims];
for (int i = 0; i < dims; i++) {
centroid[i] = random.nextFloat();
}
float centroidDP = random.nextFloat();
return new InMemoryInt4QuantizedByteVectorValues(dims, packedVectors, correctiveTerms, centroid, centroidDP);
}

static UpdateableRandomVectorScorer createI4ScalarScorer(
QuantizedByteVectorValues values,
VectorSimilarityFunction similarityFunction
Expand All @@ -179,7 +95,7 @@ static RandomVectorScorer createI4ScalarQueryScorer(
Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding = values.getScalarEncoding();

byte[] queryQuantized = new byte[encoding.getDiscreteDimensions(dims)];
float[] queryCopy = Arrays.copyOf(queryVector, queryVector.length);
float[] queryCopy = queryVector.clone();
if (similarityFunction == VectorSimilarityFunction.COSINE) {
VectorUtil.l2normalize(queryCopy);
}
Expand All @@ -196,4 +112,50 @@ public float score(int node) throws IOException {
}
};
}

/**
 * Builds synthetic per-vector corrective terms for int4 benchmark data.
 * The three interval/correction floats are drawn uniformly from [-1, 1); the
 * quantized component sum is drawn from [0, dims * 15) — 15 being the maximum
 * value a 4-bit component can take.
 */
static OptimizedScalarQuantizer.QuantizationResult[] generateCorrectiveTerms(int dims, int numVectors) {
    var rnd = ThreadLocalRandom.current();
    var terms = new OptimizedScalarQuantizer.QuantizationResult[numVectors];
    for (int ord = 0; ord < terms.length; ord++) {
        float lowerInterval = rnd.nextFloat(-1f, 1f);
        float upperInterval = rnd.nextFloat(-1f, 1f);
        float additionalCorrection = rnd.nextFloat(-1f, 1f);
        int componentSum = rnd.nextInt(0, dims * 15);
        terms[ord] = new OptimizedScalarQuantizer.QuantizationResult(lowerInterval, upperInterval, additionalCorrection, componentSum);
    }
    return terms;
}

/**
 * Creates a random centroid vector of the given dimensionality, with each
 * component drawn uniformly from [0, 1).
 */
static float[] generateCentroid(int dims) {
    var rnd = ThreadLocalRandom.current();
    float[] result = new float[dims];
    for (int component = 0; component < result.length; component++) {
        result[component] = rnd.nextFloat();
    }
    return result;
}

/**
 * Quantizes a float query vector for use with the native Int4 scorer.
 * Returns the unpacked quantized bytes (one byte per dimension, 0-15 range)
 * together with the corrective terms produced by the quantizer.
 */
static QuantizedQuery quantizeQuery(QuantizedByteVectorValues values, VectorSimilarityFunction sim, float[] queryVector)
    throws IOException {
    int dims = values.dimension();
    var quantizer = values.getQuantizer();
    var centroid = values.getCentroid();
    var encoding = values.getScalarEncoding();

    // Cosine similarity requires a unit-length query before quantization.
    float[] normalized = queryVector.clone();
    if (sim == VectorSimilarityFunction.COSINE) {
        VectorUtil.l2normalize(normalized);
    }
    // The scratch buffer is sized by the encoding's discrete dimensions, which
    // may exceed dims; only the first dims bytes are the unpacked query.
    byte[] scratch = new byte[encoding.getDiscreteDimensions(dims)];
    var corrections = quantizer.scalarQuantize(normalized, scratch, encoding.getQueryBits(), centroid);
    return new QuantizedQuery(Arrays.copyOf(scratch, dims), corrections);
}

record QuantizedQuery(byte[] unpackedQuery, OptimizedScalarQuantizer.QuantizationResult corrections) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,6 @@ static int squareDistance(byte[] a, byte[] b) {
return res;
}

/**
 * Dot product between an unpacked int4 vector (one value per byte) and a
 * single-packed vector: packed byte i pairs its high nibble with unpacked[i]
 * and its low nibble with unpacked[i + packed.length].
 */
static int dotProductI4SinglePacked(byte[] unpacked, byte[] packed) {
    int sum = 0;
    int half = packed.length;
    for (int i = 0; i < half; i++) {
        // Mask to 0xFF before shifting so the sign bit does not smear into the high nibble.
        int hi = (packed[i] & 0xFF) >> 4;
        int lo = packed[i] & 0x0F;
        sum += hi * unpacked[i] + lo * unpacked[i + half];
    }
    return sum;
}

public static float applyI4Corrections(
int rawDot,
int dims,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@

import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
import org.elasticsearch.benchmark.Utils;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.simdvec.VectorSimilarityType;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
Expand All @@ -29,21 +35,30 @@
import org.openjdk.jmh.annotations.Warmup;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.getScorerFactoryOrDie;
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104ScoreSupplier;
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104Scorer;
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.supportsHeapSegments;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4QuantizedVectorValues;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.VECTOR_DATA_FILE;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarQueryScorer;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarScorer;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.packNibbles;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.generateCentroid;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.generateCorrectiveTerms;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.quantizeQuery;
import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.writeI4VectorData;
import static org.elasticsearch.nativeaccess.Int4TestUtils.packNibbles;
import static org.elasticsearch.simdvec.ESVectorUtil.dotProduct;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.createDenseInt4VectorValues;
import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.randomInt4Bytes;

/**
* Benchmark that compares int4 packed-nibble quantized vector similarity scoring:
* scalar vs Lucene's Lucene104ScalarQuantizedVectorScorer.
* scalar vs Lucene's Lucene104ScalarQuantizedVectorScorer vs native.
* Run with ./gradlew -p benchmarks run --args 'VectorScorerInt4Benchmark'
*/
@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
Expand All @@ -62,28 +77,37 @@ public class VectorScorerInt4Benchmark {
public int dims;
public static int numVectors = 2;

@Param({ "SCALAR", "LUCENE" })
@Param
public VectorImplementation implementation;

@Param({ "DOT_PRODUCT", "EUCLIDEAN" })
public VectorSimilarityType function;

private Path path;
private Directory dir;
private IndexInput in;

private UpdateableRandomVectorScorer scorer;
private RandomVectorScorer queryScorer;

static class VectorData {
final QuantizedByteVectorValues values;
final byte[][] packedVectors;
final OptimizedScalarQuantizer.QuantizationResult[] corrections;
final float[] centroid;
final float centroidDp;
final float[] queryVector;

VectorData(int dims) {
byte[][] packedVectors = new byte[numVectors][];
packedVectors = new byte[numVectors][];
ThreadLocalRandom random = ThreadLocalRandom.current();
for (int v = 0; v < numVectors; v++) {
byte[] unpacked = new byte[dims];
randomInt4Bytes(random, unpacked);
packedVectors[v] = packNibbles(unpacked);
}
values = createI4QuantizedVectorValues(dims, packedVectors);
corrections = generateCorrectiveTerms(dims, numVectors);
centroid = generateCentroid(dims);
centroidDp = dotProduct(centroid, centroid);
queryVector = new float[dims];
for (int i = 0; i < dims; i++) {
queryVector[i] = random.nextFloat();
Expand All @@ -98,7 +122,20 @@ public void setup() throws IOException {

void setup(VectorData vectorData) throws IOException {
VectorSimilarityFunction similarityFunction = function.function();
var values = vectorData.values;

path = Files.createTempDirectory("Int4ScorerBenchmark");
dir = new MMapDirectory(path);
writeI4VectorData(dir, vectorData.packedVectors, vectorData.corrections);
in = dir.openInput(VECTOR_DATA_FILE, IOContext.DEFAULT);

QuantizedByteVectorValues values = createDenseInt4VectorValues(
dims,
numVectors,
vectorData.centroid,
vectorData.centroidDp,
in,
similarityFunction
);

switch (implementation) {
case SCALAR:
Expand All @@ -111,13 +148,32 @@ void setup(VectorData vectorData) throws IOException {
queryScorer = lucene104Scorer(values, similarityFunction, vectorData.queryVector);
}
break;
case NATIVE:
var factory = getScorerFactoryOrDie();
scorer = factory.getInt4VectorScorerSupplier(function, in, values).orElseThrow().scorer();
if (supportsHeapSegments()) {
var qQuery = quantizeQuery(values, similarityFunction, vectorData.queryVector);
queryScorer = factory.getInt4VectorScorer(
similarityFunction,
values,
qQuery.unpackedQuery(),
qQuery.corrections().lowerInterval(),
qQuery.corrections().upperInterval(),
qQuery.corrections().additionalCorrection(),
qQuery.corrections().quantizedComponentSum()
).orElseThrow();
}
break;
}

scorer.setScoringOrdinal(0);
}

@TearDown
// Close the index input and directory opened in setup(), then delete the
// temporary benchmark directory so repeated runs do not accumulate files.
public void teardown() throws IOException {
IOUtils.close(in, dir);
IOUtils.rm(path);
}

@Benchmark
public float score() throws IOException {
Expand Down
Loading
Loading