diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java
index 91cd12e2d87b8..ed0ddce3268a4 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java
@@ -11,6 +11,7 @@
 
 import org.apache.lucene.backward_codecs.lucene99.OffHeapQuantizedByteVectorValues;
 import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer;
 import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues;
 import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues;
 import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorScorer;
@@ -134,6 +135,41 @@ static RandomVectorScorer luceneScorer(QuantizedByteVectorValues values, VectorS
         return new Lucene99ScalarQuantizedVectorScorer(null).getRandomVectorScorer(sim, values, queryVec);
     }
 
+    /**
+     * Creates a scorer supplier backed by Lucene's {@link Lucene104ScalarQuantizedVectorScorer}.
+     * The Lucene scorer is constructed with a {@code null} argument, like the Lucene 9.9 helper above.
+     *
+     * @param values the lucene104 quantized vectors to score
+     * @param sim    the similarity function to score with
+     * @return the supplier produced by {@code getRandomVectorScorerSupplier(sim, values)}
+     * @throws IOException if Lucene fails while creating the supplier
+     */
+    static RandomVectorScorerSupplier lucene104ScoreSupplier(
+        org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues values,
+        VectorSimilarityFunction sim
+    ) throws IOException {
+        var lucene104 = new Lucene104ScalarQuantizedVectorScorer(null);
+        return lucene104.getRandomVectorScorerSupplier(sim, values);
+    }
+
+    /**
+     * Creates a scorer of a float query vector backed by Lucene's {@link Lucene104ScalarQuantizedVectorScorer}.
+     *
+     * @param values   the lucene104 quantized vectors to score against
+     * @param sim      the similarity function to score with
+     * @param queryVec the query vector
+     * @return the scorer produced by {@code getRandomVectorScorer(sim, values, queryVec)}
+     * @throws IOException if Lucene fails while creating the scorer
+     */
+    static RandomVectorScorer lucene104Scorer(
+        org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues values,
+        VectorSimilarityFunction sim,
+        float[] queryVec
+    ) throws IOException {
+        var lucene104 = new Lucene104ScalarQuantizedVectorScorer(null);
+        return lucene104.getRandomVectorScorer(sim, values, queryVec);
+    }
+
     static RuntimeException rethrow(Throwable t) {
         if (t instanceof Error err) {
             throw err;
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/Int4BenchmarkUtils.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/Int4BenchmarkUtils.java
new file mode 100644
index 0000000000000..deec4d9ea4201
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/Int4BenchmarkUtils.java
@@ -0,0 +1,246 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat;
+import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.VectorScorer;
+import org.apache.lucene.util.VectorUtil;
+import org.apache.lucene.util.hnsw.RandomVectorScorer;
+import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
+import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.concurrent.ThreadLocalRandom;
+
+import static org.elasticsearch.benchmark.vector.scorer.ScalarOperations.applyI4Corrections;
+import static org.elasticsearch.benchmark.vector.scorer.ScalarOperations.dotProductI4SinglePacked;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.unpackNibbles;
+
+/**
+ * Helpers for the int4 (packed-nibble) vector scorer benchmarks: an in-memory vector store with synthetic
+ * corrective terms, and plain-Java reference scorers built on {@link ScalarOperations}.
+ */
+public class Int4BenchmarkUtils {
+
+    /**
+     * Heap-backed {@link QuantizedByteVectorValues} for the int4 ({@code PACKED_NIBBLE}) benchmarks.
+     * Holds already-quantized packed-nibble vectors alongside synthetic corrective terms, and always
+     * reports a quantizer created for {@code DOT_PRODUCT}.
+     */
+    static class InMemoryInt4QuantizedByteVectorValues extends QuantizedByteVectorValues {
+
+        private final int dims;
+        private final byte[][] packedVectors;
+        private final OptimizedScalarQuantizer.QuantizationResult[] correctiveTerms;
+        private final float[] centroid;
+        private final float centroidDP;
+        private final OptimizedScalarQuantizer quantizer;
+
+        InMemoryInt4QuantizedByteVectorValues(
+            int dims,
+            byte[][] packedVectors,
+            OptimizedScalarQuantizer.QuantizationResult[] correctiveTerms,
+            float[] centroid,
+            float centroidDP
+        ) {
+            this.dims = dims;
+            this.packedVectors = packedVectors;
+            this.correctiveTerms = correctiveTerms;
+            this.centroid = centroid;
+            this.centroidDP = centroidDP;
+            this.quantizer = new OptimizedScalarQuantizer(VectorSimilarityFunction.DOT_PRODUCT);
+        }
+
+        @Override
+        public int dimension() {
+            return dims;
+        }
+
+        @Override
+        public int size() {
+            return packedVectors.length;
+        }
+
+        @Override
+        public byte[] vectorValue(int ord) throws IOException {
+            return packedVectors[ord];
+        }
+
+        @Override
+        public OptimizedScalarQuantizer.QuantizationResult getCorrectiveTerms(int vectorOrd) throws IOException {
+            return correctiveTerms[vectorOrd];
+        }
+
+        @Override
+        public OptimizedScalarQuantizer getQuantizer() {
+            return quantizer;
+        }
+
+        @Override
+        public Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding getScalarEncoding() {
+            return Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.PACKED_NIBBLE;
+        }
+
+        @Override
+        public float[] getCentroid() throws IOException {
+            return centroid;
+        }
+
+        @Override
+        public float getCentroidDP() throws IOException {
+            return centroidDP;
+        }
+
+        @Override
+        public VectorScorer scorer(float[] query) throws IOException {
+            // This in-memory implementation provides no VectorScorer and always returns null.
+            return null;
+        }
+
+        @Override
+        public InMemoryInt4QuantizedByteVectorValues copy() throws IOException {
+            // The copy shares the backing arrays; only the quantizer instance is new.
+            return new InMemoryInt4QuantizedByteVectorValues(dims, packedVectors, correctiveTerms, centroid, centroidDP);
+        }
+    }
+
+    /**
+     * Plain-Java scorer that compares one stored vector, selected via {@link #setScoringOrdinal(int)}, against the
+     * other stored vectors. {@code setScoringOrdinal} must be called before {@link #score(int)}, which otherwise
+     * has no query to read.
+     */
+    private static class ScalarScorer implements UpdateableRandomVectorScorer {
+        private final QuantizedByteVectorValues values;
+        private final int dims;
+        private final VectorSimilarityFunction similarityFunction;
+
+        // State of the vector chosen by the last setScoringOrdinal call.
+        private byte[] queryUnpacked;
+        private OptimizedScalarQuantizer.QuantizationResult queryCorrections;
+
+        ScalarScorer(QuantizedByteVectorValues values, VectorSimilarityFunction similarityFunction) {
+            this.values = values;
+            this.dims = values.dimension();
+            this.similarityFunction = similarityFunction;
+        }
+
+        @Override
+        public float score(int node) throws IOException {
+            int rawDot = dotProductI4SinglePacked(queryUnpacked, values.vectorValue(node));
+            return applyI4Corrections(
+                rawDot,
+                dims,
+                values.getCorrectiveTerms(node),
+                queryCorrections,
+                values.getCentroidDP(),
+                similarityFunction
+            );
+        }
+
+        @Override
+        public int maxOrd() {
+            return values.size();
+        }
+
+        @Override
+        public void setScoringOrdinal(int node) throws IOException {
+            // The stored vector is packed; unpack it once here so score() can use it as the query side.
+            queryUnpacked = unpackNibbles(values.vectorValue(node), dims);
+            queryCorrections = values.getCorrectiveTerms(node);
+        }
+    }
+
+    /**
+     * Wraps pre-packed int4 vectors in an in-memory {@link QuantizedByteVectorValues}, filling in random
+     * corrective terms, a random centroid and a random centroid dot product.
+     *
+     * @param dims          number of dimensions passed through to the returned values
+     * @param packedVectors the packed-nibble vectors, stored by reference
+     * @return values reporting {@code PACKED_NIBBLE} encoding over {@code packedVectors}
+     */
+    static QuantizedByteVectorValues createI4QuantizedVectorValues(int dims, byte[][] packedVectors) {
+        final var rng = ThreadLocalRandom.current();
+        final int count = packedVectors.length;
+
+        // One synthetic QuantizationResult per vector; dims * 15 is the largest sum dims 4-bit values can reach.
+        var terms = new OptimizedScalarQuantizer.QuantizationResult[count];
+        for (int ord = 0; ord < count; ord++) {
+            terms[ord] = new OptimizedScalarQuantizer.QuantizationResult(
+                rng.nextFloat(-1f, 1f),
+                rng.nextFloat(-1f, 1f),
+                rng.nextFloat(-1f, 1f),
+                rng.nextInt(0, dims * 15)
+            );
+        }
+
+        float[] syntheticCentroid = new float[dims];
+        for (int d = 0; d < syntheticCentroid.length; d++) {
+            syntheticCentroid[d] = rng.nextFloat();
+        }
+        float syntheticCentroidDP = rng.nextFloat();
+
+        return new InMemoryInt4QuantizedByteVectorValues(dims, packedVectors, terms, syntheticCentroid, syntheticCentroidDP);
+    }
+
+    /**
+     * Creates the plain-Java reference scorer over {@code values}. Callers must invoke
+     * {@link UpdateableRandomVectorScorer#setScoringOrdinal(int)} on the result before scoring.
+     */
+    static UpdateableRandomVectorScorer createI4ScalarScorer(
+        QuantizedByteVectorValues values,
+        VectorSimilarityFunction similarityFunction
+    ) {
+        return new ScalarScorer(values, similarityFunction);
+    }
+
+    /**
+     * Creates a plain-Java reference scorer for a float query vector.
+     * <p>
+     * A copy of the query is l2-normalized when the similarity is {@code COSINE}, then quantized with the quantizer,
+     * centroid and query bit width reported by {@code values}. Each call to {@code score} then combines the raw int4
+     * dot product with the corrective terms via {@link ScalarOperations#applyI4Corrections}.
+     *
+     * @param values             the stored int4 vectors
+     * @param similarityFunction the similarity to score with
+     * @param queryVector        the query; the array itself is not modified
+     * @return a scorer of {@code queryVector} against the vectors in {@code values}
+     * @throws IOException if reading from {@code values} fails
+     */
+    static RandomVectorScorer createI4ScalarQueryScorer(
+        QuantizedByteVectorValues values,
+        VectorSimilarityFunction similarityFunction,
+        float[] queryVector
+    ) throws IOException {
+        final int dims = values.dimension();
+        final OptimizedScalarQuantizer quantizer = values.getQuantizer();
+        final float[] centroid = values.getCentroid();
+        final var encoding = values.getScalarEncoding();
+
+        final byte[] quantizedQuery = new byte[encoding.getDiscreteDimensions(dims)];
+        // Quantize a copy so the caller's vector is left untouched.
+        final float[] query = queryVector.clone();
+        if (similarityFunction == VectorSimilarityFunction.COSINE) {
+            VectorUtil.l2normalize(query);
+        }
+        final var queryTerms = quantizer.scalarQuantize(query, quantizedQuery, encoding.getQueryBits(), centroid);
+        final float centroidDP = values.getCentroidDP();
+
+        return new RandomVectorScorer.AbstractRandomVectorScorer(values) {
+            @Override
+            public float score(int node) throws IOException {
+                int rawDot = dotProductI4SinglePacked(quantizedQuery, values.vectorValue(node));
+                return applyI4Corrections(rawDot, dims, values.getCorrectiveTerms(node), queryTerms, centroidDP, similarityFunction);
+            }
+        };
+    }
+}
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/ScalarOperations.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/ScalarOperations.java
index fe5cfdce8a399..5ef0ba5a9982a 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/ScalarOperations.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/ScalarOperations.java
@@ -9,6 +9,13 @@
 
 package org.elasticsearch.benchmark.vector.scorer;
 
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.util.VectorUtil;
+import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
+
+import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
+import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT;
+
 /**
  * Basic scalar implementations of similarity operations.
  * <p>
@@ -17,6 +24,8 @@
  */
 class ScalarOperations {
 
+    private static final float FOUR_BIT_SCALE = 1f / ((1 << 4) - 1); // 1/15: one over the largest 4-bit value
+
     static float cosine(byte[] a, byte[] b) {
         int sum = 0;
         int norm1 = 0;
@@ -65,4 +74,84 @@ static int squareDistance(byte[] a, byte[] b) {
         }
         return res;
     }
+
+    /**
+     * Dot product between an unpacked int4 vector and a nibble-packed int4 vector.
+     * <p>
+     * Each byte {@code packed[i]} contributes twice: its high nibble is multiplied by {@code unpacked[i]} and its
+     * low nibble by {@code unpacked[i + packed.length]}. No length check is performed.
+     *
+     * @param unpacked vector holding one value per byte
+     * @param packed   vector holding two 4-bit values per byte
+     * @return the accumulated integer dot product
+     */
+    static int dotProductI4SinglePacked(byte[] unpacked, byte[] packed) {
+        final int half = packed.length;
+        int sum = 0;
+        for (int i = 0; i < half; i++) {
+            int both = packed[i] & 0xFF;
+            byte pairedWithHigh = unpacked[i];
+            byte pairedWithLow = unpacked[i + half];
+            sum += (both & 0x0F) * pairedWithLow;
+            sum += (both >> 4) * pairedWithHigh;
+        }
+        return sum;
+    }
+
+    /**
+     * Turns a raw int4 dot product into a similarity score using the corrective terms of both vectors.
+     * <p>
+     * The raw value is first expanded with each side's lower interval, its interval width scaled by
+     * {@code FOUR_BIT_SCALE}, and its quantized component sum. The result is then adjusted per similarity function:
+     * <ul>
+     *   <li>{@code EUCLIDEAN}: combined with both additional corrections into a squared distance, floored at zero and
+     *   passed to {@link VectorUtil#normalizeDistanceToUnitInterval}.</li>
+     *   <li>{@code MAXIMUM_INNER_PRODUCT}: corrected by both additional corrections minus {@code centroidDP} and passed
+     *   to {@link VectorUtil#scaleMaxInnerProductScore}.</li>
+     *   <li>anything else: corrected the same way, clamped to {@code [-1, 1]} and passed to
+     *   {@link VectorUtil#normalizeToUnitInterval}.</li>
+     * </ul>
+     *
+     * @param rawDot             raw integer dot product of the two quantized vectors
+     * @param dims               number of dimensions
+     * @param nodeCorrections    corrective terms of the stored (node) vector
+     * @param queryCorrections   corrective terms of the query vector
+     * @param centroidDP         centroid dot product; not used for {@code EUCLIDEAN}
+     * @param similarityFunction similarity function that selects the final correction
+     * @return the corrected, normalized score
+     */
+    public static float applyI4Corrections(
+        int rawDot,
+        int dims,
+        OptimizedScalarQuantizer.QuantizationResult nodeCorrections,
+        OptimizedScalarQuantizer.QuantizationResult queryCorrections,
+        float centroidDP,
+        VectorSimilarityFunction similarityFunction
+    ) {
+        final float nodeLower = nodeCorrections.lowerInterval();
+        final float nodeStep = (nodeCorrections.upperInterval() - nodeLower) * FOUR_BIT_SCALE;
+        final float queryLower = queryCorrections.lowerInterval();
+        final float queryStep = (queryCorrections.upperInterval() - queryLower) * FOUR_BIT_SCALE;
+        final float nodeSum = nodeCorrections.quantizedComponentSum();
+        final float querySum = queryCorrections.quantizedComponentSum();
+
+        final float expanded = nodeLower * queryLower * dims + queryLower * nodeStep * nodeSum + nodeLower * queryStep * querySum
+            + nodeStep * queryStep * rawDot;
+
+        if (similarityFunction == EUCLIDEAN) {
+            // Invert into a squared distance; the additional correction is assumed to be the squared l2norm of the
+            // centroid-centered vectors. Quantization loss can push the value below zero, hence the floor.
+            float squaredDistance = queryCorrections.additionalCorrection() + nodeCorrections.additionalCorrection() - 2 * expanded;
+            return VectorUtil.normalizeDistanceToUnitInterval(Math.max(squaredDistance, 0f));
+        }
+
+        // For cosine and max inner product the additional correction is assumed to be the non-centered dot product
+        // between the vector and the centroid.
+        float corrected = expanded + (queryCorrections.additionalCorrection() + nodeCorrections.additionalCorrection() - centroidDP);
+        if (similarityFunction == MAXIMUM_INNER_PRODUCT) {
+            return VectorUtil.scaleMaxInnerProductScore(corrected);
+        }
+        // Quantization loss can push a normalized dot product outside [-1, 1]; clamp before normalizing.
+        return VectorUtil.normalizeToUnitInterval(Math.clamp(corrected, -1, 1));
+    }
 }
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4Benchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4Benchmark.java
new file mode 100644
index 0000000000000..f943ec9fb08ad
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4Benchmark.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.util.hnsw.RandomVectorScorer;
+import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
+import org.elasticsearch.benchmark.Utils;
+import org.elasticsearch.simdvec.VectorSimilarityType;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.io.IOException;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104ScoreSupplier;
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104Scorer;
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.supportsHeapSegments;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4QuantizedVectorValues;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarQueryScorer;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarScorer;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.packNibbles;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.randomInt4Bytes;
+
+/**
+ * Benchmark that compares int4 packed-nibble quantized vector similarity scoring:
+ * scalar vs Lucene's Lucene104ScalarQuantizedVectorScorer.
+ * Run with ./gradlew -p benchmarks run --args 'VectorScorerInt4Benchmark'
+ */
+@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 5, time = 3)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@State(Scope.Thread)
+public class VectorScorerInt4Benchmark {
+
+    static {
+        Utils.configureBenchmarkLogging();
+    }
+
+    @Param({ "96", "768", "1024" })
+    public int dims;
+    // Only two vectors are needed: ordinal 0 is the scoring target and ordinal 1 is what gets scored.
+    public static int numVectors = 2;
+
+    @Param({ "SCALAR", "LUCENE" })
+    public VectorImplementation implementation;
+
+    @Param({ "DOT_PRODUCT", "EUCLIDEAN" })
+    public VectorSimilarityType function;
+
+    private UpdateableRandomVectorScorer scorer;
+    private RandomVectorScorer queryScorer;
+
+    /** Random input data, kept apart from the benchmark state so several implementations can share the same vectors. */
+    static class VectorData {
+        final QuantizedByteVectorValues values;
+        final float[] queryVector;
+
+        VectorData(int dims) {
+            ThreadLocalRandom random = ThreadLocalRandom.current();
+            byte[][] packedVectors = new byte[numVectors][];
+            for (int ord = 0; ord < numVectors; ord++) {
+                byte[] unpacked = new byte[dims];
+                randomInt4Bytes(random, unpacked);
+                packedVectors[ord] = packNibbles(unpacked);
+            }
+            values = createI4QuantizedVectorValues(dims, packedVectors);
+            queryVector = new float[dims];
+            for (int d = 0; d < dims; d++) {
+                queryVector[d] = random.nextFloat();
+            }
+        }
+    }
+
+    /** JMH setup: generates fresh random data for the current {@code dims} and wires up the scorers. */
+    @Setup
+    public void setup() throws IOException {
+        setup(new VectorData(dims));
+    }
+
+    /**
+     * Builds {@link #scorer} and {@link #queryScorer} for the selected implementation and makes ordinal 0 the
+     * scoring target. For {@code LUCENE}, {@code queryScorer} is only assigned when {@code supportsHeapSegments()}
+     * returns true; otherwise it is not assigned here and {@link #scoreQuery()} cannot be run.
+     */
+    void setup(VectorData vectorData) throws IOException {
+        final VectorSimilarityFunction similarityFunction = function.function();
+        final QuantizedByteVectorValues values = vectorData.values;
+
+        switch (implementation) {
+            case SCALAR -> {
+                scorer = createI4ScalarScorer(values, similarityFunction);
+                queryScorer = createI4ScalarQueryScorer(values, similarityFunction, vectorData.queryVector);
+            }
+            case LUCENE -> {
+                scorer = lucene104ScoreSupplier(values, similarityFunction).scorer();
+                if (supportsHeapSegments()) {
+                    queryScorer = lucene104Scorer(values, similarityFunction, vectorData.queryVector);
+                }
+            }
+        }
+
+        scorer.setScoringOrdinal(0);
+    }
+
+    /** Currently a no-op. */
+    @TearDown
+    public void teardown() throws IOException {}
+
+    /** Scores stored vector 1 against the scoring target (stored vector 0). */
+    @Benchmark
+    public float score() throws IOException {
+        return scorer.score(1);
+    }
+
+    /** Scores stored vector 1 against the float query vector. */
+    @Benchmark
+    public float scoreQuery() throws IOException {
+        return queryScorer.score(1);
+    }
+}
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmark.java
new file mode 100644
index 0000000000000..e5331fa3cbdb1
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmark.java
@@ -0,0 +1,232 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import org.apache.lucene.codecs.lucene104.QuantizedByteVectorValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.util.hnsw.RandomVectorScorer;
+import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
+import org.elasticsearch.benchmark.Utils;
+import org.elasticsearch.simdvec.VectorSimilarityType;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104ScoreSupplier;
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.lucene104Scorer;
+import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.supportsHeapSegments;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4QuantizedVectorValues;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarQueryScorer;
+import static org.elasticsearch.benchmark.vector.scorer.Int4BenchmarkUtils.createI4ScalarScorer;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.packNibbles;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.randomInt4Bytes;
+
+/**
+ * Benchmark that compares bulk scoring of int4 packed-nibble quantized vectors:
+ * scalar vs Lucene's Lucene104ScalarQuantizedVectorScorer, across sequential
+ * and random access patterns.
+ * Run with ./gradlew -p benchmarks run --args 'VectorScorerInt4BulkBenchmark'
+ */
+@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 5, time = 3)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@State(Scope.Thread)
+public class VectorScorerInt4BulkBenchmark {
+
+    static {
+        Utils.configureBenchmarkLogging();
+    }
+
+    @Param({ "1024" })
+    public int dims;
+
+    @Param({ "128", "1500", "130000" })
+    public int numVectors;
+    // Not a @Param: copied from VectorData during setup.
+    public int numVectorsToScore;
+
+    @Param({ "16", "32", "64", "256", "1024" })
+    public int bulkSize;
+
+    @Param({ "SCALAR", "LUCENE" })
+    public VectorImplementation implementation;
+
+    @Param({ "DOT_PRODUCT", "EUCLIDEAN" })
+    public VectorSimilarityType function;
+
+    private float[] scores;
+    private int[] ordinals;
+    private int[] ids;
+    private int[] toScore;
+
+    private UpdateableRandomVectorScorer scorer;
+    private RandomVectorScorer queryScorer;
+
+    /** Random input data, kept apart from the benchmark state so several implementations can share the same vectors. */
+    static class VectorData {
+        final int numVectorsToScore;
+        final QuantizedByteVectorValues values;
+        final int[] ordinals;
+        final int targetOrd;
+        final float[] queryVector;
+
+        VectorData(int dims, int numVectors, int numVectorsToScore) {
+            this.numVectorsToScore = numVectorsToScore;
+            ThreadLocalRandom random = ThreadLocalRandom.current();
+            byte[][] packedVectors = new byte[numVectors][];
+            for (int ord = 0; ord < numVectors; ord++) {
+                byte[] unpacked = new byte[dims];
+                randomInt4Bytes(random, unpacked);
+                packedVectors[ord] = packNibbles(unpacked);
+            }
+            values = createI4QuantizedVectorValues(dims, packedVectors);
+
+            // Random-access order: a shuffled list of all ordinals, truncated to the number of vectors to score.
+            List<Integer> shuffled = IntStream.range(0, numVectors).boxed().collect(Collectors.toList());
+            Collections.shuffle(shuffled, random);
+            ordinals = shuffled.stream().limit(numVectorsToScore).mapToInt(Integer::intValue).toArray();
+            targetOrd = random.nextInt(numVectors);
+
+            queryVector = new float[dims];
+            for (int d = 0; d < dims; d++) {
+                queryVector[d] = random.nextFloat();
+            }
+        }
+    }
+
+    /** JMH setup: generates fresh random data and scores at most 20,000 of the {@code numVectors} vectors. */
+    @Setup
+    public void setup() throws IOException {
+        setup(new VectorData(dims, numVectors, Math.min(numVectors, 20_000)));
+    }
+
+    /**
+     * Allocates the per-batch buffers, builds {@link #scorer} and {@link #queryScorer} for the selected
+     * implementation and makes {@code vectorData.targetOrd} the scoring target. For {@code LUCENE},
+     * {@code queryScorer} is only assigned when {@code supportsHeapSegments()} returns true.
+     */
+    void setup(VectorData vectorData) throws IOException {
+        final VectorSimilarityFunction similarityFunction = function.function();
+        final QuantizedByteVectorValues values = vectorData.values;
+
+        numVectorsToScore = vectorData.numVectorsToScore;
+        scores = new float[bulkSize];
+        toScore = new int[bulkSize];
+        ids = IntStream.range(0, numVectors).toArray();
+        ordinals = vectorData.ordinals;
+
+        switch (implementation) {
+            case SCALAR -> {
+                scorer = createI4ScalarScorer(values, similarityFunction);
+                queryScorer = createI4ScalarQueryScorer(values, similarityFunction, vectorData.queryVector);
+            }
+            case LUCENE -> {
+                scorer = lucene104ScoreSupplier(values, similarityFunction).scorer();
+                if (supportsHeapSegments()) {
+                    queryScorer = lucene104Scorer(values, similarityFunction, vectorData.queryVector);
+                }
+            }
+        }
+
+        scorer.setScoringOrdinal(vectorData.targetOrd);
+    }
+
+    /** Currently a no-op. */
+    @TearDown
+    public void teardown() throws IOException {}
+
+    /** Scores ordinals {@code 0..numVectorsToScore-1} one at a time, in batches of up to {@code bulkSize}. */
+    @Benchmark
+    public float[] scoreMultipleSequential() throws IOException {
+        int v = 0;
+        while (v < numVectorsToScore) {
+            for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
+                scores[i] = scorer.score(v);
+            }
+        }
+        return scores;
+    }
+
+    /** Scores the shuffled {@code ordinals} one at a time, in batches of up to {@code bulkSize}. */
+    @Benchmark
+    public float[] scoreMultipleRandom() throws IOException {
+        int v = 0;
+        while (v < numVectorsToScore) {
+            for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
+                scores[i] = scorer.score(ordinals[v]);
+            }
+        }
+        return scores;
+    }
+
+    /** Same as {@link #scoreMultipleRandom()}, but scoring against the float query vector. */
+    @Benchmark
+    public float[] scoreQueryMultipleRandom() throws IOException {
+        int v = 0;
+        while (v < numVectorsToScore) {
+            for (int i = 0; i < bulkSize && v < numVectorsToScore; i++, v++) {
+                scores[i] = queryScorer.score(ordinals[v]);
+            }
+        }
+        return scores;
+    }
+
+    /** Scores ordinals {@code 0..numVectorsToScore-1} through {@code bulkScore}, {@code bulkSize} at a time. */
+    @Benchmark
+    public float[] scoreMultipleSequentialBulk() throws IOException {
+        for (int i = 0; i < numVectorsToScore; i += bulkSize) {
+            int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
+            System.arraycopy(ids, i, toScore, 0, toScoreInThisBatch);
+            scorer.bulkScore(toScore, scores, toScoreInThisBatch);
+        }
+        return scores;
+    }
+
+    /** Scores the shuffled {@code ordinals} through {@code bulkScore}, {@code bulkSize} at a time. */
+    @Benchmark
+    public float[] scoreMultipleRandomBulk() throws IOException {
+        for (int i = 0; i < numVectorsToScore; i += bulkSize) {
+            int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
+            System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch);
+            scorer.bulkScore(toScore, scores, toScoreInThisBatch);
+        }
+        return scores;
+    }
+
+    /** Same as {@link #scoreMultipleRandomBulk()}, but scoring against the float query vector. */
+    @Benchmark
+    public float[] scoreQueryMultipleRandomBulk() throws IOException {
+        for (int i = 0; i < numVectorsToScore; i += bulkSize) {
+            int toScoreInThisBatch = Math.min(bulkSize, numVectorsToScore - i);
+            System.arraycopy(ordinals, i, toScore, 0, toScoreInThisBatch);
+            queryScorer.bulkScore(toScore, scores, toScoreInThisBatch);
+        }
+        return scores;
+    }
+}
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmark.java
new file mode 100644
index 0000000000000..a6a222ba1f044
--- /dev/null
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmark.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+package org.elasticsearch.benchmark.vector.scorer;
+
+import org.apache.lucene.util.VectorUtil;
+import org.elasticsearch.benchmark.Utils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.packNibbles;
+import static org.elasticsearch.simdvec.internal.vectorization.VectorScorerTestUtils.randomInt4Bytes;
+
+/**
+ * Benchmark comparing raw int4 packed-nibble dot product implementations:
+ * scalar (plain loop) vs Lucene (Panama-vectorized VectorUtil).
+ * Run with ./gradlew -p benchmarks run --args 'VectorScorerInt4OperationBenchmark'
+ */
+@Fork(value = 3, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Benchmark)
+@Warmup(iterations = 3, time = 1)
+@Measurement(iterations = 5, time = 1)
+public class VectorScorerInt4OperationBenchmark {
+
+    static {
+        Utils.configureBenchmarkLogging();
+    }
+
+    // Both arrays are regenerated by init() before every iteration.
+    public byte[] unpacked;
+    public byte[] packed;
+
+    @Param({ "2", "128", "208", "256", "300", "512", "702", "1024", "1536", "2048" })
+    public int size;
+
+    /** Fills a fresh {@code size}-element array with random int4 values and stores it along with its packed form. */
+    @Setup(Level.Iteration)
+    public void init() {
+        byte[] fresh = new byte[size];
+        randomInt4Bytes(ThreadLocalRandom.current(), fresh);
+        unpacked = fresh;
+        packed = packNibbles(fresh);
+    }
+
+    /** Baseline: the plain-loop implementation from {@link ScalarOperations}. */
+    @Benchmark
+    public int scalar() {
+        return ScalarOperations.dotProductI4SinglePacked(unpacked, packed);
+    }
+
+    /** Lucene's {@link VectorUtil#int4DotProductSinglePacked} over the same inputs. */
+    @Benchmark
+    public int lucene() {
+        return VectorUtil.int4DotProductSinglePacked(unpacked, packed);
+    }
+}
diff --git a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BenchmarkTests.java b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BenchmarkTests.java
new file mode 100644
index 0000000000000..dd632058bb962
--- /dev/null
+++ b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BenchmarkTests.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.simdvec.VectorSimilarityType;
+import org.elasticsearch.test.ESTestCase;
+import org.openjdk.jmh.annotations.Param;
+
+import java.util.Arrays;
+
+public class VectorScorerInt4BenchmarkTests extends ESTestCase {
+
+    private final double delta = 1e-3;
+    private final VectorSimilarityType function;
+    private final int dims;
+
+    public VectorScorerInt4BenchmarkTests(VectorSimilarityType function, int dims) {
+        this.function = function;
+        this.dims = dims;
+    }
+
+    public void testScores() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var data = new VectorScorerInt4Benchmark.VectorData(dims);
+
+            var scalar = new VectorScorerInt4Benchmark();
+            scalar.function = function;
+            scalar.implementation = VectorImplementation.SCALAR;
+            scalar.dims = dims;
+            scalar.setup(data);
+
+            var lucene = new VectorScorerInt4Benchmark();
+            lucene.function = function;
+            lucene.implementation = VectorImplementation.LUCENE;
+            lucene.dims = dims;
+            lucene.setup(data);
+
+            try {
+                float expected = scalar.score();
+                assertEquals("LUCENE score", expected, lucene.score(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    public void testQueryScores() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var data = new VectorScorerInt4Benchmark.VectorData(dims);
+
+            var scalar = new VectorScorerInt4Benchmark();
+            scalar.function = function;
+            scalar.implementation = VectorImplementation.SCALAR;
+            scalar.dims = dims;
+            scalar.setup(data);
+
+            var lucene = new VectorScorerInt4Benchmark();
+            lucene.function = function;
+            lucene.implementation = VectorImplementation.LUCENE;
+            lucene.dims = dims;
+            lucene.setup(data);
+
+            try {
+                float expected = scalar.scoreQuery();
+                assertEquals("LUCENE scoreQuery", expected, lucene.scoreQuery(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parametersFactory() {
+        try {
+            String[] dims = VectorScorerInt4Benchmark.class.getField("dims").getAnnotationsByType(Param.class)[0].value();
+            String[] functions = VectorScorerInt4Benchmark.class.getField("function").getAnnotationsByType(Param.class)[0].value();
+            return () -> Arrays.stream(dims)
+                .map(Integer::parseInt)
+                .flatMap(d -> Arrays.stream(functions).map(f -> new Object[] { VectorSimilarityType.valueOf(f), d }))
+                .iterator();
+        } catch (NoSuchFieldException e) {
+            throw new AssertionError(e);
+        }
+    }
+}
diff --git a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmarkTests.java b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmarkTests.java
new file mode 100644
index 0000000000000..8af55d30e6a97
--- /dev/null
+++ b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4BulkBenchmarkTests.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.simdvec.VectorSimilarityType;
+import org.elasticsearch.test.ESTestCase;
+import org.openjdk.jmh.annotations.Param;
+
+import java.util.Arrays;
+
+public class VectorScorerInt4BulkBenchmarkTests extends ESTestCase {
+
+    private final VectorSimilarityType function;
+    private final float delta = 1e-3f;
+    private final int dims;
+
+    public VectorScorerInt4BulkBenchmarkTests(VectorSimilarityType function, int dims) {
+        this.function = function;
+        this.dims = dims;
+    }
+
+    private VectorScorerInt4BulkBenchmark createBench(VectorImplementation impl, VectorScorerInt4BulkBenchmark.VectorData vectorData)
+        throws Exception {
+        var bench = new VectorScorerInt4BulkBenchmark();
+        bench.function = function;
+        bench.implementation = impl;
+        bench.dims = dims;
+        bench.numVectors = 1000;
+        bench.numVectorsToScore = 200;
+        bench.bulkSize = 200;
+        bench.setup(vectorData);
+        return bench;
+    }
+
+    public void testSequential() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200);
+            var scalar = createBench(VectorImplementation.SCALAR, vectorData);
+            var lucene = createBench(VectorImplementation.LUCENE, vectorData);
+
+            try {
+                float[] expected = scalar.scoreMultipleSequential();
+                assertArrayEquals("LUCENE sequential", expected, lucene.scoreMultipleSequential(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    public void testRandom() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200);
+            var scalar = createBench(VectorImplementation.SCALAR, vectorData);
+            var lucene = createBench(VectorImplementation.LUCENE, vectorData);
+
+            try {
+                float[] expected = scalar.scoreMultipleRandom();
+                assertArrayEquals("LUCENE random", expected, lucene.scoreMultipleRandom(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    public void testQueryRandom() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200);
+            var scalar = createBench(VectorImplementation.SCALAR, vectorData);
+            var lucene = createBench(VectorImplementation.LUCENE, vectorData);
+
+            try {
+                float[] expected = scalar.scoreQueryMultipleRandom();
+                assertArrayEquals("LUCENE queryRandom", expected, lucene.scoreQueryMultipleRandom(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    public void testSequentialBulk() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200);
+            var scalar = createBench(VectorImplementation.SCALAR, vectorData);
+            var lucene = createBench(VectorImplementation.LUCENE, vectorData);
+
+            try {
+                float[] expected = scalar.scoreMultipleSequentialBulk();
+                assertArrayEquals("LUCENE sequentialBulk", expected, lucene.scoreMultipleSequentialBulk(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    public void testRandomBulk() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200);
+            var scalar = createBench(VectorImplementation.SCALAR, vectorData);
+            var lucene = createBench(VectorImplementation.LUCENE, vectorData);
+
+            try {
+                float[] expected = scalar.scoreMultipleRandomBulk();
+                assertArrayEquals("LUCENE randomBulk", expected, lucene.scoreMultipleRandomBulk(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    public void testQueryRandomBulk() throws Exception {
+        for (int i = 0; i < 100; i++) {
+            var vectorData = new VectorScorerInt4BulkBenchmark.VectorData(dims, 1000, 200);
+            var scalar = createBench(VectorImplementation.SCALAR, vectorData);
+            var lucene = createBench(VectorImplementation.LUCENE, vectorData);
+
+            try {
+                float[] expected = scalar.scoreQueryMultipleRandomBulk();
+                assertArrayEquals("LUCENE queryRandomBulk", expected, lucene.scoreQueryMultipleRandomBulk(), delta);
+            } finally {
+                scalar.teardown();
+                lucene.teardown();
+            }
+        }
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parametersFactory() {
+        try {
+            String[] dims = VectorScorerInt4BulkBenchmark.class.getField("dims").getAnnotationsByType(Param.class)[0].value();
+            String[] functions = VectorScorerInt4BulkBenchmark.class.getField("function").getAnnotationsByType(Param.class)[0].value();
+            return () -> Arrays.stream(dims)
+                .map(Integer::parseInt)
+                .flatMap(d -> Arrays.stream(functions).map(f -> new Object[] { VectorSimilarityType.valueOf(f), d }))
+                .iterator();
+        } catch (NoSuchFieldException e) {
+            throw new AssertionError(e);
+        }
+    }
+}
diff --git a/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmarkTests.java b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmarkTests.java
new file mode 100644
index 0000000000000..d2aa18f3f27cb
--- /dev/null
+++ b/benchmarks/src/test/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt4OperationBenchmarkTests.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.benchmark.vector.scorer;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.ESTestCase;
+import org.openjdk.jmh.annotations.Param;
+
+import java.util.Arrays;
+
+public class VectorScorerInt4OperationBenchmarkTests extends ESTestCase {
+
+    private final int size;
+
+    public VectorScorerInt4OperationBenchmarkTests(int size) {
+        this.size = size;
+    }
+
+    public void test() {
+        for (int i = 0; i < 100; i++) {
+            var bench = new VectorScorerInt4OperationBenchmark();
+            bench.size = size;
+            bench.init();
+
+            int expected = bench.scalar();
+            assertEquals(expected, bench.lucene());
+        }
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parametersFactory() {
+        try {
+            String[] sizes = VectorScorerInt4OperationBenchmark.class.getField("size").getAnnotationsByType(Param.class)[0].value();
+            return () -> Arrays.stream(sizes).map(Integer::parseInt).map(s -> new Object[] { s }).iterator();
+        } catch (NoSuchFieldException e) {
+            throw new AssertionError(e);
+        }
+    }
+}
diff --git a/libs/simdvec/src/testFixtures/java/org/elasticsearch/simdvec/internal/vectorization/VectorScorerTestUtils.java b/libs/simdvec/src/testFixtures/java/org/elasticsearch/simdvec/internal/vectorization/VectorScorerTestUtils.java
index 196bd31045881..5b6b52a102c0a 100644
--- a/libs/simdvec/src/testFixtures/java/org/elasticsearch/simdvec/internal/vectorization/VectorScorerTestUtils.java
+++ b/libs/simdvec/src/testFixtures/java/org/elasticsearch/simdvec/internal/vectorization/VectorScorerTestUtils.java
@@ -210,4 +210,56 @@ public static void randomVector(Random random, float[] vector, VectorSimilarityF
             VectorUtil.l2normalize(vector);
         }
     }
+
+    public static void randomInt4Bytes(Random random, byte[] bytes) {
+        for (int i = 0, len = bytes.length; i < len;) {
+            bytes[i++] = (byte) random.nextInt(0, 0x10);
+        }
+    }
+
+    /**
+     * Packs unpacked int4 values (one value per byte) into Lucene nibble-packed format (two values per byte)
+     * written by {@code Lucene104ScalarQuantizedVectorsWriter} (ScalarEncoding#PACKED_NIBBLE format).
+     * <p>
+     * The unpacked input comes from {@link OptimizedScalarQuantizer#scalarQuantize}, which quantizes a float
+     * vector into one byte per element in natural order: unpacked = [v0, v1, v2, ..., v_{N-1}] where N = dims.
+     * <p>
+     * The packed format pairs elements that are packedLength ({@code unpacked} length / 2) apart; if the length is odd,
+     * the last element is not packed. For example, with dims=8, unpacked.length is 8 and packedLength is 4:
+     * <ul><li>{@code packed[0] = (v0 << 4) | v4}</li>
+     * <li>{@code packed[1] = (v1 << 4) | v5}</li>
+     * <li>{@code packed[2] = (v2 << 4) | v6}</li>
+     * <li>{@code packed[3] = (v3 << 4) | v7}</li></ul>
+     * <p>
+     * Or, visually: <pre>
+     * UNPACKED (8 bytes, natural vector order, one 4-bit value per byte):
+     *   index:   0     1     2     3     4     5     6     7
+     *          [v0]  [v1]  [v2]  [v3]  [v4]  [v5]  [v6]  [v7]
+     * PACKED (4 bytes, on disk, two 4-bit values per byte):
+     *   index:      0          1          2          3
+     *          [v0  | v4]  [v1 | v5]  [v2 | v6]  [v3 | v7]
+     *           hi    lo    hi   lo    hi   lo    hi   lo
+     *          7..4  3..0  7..4 3..0  7..4 3..0  7..4 3..0</pre>
+     */
+    public static byte[] packNibbles(byte[] unpacked) {
+        int packedLength = unpacked.length / 2;
+        byte[] packed = new byte[packedLength];
+        for (int i = 0; i < packedLength; i++) {
+            packed[i] = (byte) ((unpacked[i] << 4) | (unpacked[i + packedLength] & 0x0F));
+        }
+        return packed;
+    }
+
+    /**
+     * Unpacks nibble-packed int4 values (two values per byte) into a byte[] (one value per byte); the inverse of {@link #packNibbles}.
+     */
+    public static byte[] unpackNibbles(byte[] packed, int dims) {
+        byte[] unpacked = new byte[dims];
+        int packedLen = packed.length;
+        for (int i = 0; i < packedLen; i++) {
+            unpacked[i] = (byte) ((packed[i] & 0xFF) >> 4);
+            unpacked[i + packedLen] = (byte) (packed[i] & 0x0F);
+        }
+        return unpacked;
+    }
 }