Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
6acfa0f
Enhance vector scoring with 7-bit quantization support
ah89 Feb 11, 2026
17a960e
Merge branch 'main' into feature/bbq-multibit-quantization-139591
ah89 Feb 11, 2026
5589e32
Remove unused docsWriter calls in DiskBBQBulkWriter to streamline bul…
ah89 Feb 11, 2026
0c27ebc
Add support for 7-bit symmetric quantized vectors in MemorySegmentESN…
ah89 Feb 12, 2026
58352e1
Merge branch 'main' into feature/bbq-multibit-quantization-139591
ah89 Feb 12, 2026
7eb4f44
Merge branch 'main' into feature/bbq-multibit-quantization-139591
benwtrent Feb 12, 2026
cd27e9c
Add scratch byte array and refactor quantized 7-bit scoring method
ah89 Feb 13, 2026
be47645
Merge branch 'main' into feature/bbq-multibit-quantization-139591
ah89 Feb 18, 2026
3bb0df3
[CI] Auto commit changes from spotless
Feb 18, 2026
9e00641
Refactor condition in PanamaESVectorizationProvider for clarity
ah89 Feb 18, 2026
169c37a
Merge branch 'main' into feature/bbq-multibit-quantization-139591
ah89 Feb 18, 2026
c820655
Add clamping to 7-bit for binary vectors and queries in VectorScorerO…
ah89 Feb 18, 2026
b41379a
Merge branch 'main' into feature/bbq-multibit-quantization-139591
benwtrent Feb 18, 2026
f9329e3
Merge branch 'main' into feature/bbq-multibit-quantization-139591
ah89 Feb 24, 2026
e8f571b
Handles 7-bit quantization and packing for vectors
ah89 Feb 25, 2026
3409834
Switches bits to byte and removes unused method
ah89 Feb 25, 2026
c9cc38c
Merge branch 'main' into feature/bbq-multibit-quantization-139591
ah89 Feb 25, 2026
5364e00
Unifies query bits handling and simplifies scorer
ah89 Feb 27, 2026
1c4c310
[CI] Auto commit changes from spotless
Feb 27, 2026
70e594d
Merge branch 'main' into feature/bbq-multibit-quantization-139591
ah89 Feb 27, 2026
ad8dbae
Adds 7-bit quantization support for IVF index
ah89 Feb 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public enum VectorImplementation {
@Param({ "384", "768", "1024" })
public int dims;

@Param({ "1", "2", "4" })
@Param({ "1", "2", "4", "7" })
public byte bits;

@Param
Expand Down Expand Up @@ -137,11 +137,12 @@ static VectorData generateRandomVectorData(
}

int binaryQueryLength = ESNextDiskBBQVectorsFormat.QuantEncoding.fromBits(bits).getQueryPackedLength(dims);
byte queryBits = bits == 7 ? (byte) 7 : (byte) 4;
VectorScorerTestUtils.OSQVectorData[] queryVectors = new VectorScorerTestUtils.OSQVectorData[numVectors];
var query = new float[dims];
for (int i = 0; i < numVectors; i++) {
randomVector(random, query, similarityFunction);
queryVectors[i] = createOSQQueryData(query, centroid, quantizer, dims, (byte) 4, binaryQueryLength);
queryVectors[i] = createOSQQueryData(query, centroid, quantizer, dims, queryBits, binaryQueryLength);
}

return new VectorData(indexVectors, queryVectors, binaryIndexLength, VectorUtil.dotProduct(centroid, centroid));
Expand Down Expand Up @@ -185,6 +186,10 @@ void setup(VectorData data) throws IOException {
docBits = 4;
yield 4;
}
case 7 -> {
docBits = 7;
yield 7;
}
default -> throw new IllegalArgumentException("Unsupported bits: " + bits);
};
scorer = switch (implementation) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,14 @@ public class ESNextOSQVectorsScorer {
protected final float[] upperIntervals;
protected final int[] targetComponentSums;
protected final float[] additionalCorrections;
private final byte[] scratch;

public ESNextOSQVectorsScorer(IndexInput in, byte queryBits, byte indexBits, int dimensions, int dataLength, int bulkSize) {
if (queryBits != 4 || (indexBits != 1 && indexBits != 2 && indexBits != 4)) {
if (indexBits == 7) {
if (queryBits != 7) {
throw new IllegalArgumentException("Only symmetric 7-bit query supported for 7-bit index");
}
} else if (queryBits != 4 || (indexBits != 1 && indexBits != 2 && indexBits != 4)) {
throw new IllegalArgumentException("Only asymmetric 4-bit query and 1, 2 or 4-bit index supported");
}
this.in = in;
Expand All @@ -61,6 +66,7 @@ public ESNextOSQVectorsScorer(IndexInput in, byte queryBits, byte indexBits, int
this.targetComponentSums = new int[bulkSize];
this.additionalCorrections = new float[bulkSize];
this.bulkSize = bulkSize;
this.scratch = indexBits == 7 ? new byte[dimensions] : null;
}

public ESNextOSQVectorsScorer(IndexInput in, byte queryBits, byte indexBits, int dimensions, int dataLength) {
Expand Down Expand Up @@ -88,9 +94,17 @@ public long quantizeScore(byte[] q) throws IOException {
return quantized4BitScoreSymmetric(q);
}
}
if (indexBits == 7) {
return quantized7BitScore(q);
}
throw new IllegalArgumentException("Only 1-bit index supported");
}

/**
 * Scores the next 7-bit quantized vector read from {@code in} against the given query.
 *
 * <p>Reads {@code dimensions} bytes from the input into the reusable {@code scratch} buffer and
 * returns their byte-wise dot product with {@code q}.
 *
 * @param q the quantized query, expected to hold one byte per dimension
 * @return the dot product of the stored vector and {@code q}
 * @throws IOException if reading from the underlying input fails
 */
private long quantized7BitScore(byte[] q) throws IOException {
    // Check the query length up front, mirroring the 4-bit symmetric path, so a mismatched query
    // is reported before any bytes are consumed from the input.
    assert q.length == dimensions : "length mismatch q " + q.length + " vs " + dimensions;
    in.readBytes(scratch, 0, dimensions);
    return VectorUtil.dotProduct(scratch, q);
}

private long quantized4BitScoreSymmetric(byte[] q) throws IOException {
assert q.length == length : "length mismatch q " + q.length + " vs " + length;
assert length % 4 == 0 : "length must be multiple of 4 for 4-bit index length: " + length + " dimensions: " + dimensions;
Expand Down Expand Up @@ -174,6 +188,12 @@ public void quantizeScoreBulk(byte[] q, int count, float[] scores) throws IOExce
}
throw new IllegalArgumentException("Only symmetric 4-bit query supported for 4-bit index");
}
if (indexBits == 7) {
for (int i = 0; i < count; i++) {
scores[i] = quantizeScore(q);
}
return;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;

import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.IndexInput;
import org.elasticsearch.simdvec.internal.MemorySegmentES92Int7VectorsScorer;

import java.io.IOException;
import java.lang.foreign.MemorySegment;

/**
 * Scorer for symmetric 7-bit quantized vectors (7-bit query against 7-bit index) whose data is
 * exposed as a {@link MemorySegment}. Every scoring call is forwarded to a
 * {@link MemorySegmentES92Int7VectorsScorer} built over the same input and segment.
 */
final class MSD7Q7ESNextOSQVectorsScorer extends MemorySegmentESNextOSQVectorsScorer.MemorySegmentScorer {

    /** Delegate that performs the actual int7 dot products and corrected scoring. */
    private final MemorySegmentES92Int7VectorsScorer int7Scorer;

    MSD7Q7ESNextOSQVectorsScorer(IndexInput in, int dimensions, int dataLength, int bulkSize, MemorySegment memorySegment) {
        super(in, dimensions, dataLength, bulkSize, memorySegment);
        int7Scorer = new MemorySegmentES92Int7VectorsScorer(in, dimensions, bulkSize, memorySegment);
    }

    /** Returns the int7 dot product of the query {@code q} as computed by the delegate. */
    @Override
    long quantizeScore(byte[] q) throws IOException {
        final long dotProduct = int7Scorer.int7DotProduct(q);
        return dotProduct;
    }

    /**
     * Lets the delegate fill {@code scores} with {@code count} int7 dot products.
     *
     * @return always {@code true}
     */
    @Override
    boolean quantizeScoreBulk(byte[] q, int count, float[] scores) throws IOException {
        int7Scorer.int7DotProductBulk(q, count, scores);
        return true;
    }

    /**
     * Forwards all arguments unchanged to the delegate's {@code scoreBulk}, then scans the result.
     *
     * @return the largest of the first {@code bulkSize} entries of {@code scores}, or
     *         {@link Float#NEGATIVE_INFINITY} when no entry is greater than that
     */
    @Override
    float scoreBulk(
        byte[] q,
        float queryLowerInterval,
        float queryUpperInterval,
        int queryComponentSum,
        float queryAdditionalCorrection,
        VectorSimilarityFunction similarityFunction,
        float centroidDp,
        float[] scores,
        int bulkSize
    ) throws IOException {
        int7Scorer.scoreBulk(
            q,
            queryLowerInterval,
            queryUpperInterval,
            queryComponentSum,
            queryAdditionalCorrection,
            similarityFunction,
            centroidDp,
            scores,
            bulkSize
        );
        return maxOfPrefix(scores, bulkSize);
    }

    /** Largest value among {@code values[0..count)}; a strict {@code >} comparison is used so NaN entries are never selected. */
    private static float maxOfPrefix(float[] values, int count) {
        float best = Float.NEGATIVE_INFINITY;
        for (int idx = 0; idx < count; idx++) {
            final float candidate = values[idx];
            best = candidate > best ? candidate : best;
        }
        return best;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@ public MemorySegmentESNextOSQVectorsScorer(
this.scorer = new MSInt4SymmetricESNextOSQVectorsScorer(in, dimensions, dataLength, bulkSize, memorySegment);
} else if (queryBits == 4 && indexBits == 2) {
this.scorer = new MSDibitToInt4ESNextOSQVectorsScorer(in, dimensions, dataLength, bulkSize, memorySegment);
} else if (queryBits == 7 && indexBits == 7) {
this.scorer = new MSD7Q7ESNextOSQVectorsScorer(in, dimensions, dataLength, bulkSize, memorySegment);
} else {
throw new IllegalArgumentException("Only asymmetric 4-bit query and 1-bit index supported");
throw new IllegalArgumentException("Unsupported query/index bits combination: " + queryBits + "/" + indexBits);
}
}

Expand Down Expand Up @@ -147,7 +149,7 @@ public float scoreBulk(
}

abstract static sealed class MemorySegmentScorer permits MSBitToInt4ESNextOSQVectorsScorer, MSDibitToInt4ESNextOSQVectorsScorer,
MSInt4SymmetricESNextOSQVectorsScorer {
MSInt4SymmetricESNextOSQVectorsScorer, MSD7Q7ESNextOSQVectorsScorer {

// TODO: split Panama and Native implementations
static final boolean NATIVE_SUPPORTED = NativeAccess.instance().getVectorSimilarityFunctions().isPresent();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ public ESNextOSQVectorsScorer newESNextOSQVectorsScorer(
unwrappedInput = MemorySegmentAccessInputAccess.unwrap(unwrappedInput);
if (PanamaESVectorUtilSupport.HAS_FAST_INTEGER_VECTORS
&& unwrappedInput instanceof MemorySegmentAccessInput msai
&& queryBits == 4
&& (indexBits == 1 || indexBits == 2 || indexBits == 4)) {
&& ((queryBits == 4 && (indexBits == 1 || indexBits == 2 || indexBits == 4)) || (queryBits == 7 && indexBits == 7))) {
MemorySegment ms = msai.segmentSliceOrNull(0, unwrappedInput.length());
if (ms != null) {
return new MemorySegmentESNextOSQVectorsScorer(unwrappedInput, queryBits, indexBits, dimension, dataLength, bulkSize, ms);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,24 @@ public class ESNextOSQVectorsScorerTests extends BaseVectorizationTests {

private final DirectoryType directoryType;
private final byte indexBits;
private final byte queryBits;
private final VectorSimilarityFunction similarityFunction;
private static final byte queryBits = 4;

public enum DirectoryType {
NIOFS,
MMAP,
SNAP
}

public ESNextOSQVectorsScorerTests(DirectoryType directoryType, byte indexBits, VectorSimilarityFunction similarityFunction) {
public ESNextOSQVectorsScorerTests(
DirectoryType directoryType,
byte indexBits,
byte queryBits,
VectorSimilarityFunction similarityFunction
) {
this.directoryType = directoryType;
this.indexBits = indexBits;
this.queryBits = queryBits;
this.similarityFunction = similarityFunction;
}

Expand All @@ -63,33 +69,34 @@ public void testQuantizeScore() throws Exception {
final int length = ESNextDiskBBQVectorsFormat.QuantEncoding.fromBits(indexBits).getDocPackedLength(dimensions);

final byte[] vector = new byte[length];

final int queryBytes = length * (queryBits / indexBits);

try (Directory dir = newParametrizedDirectory()) {
try (IndexOutput out = dir.createOutput("tests.bin", IOContext.DEFAULT)) {
for (int i = 0; i < numVectors; i++) {
random().nextBytes(vector);
if (indexBits == 7) clampTo7Bit(vector, dimensions);
out.writeBytes(vector, 0, length);
}
CodecUtil.writeFooter(out);
}
final byte[] query = new byte[queryBytes];
random().nextBytes(query);
if (indexBits == 7) clampTo7Bit(query, dimensions);
try (IndexInput in = dir.openInput("tests.bin", IOContext.DEFAULT)) {
// Work on a slice that has just the right number of bytes to make the test fail with an
// index-out-of-bounds in case the implementation reads more than the allowed number of
// padding bytes.
final IndexInput slice = in.slice("test", 0, (long) length * numVectors);
final var defaultScorer = defaultProvider().newESNextOSQVectorsScorer(
final ESNextOSQVectorsScorer defaultScorer = defaultProvider().newESNextOSQVectorsScorer(
slice,
queryBits,
indexBits,
dimensions,
length,
ESNextOSQVectorsScorer.BULK_SIZE
);
final var panamaScorer = maybePanamaProvider().newESNextOSQVectorsScorer(
final ESNextOSQVectorsScorer panamaScorer = maybePanamaProvider().newESNextOSQVectorsScorer(
in,
queryBits,
indexBits,
Expand Down Expand Up @@ -135,7 +142,9 @@ public void testScore() throws Exception {
final float[] query = new float[dimensions];
randomVector(random(), query, similarityFunction);

final int queryVectorPackedLengthInBytes = indexVectorPackedLengthInBytes * (queryBits / indexBits);
final int queryVectorPackedLengthInBytes = indexBits == 7
? ESNextDiskBBQVectorsFormat.QuantEncoding.fromBits(indexBits).getQueryPackedLength(dimensions)
: indexVectorPackedLengthInBytes * (queryBits / indexBits);
var queryData = createOSQQueryData(query, centroid, quantizer, dimensions, queryBits, queryVectorPackedLengthInBytes);

final float centroidDp = VectorUtil.dotProduct(centroid, centroid);
Expand Down Expand Up @@ -250,7 +259,9 @@ private void doTestScoreBulk(int bulkSize) throws Exception {
}
final float[] query = new float[dimensions];
randomVector(random(), query, similarityFunction);
final int queryVectorPackedLengthInBytes = indexVectorPackedLengthInBytes * (queryBits / indexBits);
final int queryVectorPackedLengthInBytes = indexBits == 7
? ESNextDiskBBQVectorsFormat.QuantEncoding.fromBits(indexBits).getQueryPackedLength(dimensions)
: indexVectorPackedLengthInBytes * (queryBits / indexBits);
var queryData = createOSQQueryData(query, centroid, quantizer, dimensions, queryBits, queryVectorPackedLengthInBytes);

final float centroidDp = VectorUtil.dotProduct(centroid, centroid);
Expand Down Expand Up @@ -334,6 +345,7 @@ public void testScoreBulkWithNegativeInfinityScore() throws Exception {
byte[] vector = new byte[length];
for (int i = 0; i < bulkSize; i++) {
random().nextBytes(vector);
if (indexBits == 7) clampTo7Bit(vector, dimensions);
out.writeBytes(vector, 0, length);
}
// All-zero corrections: zero bytes are interpreted identically regardless of byte order
Expand All @@ -344,6 +356,7 @@ public void testScoreBulkWithNegativeInfinityScore() throws Exception {

byte[] query = new byte[queryBytes];
random().nextBytes(query);
if (indexBits == 7) clampTo7Bit(query, dimensions);

float[] scoresDefault = new float[bulkSize];
float[] scoresPanama = new float[bulkSize];
Expand Down Expand Up @@ -402,6 +415,12 @@ public void testScoreBulkWithNegativeInfinityScore() throws Exception {
}
}

/**
 * Clears the top bit of the first {@code dimensions} bytes of {@code vector} in place, so each of
 * those values lies in the range 0..127. Bytes past {@code dimensions} are left untouched.
 */
private static void clampTo7Bit(byte[] vector, int dimensions) {
    int idx = 0;
    while (idx < dimensions) {
        vector[idx] &= 0x7F;
        idx++;
    }
}

private Directory newParametrizedDirectory() throws IOException {
return switch (directoryType) {
case NIOFS -> new NIOFSDirectory(createTempDir());
Expand All @@ -412,8 +431,14 @@ private Directory newParametrizedDirectory() throws IOException {

@ParametersFactory
public static Iterable<Object[]> parametersFactory() {
return () -> Stream.of((byte) 1, (byte) 2, (byte) 4)
.flatMap(i -> Arrays.stream(DirectoryType.values()).map(f -> List.of(f, i)))
var bitCombinations = List.of(
List.of((byte) 1, (byte) 4),
List.of((byte) 2, (byte) 4),
List.of((byte) 4, (byte) 4),
List.of((byte) 7, (byte) 7)
);
return () -> bitCombinations.stream()
.flatMap(bits -> Arrays.stream(DirectoryType.values()).map(d -> List.of(d, bits.get(0), bits.get(1))))
.flatMap(p -> Arrays.stream(VectorSimilarityFunction.values()).map(f -> Stream.concat(p.stream(), Stream.of(f)).toArray()))
.iterator();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.apache.lucene.util.VectorUtil;
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
import org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat;
import org.elasticsearch.simdvec.ESVectorUtil;

import java.io.IOException;
import java.util.Random;
Expand Down Expand Up @@ -79,7 +78,7 @@ public static OSQVectorData createOSQQueryData(
centroid
);
final byte[] quantizeQuery = new byte[queryVectorPackedLengthInBytes];
ESVectorUtil.transposeHalfByte(scratch, quantizeQuery);
ESNextDiskBBQVectorsFormat.QuantEncoding.fromBits(queryBits).packQuery(scratch, quantizeQuery);

return new OSQVectorData(
quantizeQuery,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,9 @@ public static void main(String[] args) throws Exception {
private static void checkQuantizeBits(TestConfiguration args) {
switch (args.indexType()) {
case IVF:
if (args.quantizeBits() == null || !Set.of(1, 2, 4).contains(args.quantizeBits())) {
if (args.quantizeBits() == null || !Set.of(1, 2, 4, 7).contains(args.quantizeBits())) {
throw new IllegalArgumentException(
"IVF index type only supports 1, 2 or 4 bits quantization, but got: " + args.quantizeBits()
"IVF index type only supports 1, 2, 4 or 7 bits quantization, but got: " + args.quantizeBits()
);
}
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ public void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOExcepti
int limit = qvv.count() - bulkSize + 1;
int i = 0;
for (; i < limit; i += bulkSize) {
if (docsWriter != null) {
docsWriter.accept(i);
}
for (int j = 0; j < bulkSize; j++) {
byte[] qv = qvv.next();
corrections[j] = qvv.getCorrections();
Expand All @@ -219,6 +222,9 @@ public void writeVectors(QuantizedVectorValues qvv, CheckedIntConsumer<IOExcepti
writeCorrections(corrections);
}
// write tail
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this doesn't break the scoring? none of the other bits encode the tail as blocks yet.

//cc @tteofili

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed, this is likely to affect recall, @ah89 did you check with KnnIndexTester ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LargeBitDiskBBQBulkWriter was seemingly never instantiated before case 7 was introduced (cases 1, 2, and 4 are handled by SmallBitDiskBBQBulkWriter). Removing the docsWriter calls from it corrupts the on-disk layout, because the reader can no longer locate the doc IDs at the start of each block.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed, this is likely to affect recall, @ah89 did you check with KnnIndexTester ?

Verified with KnnIndexTester on GIST-1M (960 dims, 100K docs, 100 queries, euclidean, IVF):

| index_type | quantize_bits | recall | latency (ms) | QPS | visited |
|---|---|---|---|---|---|
| ivf | 4 | 0.56 | 0.16–0.20 | 5000–6250 | 2374 |
| ivf | 7 | 0.61 | 0.66–0.68 | 1470–1515 | 2362 |

7-bit shows higher recall (0.61 vs 0.56) as expected from reduced quantization error. The docsWriter fix is validated — if doc IDs were missing or misaligned, recall would drop to near zero.

As per my previous comment, LargeBitDiskBBQBulkWriter was never instantiated before this PR (only case 7 routes to it; previously only 1/2/4 existed). The docsWriter.accept(i) calls mirror what SmallBitDiskBBQBulkWriter already does — writing doc IDs at the start of each bulk block so the reader can associate scores back to documents.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, the thing is that now the ESNextDiskBBQVectorsWriter always calls DiskBBQBulkWriter#fromBitSize with the blockEncodeTailVectors parameter set to true (I was in fact thinking of dropping the parameter entirely), therefore you should see LargeBitEncodedDiskBBQBulkWriter being used with 7 bits.

Copy link
Copy Markdown
Contributor Author

@ah89 ah89 Feb 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, so I will remove the docsWriter calls from the dead LargeBitDiskBBQBulkWriter (never instantiated), but will keep them on LargeBitEncodedDiskBBQBulkWriter -- the one that's actually used.

if (i < qvv.count() && docsWriter != null) {
docsWriter.accept(i);
}
OptimizedScalarQuantizer.QuantizationResult[] tailCorrections = new OptimizedScalarQuantizer.QuantizationResult[qvv.count()
- i];
int j = 0;
Expand Down
Loading