Skip to content

Commit 57e9b32

Browse files
authored
[ES|QL] addressing vector similarity concurrency issue with byte vectors (#137883)
1 parent 366d680 commit 57e9b32

File tree

7 files changed

+81
-58
lines changed

7 files changed

+81
-58
lines changed

docs/changelog/137883.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 137883
2+
summary: Addressing vector similarity concurrency issue with byte vectors
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 137625

muted-tests.yml

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -417,10 +417,6 @@ tests:
417417
- class: org.elasticsearch.xpack.ilm.CCRIndexLifecycleIT
418418
method: testTsdbLeaderIndexRolloverAndSyncAfterWaitUntilEndTime {targetCluster=FOLLOWER}
419419
issue: https://github.com/elastic/elasticsearch/issues/137565
420-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
421-
method: testTopNSimilarityBetweenConstantVectorAndField {functionName=v_l2_norm
422-
similarityFunction=org.elasticsearch.xpack.esql.expression.function.vector.L2Norm$1@5b068087 elementType=byte}
423-
issue: https://github.com/elastic/elasticsearch/issues/137625
424420
- class: org.elasticsearch.xpack.esql.qa.single_node.GenerativeMetricsIT
425421
method: test
426422
issue: https://github.com/elastic/elasticsearch/issues/137655
@@ -436,14 +432,6 @@ tests:
436432
- class: org.elasticsearch.indices.mapping.UpdateMappingIntegrationIT
437433
method: testUpdateMappingConcurrently
438434
issue: https://github.com/elastic/elasticsearch/issues/137758
439-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
440-
method: testSimilarityWithOneDimVector {functionName=v_cosine
441-
similarityFunction=org.elasticsearch.xpack.esql.expression.function.vector.CosineSimilarity$1@70ab2d48 elementType=byte}
442-
issue: https://github.com/elastic/elasticsearch/issues/137774
443-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
444-
method: testSimilarityWithOneDimVector {functionName=v_cosine
445-
similarityFunction=org.elasticsearch.xpack.esql.expression.function.vector.CosineSimilarity$1@5b068087 elementType=byte}
446-
issue: https://github.com/elastic/elasticsearch/issues/137778
447435
- class: org.elasticsearch.xpack.inference.integration.CCMPersistentStorageServiceIT
448436
method: testDelete_RemovesCCMConfiguration
449437
issue: https://github.com/elastic/elasticsearch/issues/137786
@@ -453,10 +441,6 @@ tests:
453441
- class: org.elasticsearch.xpack.inference.integration.CCMServiceIT
454442
method: testIsEnabled_ReturnsTrue_WhenCCMConfigurationIsPresent
455443
issue: https://github.com/elastic/elasticsearch/issues/137798
456-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
457-
method: testSimilarityWithOneDimVector {functionName=v_cosine
458-
similarityFunction=org.elasticsearch.xpack.esql.expression.function.vector.CosineSimilarity$1@3300f4fd elementType=byte}
459-
issue: https://github.com/elastic/elasticsearch/issues/137812
460444
- class: org.elasticsearch.xpack.inference.external.http.sender.RequestExecutorServiceTests
461445
method: testChangingCapacity_DoesNotRejectsOverflowTasks_BecauseOfQueueFull
462446
issue: https://github.com/elastic/elasticsearch/issues/137823
@@ -466,16 +450,9 @@ tests:
466450
- class: org.elasticsearch.xpack.inference.integration.AuthorizationTaskExecutorMultipleNodesIT
467451
method: testAuthorizationTaskGetsRelocatedToAnotherNode_WhenTheNodeThatIsRunningItShutsDown
468452
issue: https://github.com/elastic/elasticsearch/issues/137911
469-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
470-
method: testSimilarityWithOneDimVector {functionName=v_cosine
471-
similarityFunction=org.elasticsearch.xpack.esql.expression.function.vector.CosineSimilarity$1@ebb6851 elementType=byte}
472-
issue: https://github.com/elastic/elasticsearch/issues/137915
473453
- class: org.elasticsearch.xpack.esql.qa.multi_node.GenerativeIT
474454
method: test
475455
issue: https://github.com/elastic/elasticsearch/issues/137909
476-
- class: org.elasticsearch.xpack.esql.vector.VectorSimilarityFunctionsIT
477-
method: testSimilarityWithOneDimVector {functionName=v_cosine similarityFunction=V_COSINE elementType=byte}
478-
issue: https://github.com/elastic/elasticsearch/issues/137975
479456
- class: org.elasticsearch.search.TelemetryMetrics.ShardSearchPhaseAPMMetricsTests
480457
method: testTimeRangeFilterAllResults
481458
issue: https://github.com/elastic/elasticsearch/issues/137979

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2937,9 +2937,6 @@ public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
29372937
return switch (cfg.function()) {
29382938
case V_COSINE, V_DOT_PRODUCT, V_HAMMING, V_L1NORM, V_L2NORM -> {
29392939
VectorSimilarityFunctionConfig similarityConfig = (VectorSimilarityFunctionConfig) cfg;
2940-
if (getElementType() == ElementType.BYTE || getElementType() == ElementType.BIT) {
2941-
similarityConfig = similarityConfig.forByteVector();
2942-
}
29432940
yield new DenseVectorBlockLoader<>(
29442941
name(),
29452942
dims,
@@ -3474,22 +3471,18 @@ public static class VectorSimilarityFunctionConfig implements BlockLoaderFunctio
34743471

34753472
private final SimilarityFunction similarityFunction;
34763473
private final float[] vector;
3477-
private byte[] vectorAsBytes;
3474+
private final byte[] vectorAsBytes;
34783475

34793476
public VectorSimilarityFunctionConfig(SimilarityFunction similarityFunction, float[] vector) {
34803477
this.similarityFunction = similarityFunction;
34813478
this.vector = vector;
3479+
this.vectorAsBytes = null;
34823480
}
34833481

3484-
/**
3485-
* Call before calculating byte vector similarities
3486-
*/
3487-
public VectorSimilarityFunctionConfig forByteVector() {
3488-
vectorAsBytes = new byte[vector.length];
3489-
for (int i = 0; i < vector.length; i++) {
3490-
vectorAsBytes[i] = (byte) vector[i];
3491-
}
3492-
return this;
3482+
public VectorSimilarityFunctionConfig(SimilarityFunction similarityFunction, byte[] vectorAsBytes) {
3483+
this.similarityFunction = similarityFunction;
3484+
this.vector = null;
3485+
this.vectorAsBytes = vectorAsBytes;
34933486
}
34943487

34953488
@Override
@@ -3498,11 +3491,12 @@ public Function function() {
34983491
}
34993492

35003493
public byte[] vectorAsBytes() {
3501-
assert vectorAsBytes != null : "vectorAsBytes is null, call forByteVector() first";
3494+
assert vectorAsBytes != null : "vectorAsBytes is null, maybe incorrect element type during construction?";
35023495
return vectorAsBytes;
35033496
}
35043497

35053498
public float[] vector() {
3499+
assert vector != null : "vector is null, maybe incorrect element type during construction?";
35063500
return vector;
35073501
}
35083502

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public class VectorSimilarityFunctionsIT extends AbstractEsqlIntegTestCase {
5050
public static Iterable<Object[]> parameters() throws Exception {
5151
List<Object[]> params = new ArrayList<>();
5252

53-
for (ElementType elementType : Set.of(ElementType.FLOAT, ElementType.BYTE)) {
53+
for (ElementType elementType : Set.of(ElementType.FLOAT, ElementType.BYTE, ElementType.BIT)) {
5454
if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) {
5555
params.add(new Object[] { "v_cosine", CosineSimilarity.SIMILARITY_FUNCTION, elementType });
5656
}
@@ -154,7 +154,7 @@ public void testSimilarityBetweenConstantVectorAndField() {
154154

155155
@SuppressWarnings("unchecked")
156156
public void testSimilarityWithOneDimVector() {
157-
var randomVector = randomVector(1);
157+
var randomVector = randomVector(elementType == ElementType.BIT ? Byte.SIZE : 1);
158158
var query = String.format(Locale.ROOT, """
159159
FROM test
160160
| EVAL similarity = %s(one_dim_vector, %s)
@@ -256,7 +256,7 @@ public void testDifferentDimensions() {
256256
// An edge case where this might not throw is when every `left_vector` value is null, but the chance is (hopefully!) low enough to ignore
257257
var randomVector = randomValueOtherThan(
258258
null,
259-
() -> randomVector(randomValueOtherThan(numDims, () -> randomIntBetween(32, 64) * 2))
259+
() -> randomVector(randomValueOtherThan(numDims, () -> randomIntBetween(32, 64) * (elementType == ElementType.BIT ? 8 : 2)))
260260
);
261261
var query = String.format(Locale.ROOT, """
262262
FROM test
@@ -306,16 +306,16 @@ private static float[] readVector(List<Number> leftVector) {
306306

307307
@Before
308308
public void setup() throws IOException {
309+
numDims = randomIntBetween(10, 20) * (elementType == ElementType.BIT ? 8 : 2);
309310
createIndexWithDenseVector("test");
310311

311-
numDims = randomIntBetween(10, 20) * 2; // even number
312312
int numDocs = randomIntBetween(10, 100);
313313
this.leftVectors = new ArrayList<>();
314314
IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs];
315315
for (int i = 0; i < numDocs; i++) {
316316
List<Number> leftVector = randomVector();
317317
List<Number> rightVector = randomVector();
318-
List<Number> oneDimVector = randomVector(1);
318+
List<Number> oneDimVector = randomVector(elementType == ElementType.BIT ? Byte.SIZE : 1);
319319
docs[i] = prepareIndex("test").setId("" + i)
320320
.setSource("id", String.valueOf(i), "left_vector", leftVector, "right_vector", rightVector, "one_dim_vector", oneDimVector);
321321
leftVectors.add(leftVector);
@@ -333,11 +333,28 @@ private List<Number> randomVector(int numDims) {
333333
if (rarely()) {
334334
return null;
335335
}
336-
List<Number> vector = new ArrayList<>(numDims);
337-
for (int j = 0; j < numDims; j++) {
336+
int dimensions = numDims;
337+
if (elementType == ElementType.BIT) {
338+
assert dimensions % 8 == 0 : "dimensions must be multiple of 8 for BIT element type but was " + dimensions;
339+
dimensions = dimensions / 8;
340+
}
341+
List<Number> vector = new ArrayList<>(dimensions);
342+
for (int j = 0; j < dimensions; j++) {
338343
switch (elementType) {
339-
case FLOAT -> vector.add(randomFloat());
340-
case BYTE, BIT -> vector.add((byte) randomIntBetween(-128, 127));
344+
case FLOAT -> {
345+
if (dimensions == 1) {
346+
vector.add(randomValueOtherThan(0f, () -> randomFloat()));
347+
} else {
348+
vector.add(randomFloat());
349+
}
350+
}
351+
case BYTE, BIT -> {
352+
if (dimensions == 1) {
353+
vector.add(randomValueOtherThan((byte) 0, () -> (byte) randomIntBetween(-128, 127)));
354+
} else {
355+
vector.add((byte) randomIntBetween(-128, 127));
356+
}
357+
}
341358
default -> throw new IllegalArgumentException("Unexpected element type: " + elementType);
342359
}
343360
}
@@ -352,9 +369,9 @@ private void createIndexWithDenseVector(String indexName) throws IOException {
352369
.startObject("id")
353370
.field("type", "integer")
354371
.endObject();
355-
createDenseVectorField(mapping, "left_vector");
356-
createDenseVectorField(mapping, "right_vector");
357-
createDenseVectorField(mapping, "one_dim_vector");
372+
createDenseVectorField(mapping, "left_vector", elementType, numDims);
373+
createDenseVectorField(mapping, "right_vector", elementType, numDims);
374+
createDenseVectorField(mapping, "one_dim_vector", elementType, elementType == ElementType.BIT ? Byte.SIZE : 1);
358375
mapping.endObject().endObject();
359376
Settings.Builder settingsBuilder = Settings.builder()
360377
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
@@ -364,9 +381,11 @@ private void createIndexWithDenseVector(String indexName) throws IOException {
364381
assertAcked(CreateRequest);
365382
}
366383

367-
private void createDenseVectorField(XContentBuilder mapping, String fieldName) throws IOException {
384+
private static void createDenseVectorField(XContentBuilder mapping, String fieldName, ElementType elementType, int dims)
385+
throws IOException {
368386
mapping.startObject(fieldName)
369387
.field("type", "dense_vector")
388+
.field("dims", dims)
370389
.field("similarity", "l2_norm")
371390
.field("element_type", elementType.toString().toLowerCase(Locale.ROOT))
372391
.startObject("index_options")

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -228,15 +228,30 @@ public final PushedBlockLoaderExpression tryPushToFieldLoading(SearchStats stats
228228
}
229229

230230
List<?> vectorList = (List<?>) literal.value();
231-
float[] vectorArray = new float[vectorList.size()];
232-
for (int i = 0; i < vectorList.size(); i++) {
233-
vectorArray[i] = ((Number) vectorList.get(i)).floatValue();
231+
DenseVectorFieldMapper.ElementType elementType = null;
232+
var fieldType = stats.fieldType(field.fieldName());
233+
if (fieldType instanceof DenseVectorFieldMapper.DenseVectorFieldType) {
234+
elementType = ((DenseVectorFieldMapper.DenseVectorFieldType) fieldType).getElementType();
235+
}
236+
if (elementType == null || elementType == DenseVectorFieldMapper.ElementType.FLOAT) {
237+
float[] floatVector = new float[vectorList.size()];
238+
for (int i = 0; i < vectorList.size(); i++) {
239+
floatVector[i] = ((Number) vectorList.get(i)).floatValue();
240+
}
241+
return new PushedBlockLoaderExpression(
242+
field,
243+
new DenseVectorFieldMapper.VectorSimilarityFunctionConfig(getSimilarityFunction(), floatVector)
244+
);
245+
} else {
246+
byte[] byteVector = new byte[vectorList.size()];
247+
for (int i = 0; i < vectorList.size(); i++) {
248+
byteVector[i] = ((Number) vectorList.get(i)).byteValue();
249+
}
250+
return new PushedBlockLoaderExpression(
251+
field,
252+
new DenseVectorFieldMapper.VectorSimilarityFunctionConfig(getSimilarityFunction(), byteVector)
253+
);
234254
}
235-
236-
return new PushedBlockLoaderExpression(
237-
field,
238-
new DenseVectorFieldMapper.VectorSimilarityFunctionConfig(getSimilarityFunction(), vectorArray)
239-
);
240255
}
241256

242257
interface VectorValueProvider extends Releasable {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,11 @@ public String constantValue(FieldAttribute.FieldName name) {
423423
return val;
424424
}
425425

426+
@Override
427+
public MappedFieldType fieldType(FieldName field) {
428+
return cache.computeIfAbsent(field.string(), this::makeFieldStats).config.fieldType;
429+
}
430+
426431
private interface DocCountTester {
427432
Boolean test(LeafReader leafReader) throws IOException;
428433
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,13 @@ default String constantValue(FieldName name) {
5454
return null;
5555
}
5656

57+
/**
58+
* Returns the mapped field type for the given field name, or null if the field is not found. This default implementation always returns null.
59+
*/
60+
default MappedFieldType fieldType(FieldName name) {
61+
return null;
62+
}
63+
5764
/**
5865
* When there are no search stats available, for example when there are no search contexts, we have static results.
5966
*/

0 commit comments

Comments
 (0)