diff --git a/docs/changelog/142974.yaml b/docs/changelog/142974.yaml new file mode 100644 index 0000000000000..3183b835092f1 --- /dev/null +++ b/docs/changelog/142974.yaml @@ -0,0 +1,6 @@ +area: ES|QL +issues: + - 139928 +pr: 142974 +summary: Add support for `dense_vector` in `COALESCE` +type: enhancement diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/coalesce.md b/docs/reference/query-languages/esql/_snippets/functions/types/coalesce.md index 3cfa43ebc3679..6667008bf957d 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/types/coalesce.md +++ b/docs/reference/query-languages/esql/_snippets/functions/types/coalesce.md @@ -10,6 +10,7 @@ | cartesian_shape | cartesian_shape | cartesian_shape | | date | date | date | | date_nanos | date_nanos | date_nanos | +| dense_vector | dense_vector | dense_vector | | exponential_histogram {applies_to}`stack: preview 9.3.0` | exponential_histogram {applies_to}`stack: preview 9.3.0` | exponential_histogram | | geo_point | geo_point | geo_point | | geo_shape | geo_shape | geo_shape | diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/coalesce.json b/docs/reference/query-languages/esql/kibana/definition/functions/coalesce.json index 5803287897946..2697dc12ea08c 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/coalesce.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/coalesce.json @@ -106,6 +106,24 @@ "variadic" : true, "returnType" : "date_nanos" }, + { + "params" : [ + { + "name" : "first", + "type" : "dense_vector", + "optional" : false, + "description" : "Expression to evaluate." + }, + { + "name" : "rest", + "type" : "dense_vector", + "optional" : true, + "description" : "Other expression to evaluate." 
+ } + ], + "variadic" : true, + "returnType" : "dense_vector" + }, { "params" : [ { diff --git a/x-pack/plugin/esql/build.gradle b/x-pack/plugin/esql/build.gradle index ca0c10e4d88e7..22fd512c7c6f9 100644 --- a/x-pack/plugin/esql/build.gradle +++ b/x-pack/plugin/esql/build.gradle @@ -531,6 +531,7 @@ def prop(Name, Type, type, TYPE, BYTES, Array) { "int" : type == "int" ? "true" : "", "long" : type == "long" ? "true" : "", "double" : type == "double" ? "true" : "", + "float" : type == "float" ? "true" : "", "BytesRef" : type == "BytesRef" ? "true" : "", "boolean" : type == "boolean" ? "true" : "", "nanosMillis": Name == "NanosMillis" ? "true" : "", @@ -544,6 +545,7 @@ tasks.named('stringTemplates').configure { var nanosMillisProperties = prop("NanosMillis", "Long", "long", "LONG", "Long.BYTES", "LongArray") var millisNanosProperties = prop("MillisNanos", "Long", "long", "LONG", "Long.BYTES", "LongArray") var doubleProperties = prop("Double", "Double", "double", "DOUBLE", "Double.BYTES", "DoubleArray") + var floatProperties = prop("Float", "Float", "float", "FLOAT", "Float.BYTES", "FloatArray") var bytesRefProperties = prop("BytesRef", "BytesRef", "BytesRef", "BYTES_REF", "org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF", "") var booleanProperties = prop("Boolean", "Boolean", "boolean", "BOOLEAN", "Byte.BYTES", "BitArray") var expHistoProperties = prop("ExponentialHistogram", "ExponentialHistogram", "ExponentialHistogram", "EXPONENTIAL_HISTOGRAM", "", "") @@ -607,6 +609,11 @@ tasks.named('stringTemplates').configure { it.inputFile = coalesceInputFile it.outputFile = "org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceDoubleEvaluator.java" } + template { + it.properties = floatProperties + it.inputFile = coalesceInputFile + it.outputFile = "org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceFloatEvaluator.java" + } template { it.properties = bytesRefProperties it.inputFile = coalesceInputFile diff --git 
a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 52add91cdf48c..d07e4fcc4b817 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -176,6 +176,7 @@ public class CsvTestsDataLoader { new TestDataset("dense_vector_text"), new TestDataset("mv_text"), new TestDataset("dense_vector"), + new TestDataset("dense_vector_coalesce").withRequiredCapabilities(EsqlCapabilities.Cap.COALESCE_DENSE_VECTOR), new TestDataset("dense_vector_bfloat16").withRequiredCapabilities(EsqlCapabilities.Cap.GENERIC_VECTOR_FORMAT), new TestDataset("dense_vector_arithmetic"), new TestDataset("web_logs"), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv index b2d865b0885e1..5479fdb90fd25 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv @@ -3,4 +3,4 @@ id:l, float_vector:dense_vector, byte_vector:dense_vector, bit_vector:dense_vect 1, [4.0, 5.0, 6.0], [40, 50, 60], [45, 9] 2, [9.0, 8.0, 7.0], [90, 80, 70], [127, 0] 3, [0.054, 0.032, 0.012], [100, 110, 120], [88, 53] -4, , , +4, , , diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector_coalesce.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector_coalesce.csv new file mode 100644 index 0000000000000..9dac8c2ba1794 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector_coalesce.csv @@ -0,0 +1,6 @@ +id:l, float_vector:dense_vector, float_vector_2:dense_vector, byte_vector:dense_vector, 
byte_vector_2:dense_vector, bit_vector:dense_vector, bit_vector_2:dense_vector, bfloat16_vector:dense_vector, bfloat16_vector_2:dense_vector +0, [1.0, 2.0, 3.0], [0.0, 0.0, 0.0], [10, 20, 30], [0, 0, 0], [13, 112], [0, 1], [1.0, 2.0, 3.0], [0.0, 0.0, 0.0] +1, [4.0, 5.0, 6.0], [0.0, 0.0, 0.0], [40, 50, 60], [0, 0, 0], [45, 9], [2, 3], [4.0, 5.0, 6.0], [0.0, 0.0, 0.0] +2, [9.0, 8.0, 7.0], [0.0, 0.0, 0.0], [90, 80, 70], [0, 0, 0], [127, 0], [4, 5], [9.0, 8.0, 7.0], [0.0, 0.0, 0.0] +3, [0.054, 0.032, 0.012], [0.0, 0.0, 0.0], [100, 110, 120], [0, 0, 0], [88, 53], [6, 7], [1.0, 5.0, 7.0], [0.0, 0.0, 0.0] +4, , [0.5, 0.5, 0.5], , [1, 2, 3], , [99, 100], , [1.0, 2.0, 3.0] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-bfloat16.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-bfloat16.csv-spec index cb7d6e8164238..9a867c5655fa5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-bfloat16.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-bfloat16.csv-spec @@ -56,6 +56,39 @@ id:l | new_vector:dense_vector 4 | null ; +coalesceBfloat16VectorWithFallback +required_capability: coalesce_dense_vector + +FROM dense_vector_coalesce +| EVAL v = COALESCE(bfloat16_vector, to_dense_vector([0, 0, 0])) +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [1.0, 2.0, 3.0] +1 | [4.0, 5.0, 6.0] +2 | [9.0, 8.0, 7.0] +3 | [1.0, 5.0, 7.0] +4 | [0.0, 0.0, 0.0] +; + +coalesceBfloat16VectorTwoColumns +required_capability: coalesce_dense_vector + +FROM dense_vector_coalesce +| EVAL v = COALESCE(bfloat16_vector, bfloat16_vector_2) +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [1.0, 2.0, 3.0] +1 | [4.0, 5.0, 6.0] +2 | [9.0, 8.0, 7.0] +3 | [1.0, 5.0, 7.0] +4 | [1.0, 2.0, 3.0] +; denseVectorAggregationFunctions required_capability: dense_vector_agg_functions diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-bit.csv-spec 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-bit.csv-spec index cb705a6120432..581da6753d38e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-bit.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-bit.csv-spec @@ -52,6 +52,39 @@ id:l | new_vector:dense_vector 4 | null ; +coalesceBitVectorWithFallback +required_capability: coalesce_dense_vector + +FROM dense_vector +| EVAL v = COALESCE(bit_vector, to_dense_vector([0, 0])) +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [13, 112] +1 | [45, 9] +2 | [127, 0] +3 | [88, 53] +4 | [0, 0] +; + +coalesceBitVectorTwoColumns +required_capability: coalesce_dense_vector + +FROM dense_vector_coalesce +| EVAL v = COALESCE(bit_vector, bit_vector_2) +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [13, 112] +1 | [45, 9] +2 | [127, 0] +3 | [88, 53] +4 | [99, 100] +; denseVectorAggregationFunctions required_capability: dense_vector_agg_functions diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-byte.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-byte.csv-spec index f7cdec42cd0b0..d88f1fb8abe93 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-byte.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector-byte.csv-spec @@ -52,6 +52,39 @@ id:l | new_vector:dense_vector 4 | null ; +coalesceByteVectorWithFallback +required_capability: coalesce_dense_vector + +FROM dense_vector +| EVAL v = COALESCE(byte_vector, to_dense_vector([0, 0, 0])) +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [10, 20, 30] +1 | [40, 50, 60] +2 | [90, 80, 70] +3 | [100, 110, 120] +4 | [0, 0, 0] +; + +coalesceByteVectorTwoColumns +required_capability: coalesce_dense_vector + +FROM dense_vector_coalesce +| EVAL v = COALESCE(byte_vector, byte_vector_2) +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [10, 20, 30] +1 | [40, 50, 60] +2 
| [90, 80, 70] +3 | [100, 110, 120] +4 | [1, 2, 3] +; denseVectorAggregationFunctions required_capability: dense_vector_agg_functions diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec index 732b50bea2a33..3f910fb632eaf 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec @@ -179,3 +179,37 @@ count_vectors:long 1 ; +coalesceDenseVectorWithFallback +required_capability: coalesce_dense_vector + +FROM dense_vector +| EVAL v = COALESCE(float_vector, to_dense_vector([0, 0, 0])) +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [1.0, 2.0, 3.0] +1 | [4.0, 5.0, 6.0] +2 | [9.0, 8.0, 7.0] +3 | [0.054, 0.032, 0.012] +4 | [0.0, 0.0, 0.0] +; + +coalesceDenseVectorTwoColumns +required_capability: coalesce_dense_vector + +FROM dense_vector_coalesce +| EVAL v = COALESCE(float_vector, float_vector_2) +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [1.0, 2.0, 3.0] +1 | [4.0, 5.0, 6.0] +2 | [9.0, 8.0, 7.0] +3 | [0.054, 0.032, 0.012] +4 | [0.5, 0.5, 0.5] +; + diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/index/mappings/mapping-dense_vector.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/index/mappings/mapping-dense_vector.json index f6e0097e167da..5fffad68e775e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/index/mappings/mapping-dense_vector.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/index/mappings/mapping-dense_vector.json @@ -35,3 +35,4 @@ } } } + diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/index/mappings/mapping-dense_vector_coalesce.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/index/mappings/mapping-dense_vector_coalesce.json new file mode 100644 index 0000000000000..38def43b93f7c --- /dev/null +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/index/mappings/mapping-dense_vector_coalesce.json @@ -0,0 +1,87 @@ +{ + "properties": { + "id": { + "type": "long" + }, + "float_vector": { + "type": "dense_vector", + "similarity": "l2_norm", + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 100 + } + }, + "float_vector_2": { + "type": "dense_vector", + "similarity": "l2_norm", + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 100 + } + }, + "byte_vector": { + "type": "dense_vector", + "similarity": "l2_norm", + "element_type": "byte", + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 100 + } + }, + "byte_vector_2": { + "type": "dense_vector", + "similarity": "l2_norm", + "element_type": "byte", + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 100 + } + }, + "bit_vector": { + "type": "dense_vector", + "dims": 16, + "similarity": "l2_norm", + "element_type": "bit", + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 100 + } + }, + "bit_vector_2": { + "type": "dense_vector", + "dims": 16, + "similarity": "l2_norm", + "element_type": "bit", + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 100 + } + }, + "bfloat16_vector": { + "type": "dense_vector", + "similarity": "l2_norm", + "element_type": "bfloat16", + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 100 + } + }, + "bfloat16_vector_2": { + "type": "dense_vector", + "similarity": "l2_norm", + "element_type": "bfloat16", + "index_options": { + "type": "hnsw", + "m": 16, + "ef_construction": 100 + } + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated-src/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceFloatEvaluator.java b/x-pack/plugin/esql/src/main/generated-src/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceFloatEvaluator.java new file mode 100644 index 0000000000000..ea7ba1c2ebd7a --- /dev/null +++ 
b/x-pack/plugin/esql/src/main/generated-src/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceFloatEvaluator.java @@ -0,0 +1,255 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.nulls; + +// begin generated imports +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; + +import java.util.List; +import java.util.stream.IntStream; +// end generated imports + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Coalesce}. + * This class is generated. Edit {@code X-CoalesceEvaluator.java.st} instead. 
+ */ +abstract sealed class CoalesceFloatEvaluator implements EvalOperator.ExpressionEvaluator permits + CoalesceFloatEvaluator.CoalesceFloatEagerEvaluator, // + CoalesceFloatEvaluator.CoalesceFloatLazyEvaluator { + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(CoalesceFloatEvaluator.class); + + static ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator, List children) { + List childEvaluators = children.stream().map(toEvaluator::apply).toList(); + if (childEvaluators.stream().allMatch(ExpressionEvaluator.Factory::eagerEvalSafeInLazy)) { + return new ExpressionEvaluator.Factory() { + @Override + public ExpressionEvaluator get(DriverContext context) { + return new CoalesceFloatEagerEvaluator( + // comment to make spotless happy about line breaks + context, + childEvaluators.stream().map(x -> x.get(context)).toList() + ); + } + + @Override + public String toString() { + return "CoalesceFloatEagerEvaluator[values=" + childEvaluators + ']'; + } + }; + } + return new ExpressionEvaluator.Factory() { + @Override + public ExpressionEvaluator get(DriverContext context) { + return new CoalesceFloatLazyEvaluator( + // comment to make spotless happy about line breaks + context, + childEvaluators.stream().map(x -> x.get(context)).toList() + ); + } + + @Override + public String toString() { + return "CoalesceFloatLazyEvaluator[values=" + childEvaluators + ']'; + } + }; + } + + protected final DriverContext driverContext; + protected final List evaluators; + + protected CoalesceFloatEvaluator(DriverContext driverContext, List evaluators) { + this.driverContext = driverContext; + this.evaluators = evaluators; + } + + @Override + public final FloatBlock eval(Page page) { + return entireBlock(page); + } + + /** + * Evaluate COALESCE for an entire {@link Block} for as long as we can, then shift to + * {@link #perPosition} evaluation. + *

+ * <p>
+ * Entire Block evaluation is the "normal" way to run the compute engine,
+ * just calling {@link EvalOperator.ExpressionEvaluator#eval}. It's much faster so we try
+ * that first. For each evaluator, we {@linkplain EvalOperator.ExpressionEvaluator#eval} and:
+ * </p>
+ * <ul>
+ *     <li>If the {@linkplain Block} doesn't have any nulls we return it. COALESCE done.</li>
+ *     <li>If the {@linkplain Block} is only nulls we skip it and try the next evaluator.</li>
+ *     <li>If this is the last evaluator we just return it. COALESCE done.</li>
+ *     <li>
+ *         Otherwise, the {@linkplain Block} has mixed nulls and non-nulls so we drop
+ *         into a per position evaluator.
+ *     </li>
+ * </ul>
+ */ + private FloatBlock entireBlock(Page page) { + int lastFullBlockIdx = 0; + while (true) { + FloatBlock lastFullBlock = (FloatBlock) evaluators.get(lastFullBlockIdx++).eval(page); + if (lastFullBlockIdx == evaluators.size() || lastFullBlock.asVector() != null) { + return lastFullBlock; + } + if (lastFullBlock.areAllValuesNull()) { + // Result is all nulls and isn't the last result so we don't need any of it. + lastFullBlock.close(); + continue; + } + // The result has some nulls and some non-nulls. + return perPosition(page, lastFullBlock, lastFullBlockIdx); + } + } + + /** + * Evaluate each position of the incoming {@link Page} for COALESCE + * independently. Our attempt to evaluate entire blocks has yielded + * a block that contains some nulls and some non-nulls and we have + * to fill in the nulls with the results of calling the remaining + * evaluators. + *

+ * <p>
+ * This must not return warnings caused by
+ * evaluating positions for which a previous evaluator returned
+ * non-null. These are positions that, at least from the perspective
+ * of a compute engine user, don't have to be
+ * evaluated. Put another way, this must function as though
+ * {@code COALESCE} were per-position lazy. It can manage that
+ * any way it likes.
+ * </p>
+ */ + protected abstract FloatBlock perPosition(Page page, FloatBlock lastFullBlock, int firstToEvaluate); + + @Override + public final String toString() { + return getClass().getSimpleName() + "[values=" + evaluators + ']'; + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + for (ExpressionEvaluator e : evaluators) { + baseRamBytesUsed += e.baseRamBytesUsed(); + } + return baseRamBytesUsed; + } + + @Override + public final void close() { + Releasables.closeExpectNoException(() -> Releasables.close(evaluators)); + } + + /** + * Evaluates {@code COALESCE} eagerly per position if entire-block evaluation fails. + * First we evaluate all remaining evaluators, and then we pluck the first non-null + * value from each one. This is much faster than + * {@link CoalesceFloatLazyEvaluator} but will include spurious warnings if any of the + * evaluators make them so we only use it for evaluators that are + * {@link Factory#eagerEvalSafeInLazy safe} to evaluate eagerly + * in a lazy environment. 
+ */ + static final class CoalesceFloatEagerEvaluator extends CoalesceFloatEvaluator { + CoalesceFloatEagerEvaluator(DriverContext driverContext, List evaluators) { + super(driverContext, evaluators); + } + + @Override + protected FloatBlock perPosition(Page page, FloatBlock lastFullBlock, int firstToEvaluate) { + int positionCount = page.getPositionCount(); + FloatBlock[] flatten = new FloatBlock[evaluators.size() - firstToEvaluate + 1]; + try { + flatten[0] = lastFullBlock; + for (int f = 1; f < flatten.length; f++) { + flatten[f] = (FloatBlock) evaluators.get(firstToEvaluate + f - 1).eval(page); + } + try ( + FloatBlock.Builder result = driverContext.blockFactory() // + .newFloatBlockBuilder(positionCount) + ) { + position: for (int p = 0; p < positionCount; p++) { + for (FloatBlock f : flatten) { + if (false == f.isNull(p)) { + result.copyFrom(f, p); + continue position; + } + } + result.appendNull(); + } + return result.build(); + } + } finally { + Releasables.close(flatten); + } + } + } + + /** + * Evaluates {@code COALESCE} lazily per position if entire-block evaluation fails. + * For each position we either: + *
+ * <ul>
+ *     <li>Take the non-null values from the {@code lastFullBlock}</li>
+ *     <li>
+ *         Evaluate the remaining evaluators one at a time, keeping
+ *         the first non-null value.
+ *     </li>
+ * </ul>
+ */ + static final class CoalesceFloatLazyEvaluator extends CoalesceFloatEvaluator { + CoalesceFloatLazyEvaluator(DriverContext driverContext, List evaluators) { + super(driverContext, evaluators); + } + + @Override + protected FloatBlock perPosition(Page page, FloatBlock lastFullBlock, int firstToEvaluate) { + int positionCount = page.getPositionCount(); + try ( + FloatBlock.Builder result = driverContext.blockFactory() // + .newFloatBlockBuilder(positionCount) + ) { + position: for (int p = 0; p < positionCount; p++) { + if (lastFullBlock.isNull(p) == false) { + result.copyFrom(lastFullBlock, p, p + 1); + continue; + } + int[] positions = new int[] { p }; + Page limited = new Page( + 1, + IntStream.range(0, page.getBlockCount()) + .mapToObj(b -> page.getBlock(b).filter(false, positions)) + .toArray(Block[]::new) + ); + try (Releasable ignored = limited::releaseBlocks) { + for (int e = firstToEvaluate; e < evaluators.size(); e++) { + try (FloatBlock block = (FloatBlock) evaluators.get(e).eval(limited)) { + if (false == block.isNull(0)) { + result.copyFrom(block, 0); + continue position; + } + } + } + result.appendNull(); + } + } + return result.build(); + } finally { + lastFullBlock.close(); + } + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 7b70621514d99..81445c2f76a44 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1599,6 +1599,11 @@ public enum Cap { */ TO_DENSE_VECTOR_FUNCTION, + /** + * COALESCE function support for dense_vector type. 
+ */ + COALESCE_DENSE_VECTOR, + /** * Multivalued query parameters */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java index 26ac8708a7581..ca96611a14e97 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java @@ -49,6 +49,7 @@ public class Coalesce extends EsqlScalarFunction implements OptionalArgument { "cartesian_shape", "date_nanos", "date", + "dense_vector", "histogram", "geo_point", "geo_shape", @@ -75,6 +76,7 @@ public Coalesce( "cartesian_shape", "date_nanos", "date", + "dense_vector", "histogram", "geo_point", "geo_shape", @@ -99,6 +101,7 @@ public Coalesce( "cartesian_shape", "date_nanos", "date", + "dense_vector", "histogram", "geo_point", "geo_shape", @@ -218,9 +221,10 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { .toEvaluator(toEvaluator, children()); case EXPONENTIAL_HISTOGRAM -> CoalesceExponentialHistogramEvaluator.toEvaluator(toEvaluator, children()); case TDIGEST -> CoalesceTDigestEvaluator.toEvaluator(toEvaluator, children()); + case DENSE_VECTOR -> CoalesceFloatEvaluator.toEvaluator(toEvaluator, children()); case NULL -> EvalOperator.CONSTANT_NULL_FACTORY; case UNSUPPORTED, SHORT, BYTE, DATE_PERIOD, OBJECT, DOC_DATA_TYPE, SOURCE, TIME_DURATION, FLOAT, HALF_FLOAT, TSID_DATA_TYPE, - SCALED_FLOAT, AGGREGATE_METRIC_DOUBLE, DENSE_VECTOR, DATE_RANGE -> throw new UnsupportedOperationException( + SCALED_FLOAT, AGGREGATE_METRIC_DOUBLE, DATE_RANGE -> throw new UnsupportedOperationException( dataType() + " can't be coalesced" ); }; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java index bede3ee59521d..d6f85bd3060cb 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java @@ -52,6 +52,7 @@ import static org.elasticsearch.compute.data.BlockUtils.toJavaObject; import static org.elasticsearch.xpack.esql.EsqlTestUtils.randomLiteral; import static org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier.appliesTo; +import static org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier.randomDenseVector; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; @@ -165,6 +166,20 @@ public static Iterable parameters() { equalTo(firstHisto == null ? secondHisto : firstHisto) ); })); + noNullsSuppliers.add(new TestCaseSupplier(List.of(DataType.DENSE_VECTOR, DataType.DENSE_VECTOR), () -> { + int dimensions = between(64, 128); + List firstVector = randomBoolean() ? null : randomDenseVector(dimensions); + List secondVector = randomDenseVector(dimensions); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(firstVector, DataType.DENSE_VECTOR, "first"), + new TestCaseSupplier.TypedData(secondVector, DataType.DENSE_VECTOR, "second") + ), + "CoalesceFloatEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]", + DataType.DENSE_VECTOR, + equalTo(firstVector == null ? secondVector : firstVector) + ); + })); List suppliers = new ArrayList<>(noNullsSuppliers); for (TestCaseSupplier s : noNullsSuppliers) { for (int nullUpTo = 1; nullUpTo < s.types().size(); nullUpTo++) {