From bb1f129d1051cd7760d8d4b811b612b1fa929fc2 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 17 Dec 2025 00:39:02 +0000 Subject: [PATCH 01/14] WIP MV_UNION function --- .../esql/functions-operators/mv-functions.md | 3 + .../src/main/resources/mv_union.csv-spec | 207 ++++++++++++ .../multivalue/MvUnionBooleanEvaluator.java | 127 ++++++++ .../multivalue/MvUnionBytesRefEvaluator.java | 128 ++++++++ .../multivalue/MvUnionDoubleEvaluator.java | 127 ++++++++ .../multivalue/MvUnionIntEvaluator.java | 127 ++++++++ .../multivalue/MvUnionLongEvaluator.java | 127 ++++++++ .../xpack/esql/action/EsqlCapabilities.java | 5 + .../function/EsqlFunctionRegistry.java | 2 + .../multivalue/MvFunctionWritables.java | 1 + .../function/scalar/multivalue/MvUnion.java | 305 ++++++++++++++++++ .../xpack/esql/analysis/VerifierTests.java | 25 ++ .../scalar/multivalue/MvUnionTests.java | 281 ++++++++++++++++ 13 files changed, 1465 insertions(+) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionBooleanEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionBytesRefEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionDoubleEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionIntEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionLongEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java diff --git a/docs/reference/query-languages/esql/functions-operators/mv-functions.md b/docs/reference/query-languages/esql/functions-operators/mv-functions.md index fa7465ab99513..017856dc4d4a3 100644 --- a/docs/reference/query-languages/esql/functions-operators/mv-functions.md +++ b/docs/reference/query-languages/esql/functions-operators/mv-functions.md @@ -68,6 +68,9 @@ mapped_pages: :::{include} ../_snippets/functions/layout/mv_sum.md ::: +:::{include} ../_snippets/functions/layout/mv_union.md +::: + :::{include} ../_snippets/functions/layout/mv_zip.md ::: diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec new file mode 100644 index 0000000000000..c8a4900127295 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec @@ -0,0 +1,207 @@ +testMvUnionWithIntValues +required_capability: fn_mv_union +// tag::testMvUnionWithIntValues[] +ROW a = [1, 2, 3, 4, 5], b = [2, 3, 4, 5, 6] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +// end::testMvUnionWithIntValues[] +; +// tag::testMvUnionWithIntValues-result[] +finalValue:integer +[1, 2, 3, 4, 5, 6] +// end::testMvUnionWithIntValues-result[] +; + +testMvUnionWithLongValues +required_capability: fn_mv_union +// tag::testMvUnionWithLongValues[] +ROW a = [1, 2, 3, 4, 5]::long, b = [2, 3, 4, 5, 6]::long +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +// end::testMvUnionWithLongValues[] +; +// tag::testMvUnionWithLongValues-result[] +finalValue:long +[1, 2, 3, 4, 5, 6] +// end::testMvUnionWithLongValues-result[] +; + +testMvUnionWithBooleanValues +required_capability: fn_mv_union +// tag::testMvUnionWithBooleanValues[] +ROW a = [true, false], b = [false] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +// end::testMvUnionWithBooleanValues[] +; +// tag::testMvUnionWithBooleanValues-result[] +finalValue:boolean +[true, false] +// end::testMvUnionWithBooleanValues-result[] +; + +testMvUnionWithDoubleValues +required_capability: fn_mv_union +// tag::testMvUnionWithDoubleValues[] +ROW a = [5.2, 10.5, 1.12345], b = [10.5, 2.6928] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +// end::testMvUnionWithDoubleValues[] +; +// tag::testMvUnionWithDoubleValues-result[] +finalValue:double +[5.2, 10.5, 1.12345, 2.6928] +// end::testMvUnionWithDoubleValues-result[] +; + +testMvUnionWithBytesRefValues +required_capability: fn_mv_union +// tag::testMvUnionWithBytesRefValues[] +ROW a = ["one", "two", "three"], b = ["two", "four"] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +// end::testMvUnionWithBytesRefValues[] +; +// tag::testMvUnionWithBytesRefValues-result[] +finalValue:keyword +["one", "two", "three", "four"] +// end::testMvUnionWithBytesRefValues-result[] +; + +testMvUnionGeoPoint +required_capability: fn_mv_union + +ROW a = ["POINT(42.97109629958868 14.7552534006536)", "POINT(23.23 14.7)"]::geo_point, b = ["POINT(42.97109629958868 14.7552534006536)", "POINT(12.12 11.22)"]::geo_point +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue; + +finalValue:geo_point +["POINT (42.97109629958868 14.7552534006536)", "POINT (23.23 14.7)", "POINT (12.12 11.22)"] +; + +testMvUnionGeoShape +required_capability: fn_mv_union + +ROW a = ["POLYGON((-9 -9, -1 -9, -1 -1, -9 -1, -9 -9))","POLYGON((1 1, 9 1, 9 9, 1 9, 1 1))"], b = ["POLYGON((-5 -5, -1 -5, -1 -1, -5 -1, -5 -5))","POLYGON((1 1, 9 1, 9 9, 1 9, 1 1))"] +| EVAL finalValue = MV_UNION(TO_GEOSHAPE(a), TO_GEOSHAPE(b)) +| KEEP finalValue; + +finalValue:geo_shape +["POLYGON((-9 -9, -1 -9, -1 -1, -9 -1, -9 -9))", "POLYGON((1 1, 9 1, 9 9, 1 9, 1 1))", "POLYGON((-5 -5, -1 -5, -1 -1, -5 -1, -5 -5))"] +; + +testMvUnionIp +required_capability: fn_mv_union + +ROW a = ["1.1.1.1", "2.2.2.2"]::ip, b = ["3.3.3.3", "2.2.2.2"]::ip +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue; + +finalValue:ip +["1.1.1.1", "2.2.2.2", "3.3.3.3"] +; + +testMvUnionVersion +required_capability: fn_mv_union + +ROW a = ["1.2.3", "9.3.0"]::version, b = ["4.5", "9.3.0"]::version +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue; + +finalValue:version +["1.2.3", "9.3.0", "4.5"] +; + +testMvUnionWithSingleValueParam +required_capability: fn_mv_union + +ROW a = 4, b = [4, 5, 6, 7] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue; + +finalValue:integer +[4, 5, 6, 7] +; + +testMvUnionWithSecondSingleValueParam +required_capability: fn_mv_union + +ROW a = [1, 2, 3, 4], b = 5 +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue; + +finalValue:integer +[1, 2, 3, 4, 5] +; + +testMvUnionWithTwoSingleValues +required_capability: fn_mv_union + +ROW a = 1, b = 2 +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue; + +finalValue:integer +[1, 2] +; + +testMvUnionWithIdenticalSingleValues +required_capability: fn_mv_union + +ROW a = 1, b = 1 +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue; + +finalValue:integer +1 +; + +testMvUnionAgainstAnIndex +required_capability: fn_mv_union + +FROM employees +| WHERE MV_COUNT(MV_UNION(salary_change, [-7.26, 12.09])) > 2 +| SORT emp_no +| LIMIT 5 +| KEEP emp_no, hire_date, salary_change; + +emp_no:integer | hire_date:datetime | salary_change:double +10001 | 1986-06-26T00:00:00.000Z | [-1.36, 13.48, 14.68] +10003 | 1986-12-01T00:00:00.000Z | [1.08, 2.29, 6.97, 12.4] +10004 | 1986-12-01T00:00:00.000Z | [-1.78, -0.85, 3.6, 5.04] +10006 | 1989-06-02T00:00:00.000Z | [-6.77, -3.43, 1.63, 3.25] +10008 | 1994-09-15T00:00:00.000Z | [-5.96, -2.92, -0.98, 5.39] +; + +testMvUnionNullReturnedWhenFirstArgIsNull +required_capability: fn_mv_union + +ROW a = [1, 2, 3, 4] +| EVAL finalValue = MV_UNION(null, a) +| KEEP finalValue; + +finalValue:integer +null +; + +testMvUnionNullReturnedWhenSecondArgIsNull +required_capability: fn_mv_union + +ROW a = [1, 2, 3, 4] +| EVAL finalValue = MV_UNION(a, null) +| KEEP finalValue; + +finalValue:integer +null +; + +testMvUnionNullReturnedWhenBothArgsAreNull +required_capability: fn_mv_union + +ROW a = [1, 2, 3, 4] +| EVAL finalValue = MV_UNION(null, null) +| KEEP finalValue; + +finalValue:null +null +; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionBooleanEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionBooleanEvaluator.java new file mode 100644 index 0000000000000..72bfce3bb9f3e --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionBooleanEvaluator.java @@ -0,0 +1,127 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link MvUnion}. + * This class is generated. Edit {@code EvaluatorImplementer} instead. + */ +public final class MvUnionBooleanEvaluator implements EvalOperator.ExpressionEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MvUnionBooleanEvaluator.class); + + private final Source source; + + private final EvalOperator.ExpressionEvaluator field1; + + private final EvalOperator.ExpressionEvaluator field2; + + private final DriverContext driverContext; + + private Warnings warnings; + + public MvUnionBooleanEvaluator(Source source, EvalOperator.ExpressionEvaluator field1, + EvalOperator.ExpressionEvaluator field2, DriverContext driverContext) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BooleanBlock field1Block = (BooleanBlock) field1.eval(page)) { + try (BooleanBlock field2Block = (BooleanBlock) field2.eval(page)) { + return eval(page.getPositionCount(), field1Block, field2Block); + } + } + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += field1.baseRamBytesUsed(); + baseRamBytesUsed += field2.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public BooleanBlock eval(int positionCount, BooleanBlock field1Block, BooleanBlock field2Block) { + try(BooleanBlock.Builder result = driverContext.blockFactory().newBooleanBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + boolean allBlocksAreNulls = true; + if (!field1Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (!field2Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (allBlocksAreNulls) { + result.appendNull(); + continue position; + } + MvUnion.process(result, p, field1Block, field2Block); + } + return result.build(); + } + } + + @Override + public String toString() { + return "MvUnionBooleanEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(field1, field2); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory field1; + + private final EvalOperator.ExpressionEvaluator.Factory field2; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field1, + EvalOperator.ExpressionEvaluator.Factory field2) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + } + + @Override + public MvUnionBooleanEvaluator get(DriverContext context) { + return new MvUnionBooleanEvaluator(source, field1.get(context), field2.get(context), context); + } + + @Override + public String toString() { + return "MvUnionBooleanEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionBytesRefEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionBytesRefEvaluator.java new file mode 100644 index 0000000000000..ef55b4e4d3f12 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionBytesRefEvaluator.java @@ -0,0 +1,128 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link MvUnion}. + * This class is generated. Edit {@code EvaluatorImplementer} instead. + */ +public final class MvUnionBytesRefEvaluator implements EvalOperator.ExpressionEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MvUnionBytesRefEvaluator.class); + + private final Source source; + + private final EvalOperator.ExpressionEvaluator field1; + + private final EvalOperator.ExpressionEvaluator field2; + + private final DriverContext driverContext; + + private Warnings warnings; + + public MvUnionBytesRefEvaluator(Source source, EvalOperator.ExpressionEvaluator field1, + EvalOperator.ExpressionEvaluator field2, DriverContext driverContext) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock field1Block = (BytesRefBlock) field1.eval(page)) { + try (BytesRefBlock field2Block = (BytesRefBlock) field2.eval(page)) { + return eval(page.getPositionCount(), field1Block, field2Block); + } + } + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += field1.baseRamBytesUsed(); + baseRamBytesUsed += field2.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public BytesRefBlock eval(int positionCount, BytesRefBlock field1Block, + BytesRefBlock field2Block) { + try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + boolean allBlocksAreNulls = true; + if (!field1Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (!field2Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (allBlocksAreNulls) { + result.appendNull(); + continue position; + } + MvUnion.process(result, p, field1Block, field2Block); + } + return result.build(); + } + } + + @Override + public String toString() { + return "MvUnionBytesRefEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(field1, field2); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory field1; + + private final EvalOperator.ExpressionEvaluator.Factory field2; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field1, + EvalOperator.ExpressionEvaluator.Factory field2) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + } + + @Override + public MvUnionBytesRefEvaluator get(DriverContext context) { + return new MvUnionBytesRefEvaluator(source, field1.get(context), field2.get(context), context); + } + + @Override + public String toString() { + return "MvUnionBytesRefEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionDoubleEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionDoubleEvaluator.java new file mode 100644 index 0000000000000..2da98f2957fcd --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionDoubleEvaluator.java @@ -0,0 +1,127 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link MvUnion}. + * This class is generated. Edit {@code EvaluatorImplementer} instead. + */ +public final class MvUnionDoubleEvaluator implements EvalOperator.ExpressionEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MvUnionDoubleEvaluator.class); + + private final Source source; + + private final EvalOperator.ExpressionEvaluator field1; + + private final EvalOperator.ExpressionEvaluator field2; + + private final DriverContext driverContext; + + private Warnings warnings; + + public MvUnionDoubleEvaluator(Source source, EvalOperator.ExpressionEvaluator field1, + EvalOperator.ExpressionEvaluator field2, DriverContext driverContext) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (DoubleBlock field1Block = (DoubleBlock) field1.eval(page)) { + try (DoubleBlock field2Block = (DoubleBlock) field2.eval(page)) { + return eval(page.getPositionCount(), field1Block, field2Block); + } + } + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += field1.baseRamBytesUsed(); + baseRamBytesUsed += field2.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public DoubleBlock eval(int positionCount, DoubleBlock field1Block, DoubleBlock field2Block) { + try(DoubleBlock.Builder result = driverContext.blockFactory().newDoubleBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + boolean allBlocksAreNulls = true; + if (!field1Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (!field2Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (allBlocksAreNulls) { + result.appendNull(); + continue position; + } + MvUnion.process(result, p, field1Block, field2Block); + } + return result.build(); + } + } + + @Override + public String toString() { + return "MvUnionDoubleEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(field1, field2); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory field1; + + private final EvalOperator.ExpressionEvaluator.Factory field2; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field1, + EvalOperator.ExpressionEvaluator.Factory field2) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + } + + @Override + public MvUnionDoubleEvaluator get(DriverContext context) { + return new MvUnionDoubleEvaluator(source, field1.get(context), field2.get(context), context); + } + + @Override + public String toString() { + return "MvUnionDoubleEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionIntEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionIntEvaluator.java new file mode 100644 index 0000000000000..2ea312a91bf65 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionIntEvaluator.java @@ -0,0 +1,127 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link MvUnion}. + * This class is generated. Edit {@code EvaluatorImplementer} instead. + */ +public final class MvUnionIntEvaluator implements EvalOperator.ExpressionEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MvUnionIntEvaluator.class); + + private final Source source; + + private final EvalOperator.ExpressionEvaluator field1; + + private final EvalOperator.ExpressionEvaluator field2; + + private final DriverContext driverContext; + + private Warnings warnings; + + public MvUnionIntEvaluator(Source source, EvalOperator.ExpressionEvaluator field1, + EvalOperator.ExpressionEvaluator field2, DriverContext driverContext) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (IntBlock field1Block = (IntBlock) field1.eval(page)) { + try (IntBlock field2Block = (IntBlock) field2.eval(page)) { + return eval(page.getPositionCount(), field1Block, field2Block); + } + } + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += field1.baseRamBytesUsed(); + baseRamBytesUsed += field2.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public IntBlock eval(int positionCount, IntBlock field1Block, IntBlock field2Block) { + try(IntBlock.Builder result = driverContext.blockFactory().newIntBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + boolean allBlocksAreNulls = true; + if (!field1Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (!field2Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (allBlocksAreNulls) { + result.appendNull(); + continue position; + } + MvUnion.process(result, p, field1Block, field2Block); + } + return result.build(); + } + } + + @Override + public String toString() { + return "MvUnionIntEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(field1, field2); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory field1; + + private final EvalOperator.ExpressionEvaluator.Factory field2; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field1, + EvalOperator.ExpressionEvaluator.Factory field2) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + } + + @Override + public MvUnionIntEvaluator get(DriverContext context) { + return new MvUnionIntEvaluator(source, field1.get(context), field2.get(context), context); + } + + @Override + public String toString() { + return "MvUnionIntEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionLongEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionLongEvaluator.java new file mode 100644 index 0000000000000..f9aae8575f3ad --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionLongEvaluator.java @@ -0,0 +1,127 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link MvUnion}. + * This class is generated. Edit {@code EvaluatorImplementer} instead. + */ +public final class MvUnionLongEvaluator implements EvalOperator.ExpressionEvaluator { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(MvUnionLongEvaluator.class); + + private final Source source; + + private final EvalOperator.ExpressionEvaluator field1; + + private final EvalOperator.ExpressionEvaluator field2; + + private final DriverContext driverContext; + + private Warnings warnings; + + public MvUnionLongEvaluator(Source source, EvalOperator.ExpressionEvaluator field1, + EvalOperator.ExpressionEvaluator field2, DriverContext driverContext) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (LongBlock field1Block = (LongBlock) field1.eval(page)) { + try (LongBlock field2Block = (LongBlock) field2.eval(page)) { + return eval(page.getPositionCount(), field1Block, field2Block); + } + } + } + + @Override + public long baseRamBytesUsed() { + long baseRamBytesUsed = BASE_RAM_BYTES_USED; + baseRamBytesUsed += field1.baseRamBytesUsed(); + baseRamBytesUsed += field2.baseRamBytesUsed(); + return baseRamBytesUsed; + } + + public LongBlock eval(int positionCount, LongBlock field1Block, LongBlock field2Block) { + try(LongBlock.Builder result = driverContext.blockFactory().newLongBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + boolean allBlocksAreNulls = true; + if (!field1Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (!field2Block.isNull(p)) { + allBlocksAreNulls = false; + } + if (allBlocksAreNulls) { + result.appendNull(); + continue position; + } + MvUnion.process(result, p, field1Block, field2Block); + } + return result.build(); + } + } + + @Override + public String toString() { + return "MvUnionLongEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(field1, field2); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory field1; + + private final EvalOperator.ExpressionEvaluator.Factory field2; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory field1, + EvalOperator.ExpressionEvaluator.Factory field2) { + this.source = source; + this.field1 = field1; + this.field2 = field2; + } + + @Override + public MvUnionLongEvaluator get(DriverContext context) { + return new MvUnionLongEvaluator(source, field1.get(context), field2.get(context), context); + } + + @Override + public String toString() { + return "MvUnionLongEvaluator[" + "field1=" + field1 + ", field2=" + field2 + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 4a71a3e313dea..8bd9363770a8d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1781,6 +1781,11 @@ public enum Cap { */ FN_MV_INTERSECTION, + /** + * Support for the MV_UNION function which returns the set union of two multivalued fields + */ + FN_MV_UNION, + // Last capability should still have a comma for fewer merge conflicts when adding new ones :) // This comment prevents the semicolon from being on the previous capability when Spotless formats the file. ; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index cbbada51de3eb..b1b0456f2e33d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -171,6 +171,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSlice; import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSort; import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSum; +import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvUnion; import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvZip; import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; import org.elasticsearch.xpack.esql.expression.function.scalar.score.Decay; @@ -526,6 +527,7 @@ private static FunctionDefinition[][] functions() { def(MvPSeriesWeightedSum.class, MvPSeriesWeightedSum::new, "mv_pseries_weighted_sum"), def(MvSort.class, MvSort::new, "mv_sort"), def(MvSlice.class, MvSlice::new, "mv_slice"), + def(MvUnion.class, MvUnion::new, "mv_union"), def(MvZip.class, MvZip::new, "mv_zip"), def(MvSum.class, MvSum::new, "mv_sum"), def(Split.class, Split::new, "split") }, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvFunctionWritables.java index 657a6fd4560a0..3a902c839be78 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvFunctionWritables.java @@ -32,6 +32,7 @@ public static List getNamedWriteables() { MvSlice.ENTRY, MvSort.ENTRY, MvSum.ENTRY, + MvUnion.ENTRY, MvZip.ENTRY ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java new file mode 100644 index 0000000000000..f4a1499f18825 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -0,0 +1,305 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.ann.Position; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.expression.Nullability; +import org.elasticsearch.xpack.esql.core.expression.function.scalar.BinaryScalarFunction; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.planner.PlannerUtils; + +import java.io.IOException; +import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Set; +import java.util.function.BiFunction; +import java.util.function.Consumer; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isRepresentableExceptCountersDenseVectorAggregateMetricDoubleAndExponentialHistogram; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; + +/** + * Returns the union of values from two multi-valued fields (all unique values from both inputs). + * Example: + * Given set A = {"a","b","c"} and set B = {"b","c","d"}, MV_UNION(A, B) returns {"a", "b", "c", "d"} + */ +public class MvUnion extends BinaryScalarFunction implements EvaluatorMapper { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "MvUnion", MvUnion::new); + + private DataType dataType; + + @FunctionInfo( + returnType = { + "boolean", + "cartesian_point", + "cartesian_shape", + "date", + "date_nanos", + "double", + "geo_point", + "geo_shape", + "geohash", + "geotile", + "geohex", + "integer", + "ip", + "keyword", + "long", + "unsigned_long", + "version" }, + description = "Returns all unique values from both input fields. Returns `null` if either field is null.", + preview = true, + examples = { + @Example(file = "mv_union", tag = "testMvUnionWithIntValues"), + @Example(file = "mv_union", tag = "testMvUnionWithLongValues"), + @Example(file = "mv_union", tag = "testMvUnionWithBooleanValues"), + @Example(file = "mv_union", tag = "testMvUnionWithDoubleValues"), + @Example(file = "mv_union", tag = "testMvUnionWithBytesRefValues") }, + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.3.0") } + ) + public MvUnion( + Source source, + @Param( + name = "field1", + type = { + "boolean", + "cartesian_point", + "cartesian_shape", + "date", + "date_nanos", + "double", + "geo_point", + "geo_shape", + "geohash", + "geotile", + "geohex", + "integer", + "ip", + "keyword", + "long", + "text", + "unsigned_long", + "version" }, + description = "Multivalue expression. If null, the function returns null." + ) Expression field1, + @Param( + name = "field2", + type = { + "boolean", + "cartesian_point", + "cartesian_shape", + "date", + "date_nanos", + "double", + "geo_point", + "geo_shape", + "geohash", + "geotile", + "geohex", + "integer", + "ip", + "keyword", + "long", + "text", + "unsigned_long", + "version" }, + description = "Multivalue expression. If null, the function returns null." + ) Expression field2 + ) { + super(source, field1, field2); + } + + private MvUnion(StreamInput in) throws IOException { + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class)); + } + + @Override + public Object fold(FoldContext ctx) { + return EvaluatorMapper.super.fold(source(), ctx); + } + + @Evaluator(extraName = "Boolean") + static void process(BooleanBlock.Builder builder, @Position int position, BooleanBlock field1, BooleanBlock field2) { + processUnionSet(builder, position, field1, field2, (p, block) -> ((BooleanBlock) block).getBoolean(p), builder::appendBoolean); + } + + @Evaluator(extraName = "BytesRef") + static void process(BytesRefBlock.Builder builder, @Position int position, BytesRefBlock field1, BytesRefBlock field2) { + processUnionSet(builder, position, field1, field2, (p, block) -> { + BytesRef value = new BytesRef(); + return ((BytesRefBlock) block).getBytesRef(p, value); + }, builder::appendBytesRef); + } + + @Evaluator(extraName = "Int") + static void process(IntBlock.Builder builder, @Position int position, IntBlock field1, IntBlock field2) { + processUnionSet(builder, position, field1, field2, (p, block) -> ((IntBlock) block).getInt(p), builder::appendInt); + } + + @Evaluator(extraName = "Long") + static void process(LongBlock.Builder builder, @Position int position, LongBlock field1, LongBlock field2) { + processUnionSet(builder, position, field1, field2, (p, block) -> ((LongBlock) block).getLong(p), builder::appendLong); + } + + @Evaluator(extraName = "Double") + static void process(DoubleBlock.Builder builder, @Position int position, DoubleBlock field1, DoubleBlock field2) { + processUnionSet(builder, position, field1, field2, (p, block) -> ((DoubleBlock) block).getDouble(p), builder::appendDouble); + } + + @Override + public DataType dataType() { + if (dataType == null) { + resolveType(); + } + return dataType; + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + if (left().dataType() != DataType.NULL && right().dataType() != DataType.NULL) { + this.dataType = left().dataType().noText(); + return isType( + right(), + t -> t.noText() == left().dataType().noText(), + sourceText(), + SECOND, + left().dataType().noText().typeName() + ); + } + + Expression evaluatedField = left().dataType() == DataType.NULL ? right() : left(); + this.dataType = evaluatedField.dataType().noText(); + + TypeResolution resolution = isRepresentableExceptCountersDenseVectorAggregateMetricDoubleAndExponentialHistogram( + evaluatedField, + sourceText(), + FIRST + ); + if (resolution.unresolved()) { + return resolution; + } + + return resolution; + } + + @Override + protected BinaryScalarFunction replaceChildren(Expression newLeft, Expression newRight) { + return new MvUnion(source(), newLeft, newRight); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, MvUnion::new, left(), right()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + return switch (PlannerUtils.toElementType(dataType())) { + case BOOLEAN -> new MvUnionBooleanEvaluator.Factory(source(), toEvaluator.apply(left()), toEvaluator.apply(right())); + case BYTES_REF -> new MvUnionBytesRefEvaluator.Factory(source(), toEvaluator.apply(left()), toEvaluator.apply(right())); + case INT -> new MvUnionIntEvaluator.Factory(source(), toEvaluator.apply(left()), toEvaluator.apply(right())); + case LONG -> new MvUnionLongEvaluator.Factory(source(), toEvaluator.apply(left()), toEvaluator.apply(right())); + case DOUBLE -> new MvUnionDoubleEvaluator.Factory(source(), toEvaluator.apply(left()), toEvaluator.apply(right())); + case NULL -> EvalOperator.CONSTANT_NULL_FACTORY; + default -> throw EsqlIllegalArgumentException.illegalDataType(dataType); + }; + } + + @Override + public Nullability nullable() { + return Nullability.TRUE; + } + + @Override + public int hashCode() { + return Objects.hash(left(), right()); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != getClass()) { + return false; + } + MvUnion other = (MvUnion) obj; + return Objects.equals(other.left(), left()) && Objects.equals(other.right(), right()); + } + + static void processUnionSet( + Block.Builder builder, + int position, + Block field1, + Block field2, + BiFunction getValueFunction, + Consumer addValueFunction + ) { + int firstValueCount = field1.getValueCount(position); + int secondValueCount = field2.getValueCount(position); + + int firstValueIndex = field1.getFirstValueIndex(position); + int secondValueIndex = field2.getFirstValueIndex(position); + + // Use LinkedHashSet to maintain insertion order + Set values = new LinkedHashSet<>(); + + // Add all values from first field + for (int i = 0; i < firstValueCount; i++) { + values.add(getValueFunction.apply(firstValueIndex + i, field1)); + } + + // Add all values from second field (duplicates automatically ignored by Set) + for (int i = 0; i < secondValueCount; i++) { + values.add(getValueFunction.apply(secondValueIndex + i, field2)); + } + + // Build result + if (values.size() == 1) { + // Single value - don't wrap in multivalue + addValueFunction.accept(values.iterator().next()); + } else { + builder.beginPositionEntry(); + for (T value : values) { + addValueFunction.accept(value); + } + builder.endPositionEntry(); + } + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 30036bb1df55b..57682b895b774 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -3455,6 +3455,31 @@ public void testMvIntersectionValidatesDataTypesAreEqual() { } } + public void testMvUnionValidatesDataTypesAreEqual() { + List> values = List.of( + new Tuple<>("[\"one\", \"two\", \"three\", \"four\", \"five\"]", "keyword"), + new Tuple<>("[1, 2, 3, 4, 5]", "integer"), + new Tuple<>("[1, 2, 3, 4, 5]::long", "long"), + new Tuple<>("[1.1, 2.2, 3.3, 4.4, 5.5]", "double"), + new Tuple<>("[false, true, true, false]", "boolean") + ); + + for (int i = 0; i < values.size(); i++) { + for (int j = 0; j < values.size(); j++) { + if (i == j) { + continue; + } + String query = "ROW a = " + values.get(i).v1() + ", b = " + values.get(j).v1() + " | EVAL finalValue = MV_UNION(a, b)"; + String expected = "second argument of [MV_UNION(a, b)] must be [" + + values.get(i).v2() + + "], found value [b] type [" + + values.get(j).v2() + + "]"; + assertThat(error(query, tsdb), containsString(expected)); + } + } + } + private void checkVectorFunctionsNullArgs(String functionInvocation) throws Exception { query("from test | eval similarity = " + functionInvocation, fullTextAnalyzer); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java new file mode 100644 index 0000000000000..ef9776f124b6e --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java @@ -0,0 +1,281 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.geo.GeometryTestUtils; +import org.elasticsearch.geo.ShapeTestUtils; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.hamcrest.Matcher; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.function.Supplier; + +import static org.elasticsearch.xpack.esql.EsqlTestUtils.randomLiteral; +import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.CARTESIAN; +import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO; +import static org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSliceTests.randomGrid; +import static org.hamcrest.Matchers.equalTo; + +public class MvUnionTests extends AbstractScalarFunctionTestCase { + public MvUnionTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List suppliers = new ArrayList<>(); + booleans(suppliers); + ints(suppliers); + longs(suppliers); + doubles(suppliers); + bytesRefs(suppliers); + return parameterSuppliersFromTypedData(anyNullIsNull(true, suppliers)); + } + + @Override + protected Expression build(Source source, List args) { + return new MvUnion(source, args.get(0), args.get(1)); + } + + private static Matcher matchResult(LinkedHashSet result) { + if (result == null || result.isEmpty()) { + return equalTo(null); + } + if (result.size() > 1) { + return equalTo(new ArrayList<>(result)); + } + return equalTo(result.stream().findFirst().get()); + } + + private static void booleans(List suppliers) { + suppliers.add(new TestCaseSupplier(List.of(DataType.BOOLEAN, DataType.BOOLEAN), () -> { + List field1 = randomList(1, 10, () -> randomBoolean()); + List field2 = randomList(1, 10, () -> randomBoolean()); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.BOOLEAN, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.BOOLEAN, "field2") + ), + "MvUnionBooleanEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.BOOLEAN, + matchResult(result) + ); + })); + } + + private static void ints(List suppliers) { + suppliers.add(new TestCaseSupplier(List.of(DataType.INTEGER, DataType.INTEGER), () -> { + List field1 = randomList(1, 10, () -> randomIntBetween(1, 10)); + List field2 = randomList(1, 10, () -> randomIntBetween(1, 10)); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.INTEGER, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.INTEGER, "field2") + ), + "MvUnionIntEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.INTEGER, + matchResult(result) + ); + })); + } + + private static void longs(List suppliers) { + addLongTestCase(suppliers, DataType.LONG, ESTestCase::randomLong); + addLongTestCase(suppliers, DataType.DATETIME, ESTestCase::randomLong); + addLongTestCase(suppliers, DataType.DATE_NANOS, ESTestCase::randomNonNegativeLong); + + for (DataType gridType : new DataType[] { DataType.GEOHASH, DataType.GEOTILE, DataType.GEOHEX }) { + addLongTestCase(suppliers, gridType, () -> randomGrid(gridType)); + } + + suppliers.add(new TestCaseSupplier(List.of(DataType.UNSIGNED_LONG, DataType.UNSIGNED_LONG), () -> { + List field1 = randomList(1, 10, ESTestCase::randomLong); + List field2 = randomList(1, 10, ESTestCase::randomLong); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.UNSIGNED_LONG, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.UNSIGNED_LONG, "field2") + ), + "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.UNSIGNED_LONG, + matchResult(result) + ); + })); + } + + private static void addLongTestCase(List suppliers, DataType dataType, Supplier longSupplier) { + suppliers.add(new TestCaseSupplier(List.of(dataType, dataType), () -> { + List field1 = randomList(1, 10, longSupplier); + List field2 = randomList(1, 10, longSupplier); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, dataType, "field1"), + new TestCaseSupplier.TypedData(field2, dataType, "field2") + ), + "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + dataType, + matchResult(result) + ); + })); + } + + private static void doubles(List suppliers) { + suppliers.add(new TestCaseSupplier(List.of(DataType.DOUBLE, DataType.DOUBLE), () -> { + List field1 = randomList(1, 10, () -> randomDouble()); + List field2 = randomList(1, 10, () -> randomDouble()); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.DOUBLE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.DOUBLE, "field2") + ), + "MvUnionDoubleEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.DOUBLE, + matchResult(result) + ); + })); + } + + private static void bytesRefs(List suppliers) { + for (DataType lhs : new DataType[] { DataType.KEYWORD, DataType.TEXT }) { + for (DataType rhs : new DataType[] { DataType.KEYWORD, DataType.TEXT }) { + suppliers.add(new TestCaseSupplier(List.of(lhs, rhs), () -> { + List field1 = randomList(1, 10, () -> randomLiteral(lhs).value()); + List field2 = randomList(1, 10, () -> randomLiteral(rhs).value()); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, lhs, "field1"), + new TestCaseSupplier.TypedData(field2, rhs, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.KEYWORD, + matchResult(result) + ); + })); + } + } + + suppliers.add(new TestCaseSupplier(List.of(DataType.IP, DataType.IP), () -> { + List field1 = randomList(1, 10, () -> randomLiteral(DataType.IP).value()); + List field2 = randomList(1, 10, () -> randomLiteral(DataType.IP).value()); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.IP, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.IP, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.IP, + matchResult(result) + ); + })); + + suppliers.add(new TestCaseSupplier(List.of(DataType.VERSION, DataType.VERSION), () -> { + List field1 = randomList(1, 10, () -> randomLiteral(DataType.VERSION).value()); + List field2 = randomList(1, 10, () -> randomLiteral(DataType.VERSION).value()); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.VERSION, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.VERSION, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.VERSION, + matchResult(result) + ); + })); + + suppliers.add(new TestCaseSupplier(List.of(DataType.GEO_POINT, DataType.GEO_POINT), () -> { + List field1 = randomList(1, 10, () -> new BytesRef(GEO.asWkt(GeometryTestUtils.randomPoint()))); + List field2 = randomList(1, 10, () -> new BytesRef(GEO.asWkt(GeometryTestUtils.randomPoint()))); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.GEO_POINT, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.GEO_POINT, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.GEO_POINT, + matchResult(result) + ); + })); + + suppliers.add(new TestCaseSupplier(List.of(DataType.CARTESIAN_POINT, DataType.CARTESIAN_POINT), () -> { + List field1 = randomList(1, 10, () -> new BytesRef(CARTESIAN.asWkt(ShapeTestUtils.randomPoint()))); + List field2 = randomList(1, 10, () -> new BytesRef(CARTESIAN.asWkt(ShapeTestUtils.randomPoint()))); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_POINT, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_POINT, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.CARTESIAN_POINT, + matchResult(result) + ); + })); + + suppliers.add(new TestCaseSupplier(List.of(DataType.GEO_SHAPE, DataType.GEO_SHAPE), () -> { + var field1 = randomList(1, 3, () -> new BytesRef(GEO.asWkt(GeometryTestUtils.randomGeometry(randomBoolean(), 500)))); + var field2 = randomList(1, 3, () -> new BytesRef(GEO.asWkt(GeometryTestUtils.randomGeometry(randomBoolean(), 500)))); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.GEO_SHAPE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.GEO_SHAPE, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.GEO_SHAPE, + matchResult(result) + ); + })); + + suppliers.add(new TestCaseSupplier(List.of(DataType.CARTESIAN_SHAPE, DataType.CARTESIAN_SHAPE), () -> { + var field1 = randomList(1, 3, () -> new BytesRef(CARTESIAN.asWkt(ShapeTestUtils.randomGeometry(randomBoolean(), 500)))); + var field2 = randomList(1, 3, () -> new BytesRef(CARTESIAN.asWkt(ShapeTestUtils.randomGeometry(randomBoolean(), 500)))); + var result = new LinkedHashSet<>(field1); + result.addAll(field2); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_SHAPE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_SHAPE, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.CARTESIAN_SHAPE, + matchResult(result) + ); + })); + } +} From 6b8ea5b5739c5d907980428176210de643a023e5 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 17 Dec 2025 00:41:56 +0000 Subject: [PATCH 02/14] Update docs/changelog/139664.yaml --- docs/changelog/139664.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/139664.yaml diff --git a/docs/changelog/139664.yaml b/docs/changelog/139664.yaml new file mode 100644 index 0000000000000..35761b8877a7e --- /dev/null +++ b/docs/changelog/139664.yaml @@ -0,0 +1,5 @@ +pr: 139664 +summary: Add MV_UNION Function +area: ES|QL +type: enhancement +issues: [] From 12dee78b92f6e5fac004bd6357143ab387babac4 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 17 Dec 2025 19:03:45 +0000 Subject: [PATCH 03/14] Add MV_UNION function for ES|QL tests --- .../functions/description/mv_union.md | 6 + .../_snippets/functions/examples/mv_union.md | 55 +++ .../_snippets/functions/layout/mv_union.md | 27 ++ .../functions/parameters/mv_union.md | 10 + .../_snippets/functions/types/mv_union.md | 27 ++ .../esql/images/functions/mv_union.svg | 1 + .../kibana/definition/functions/mv_union.json | 377 ++++++++++++++++++ .../esql/kibana/docs/functions/mv_union.md | 10 + .../function/scalar/multivalue/MvUnion.java | 19 +- .../scalar/multivalue/MvUnionTests.java | 191 ++++----- 10 files changed, 625 insertions(+), 98 deletions(-) create mode 100644 docs/reference/query-languages/esql/_snippets/functions/description/mv_union.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/examples/mv_union.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/mv_union.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/mv_union.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/types/mv_union.md create mode 100644 docs/reference/query-languages/esql/images/functions/mv_union.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/mv_union.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/mv_union.md diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/mv_union.md b/docs/reference/query-languages/esql/_snippets/functions/description/mv_union.md new file mode 100644 index 0000000000000..891c2c28d5ffd --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/mv_union.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Returns all unique values from both input fields. Returns `null` if either field is null. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/mv_union.md b/docs/reference/query-languages/esql/_snippets/functions/examples/mv_union.md new file mode 100644 index 0000000000000..d25f54b000507 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/mv_union.md @@ -0,0 +1,55 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Examples** + +```esql +ROW a = [1, 2, 3, 4, 5], b = [2, 3, 4, 5, 6] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +| finalValue:integer | +| --- | +| [1, 2, 3, 4, 5, 6] | + +```esql +ROW a = [1, 2, 3, 4, 5]::long, b = [2, 3, 4, 5, 6]::long +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +| finalValue:long | +| --- | +| [1, 2, 3, 4, 5, 6] | + +```esql +ROW a = [true, false], b = [false] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +| finalValue:boolean | +| --- | +| [true, false] | + +```esql +ROW a = [5.2, 10.5, 1.12345], b = [10.5, 2.6928] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +| finalValue:double | +| --- | +| [5.2, 10.5, 1.12345, 2.6928] | + +```esql +ROW a = ["one", "two", "three"], b = ["two", "four"] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +| finalValue:keyword | +| --- | +| ["one", "two", "three", "four"] | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/mv_union.md b/docs/reference/query-languages/esql/_snippets/functions/layout/mv_union.md new file mode 100644 index 0000000000000..fa4f59bdb6aec --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/mv_union.md @@ -0,0 +1,27 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `MV_UNION` [esql-mv_union] +```{applies_to} +stack: preview 9.3.0 +serverless: preview +``` + +**Syntax** + +:::{image} ../../../images/functions/mv_union.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/mv_union.md +::: + +:::{include} ../description/mv_union.md +::: + +:::{include} ../types/mv_union.md +::: + +:::{include} ../examples/mv_union.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/mv_union.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/mv_union.md new file mode 100644 index 0000000000000..b32c6ba0cfd92 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/mv_union.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Parameters** + +`field1` +: Multivalue expression. If null, the function returns null. + +`field2` +: Multivalue expression. If null, the function returns null. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/mv_union.md b/docs/reference/query-languages/esql/_snippets/functions/types/mv_union.md new file mode 100644 index 0000000000000..64515ccdcbab1 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/mv_union.md @@ -0,0 +1,27 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Supported types** + +| field1 | field2 | result | +| --- | --- | --- | +| boolean | boolean | boolean | +| cartesian_point | cartesian_point | cartesian_point | +| cartesian_shape | cartesian_shape | cartesian_shape | +| date | date | date | +| date_nanos | date_nanos | date_nanos | +| double | double | double | +| geo_point | geo_point | geo_point | +| geo_shape | geo_shape | geo_shape | +| geohash | geohash | geohash | +| geohex | geohex | geohex | +| geotile | geotile | geotile | +| integer | integer | integer | +| ip | ip | ip | +| keyword | keyword | keyword | +| keyword | text | keyword | +| long | long | long | +| text | keyword | keyword | +| text | text | keyword | +| unsigned_long | unsigned_long | unsigned_long | +| version | version | version | + diff --git a/docs/reference/query-languages/esql/images/functions/mv_union.svg b/docs/reference/query-languages/esql/images/functions/mv_union.svg new file mode 100644 index 0000000000000..f3899e7c58c59 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/mv_union.svg @@ -0,0 +1 @@ +MV_UNION(field1,field2) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/mv_union.json b/docs/reference/query-languages/esql/kibana/definition/functions/mv_union.json new file mode 100644 index 0000000000000..b7e64c3d63f7c --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/mv_union.json @@ -0,0 +1,377 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "mv_union", + "description" : "Returns all unique values from both input fields. Returns `null` if either field is null.", + "signatures" : [ + { + "params" : [ + { + "name" : "field1", + "type" : "boolean", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "boolean", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "cartesian_point", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "cartesian_point", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "cartesian_point" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "cartesian_shape", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "cartesian_shape", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "cartesian_shape" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "date", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "date", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "date" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "date_nanos", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "date_nanos", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "double", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "double", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "geo_point", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "geo_point", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "geo_point" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "geo_shape", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "geo_shape", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "geo_shape" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "geohash", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "geohash", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "geohash" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "geohex", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "geohex", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "geohex" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "geotile", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "geotile", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "geotile" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "integer", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "integer", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "integer" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "ip", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "ip", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "ip" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "keyword", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "keyword", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "keyword", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "text", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "long", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "long", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "text", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "keyword", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "text", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "text", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "unsigned_long", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "unsigned_long", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "unsigned_long" + }, + { + "params" : [ + { + "name" : "field1", + "type" : "version", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + }, + { + "name" : "field2", + "type" : "version", + "optional" : false, + "description" : "Multivalue expression. If null, the function returns null." + } + ], + "variadic" : false, + "returnType" : "version" + } + ], + "examples" : [ + "ROW a = [1, 2, 3, 4, 5], b = [2, 3, 4, 5, 6]\n| EVAL finalValue = MV_UNION(a, b)\n| KEEP finalValue", + "ROW a = [1, 2, 3, 4, 5]::long, b = [2, 3, 4, 5, 6]::long\n| EVAL finalValue = MV_UNION(a, b)\n| KEEP finalValue", + "ROW a = [true, false], b = [false]\n| EVAL finalValue = MV_UNION(a, b)\n| KEEP finalValue", + "ROW a = [5.2, 10.5, 1.12345], b = [10.5, 2.6928]\n| EVAL finalValue = MV_UNION(a, b)\n| KEEP finalValue", + "ROW a = [\"one\", \"two\", \"three\"], b = [\"two\", \"four\"]\n| EVAL finalValue = MV_UNION(a, b)\n| KEEP finalValue" + ], + "preview" : true, + "snapshot_only" : false +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/mv_union.md b/docs/reference/query-languages/esql/kibana/docs/functions/mv_union.md new file mode 100644 index 0000000000000..758fcd03569e0 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/mv_union.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### MV UNION +Returns all unique values from both input fields. Returns `null` if either field is null. + +```esql +ROW a = [1, 2, 3, 4, 5], b = [2, 3, 4, 5, 6] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index f4a1499f18825..4275042744163 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -274,6 +274,12 @@ static void processUnionSet( int firstValueCount = field1.getValueCount(position); int secondValueCount = field2.getValueCount(position); + // If either field has no values (is null), return null + if (firstValueCount == 0 || secondValueCount == 0) { + builder.appendNull(); + return; + } + int firstValueIndex = field1.getFirstValueIndex(position); int secondValueIndex = field2.getFirstValueIndex(position); @@ -291,15 +297,10 @@ static void processUnionSet( } // Build result - if (values.size() == 1) { - // Single value - don't wrap in multivalue - addValueFunction.accept(values.iterator().next()); - } else { - builder.beginPositionEntry(); - for (T value : values) { - addValueFunction.accept(value); - } - builder.endPositionEntry(); + builder.beginPositionEntry(); + for (T value : values) { + addValueFunction.accept(value); } + builder.endPositionEntry(); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java index ef9776f124b6e..07ae4882161f8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java @@ -22,11 +22,14 @@ import org.hamcrest.Matcher; import java.util.ArrayList; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.function.Supplier; import static org.elasticsearch.xpack.esql.EsqlTestUtils.randomLiteral; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.core.util.NumericUtils.unsignedLongAsBigInteger; import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.CARTESIAN; import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO; import static org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSliceTests.randomGrid; @@ -53,7 +56,7 @@ protected Expression build(Source source, List args) { return new MvUnion(source, args.get(0), args.get(1)); } - private static Matcher matchResult(LinkedHashSet result) { + private static Matcher matchResult(HashSet result) { if (result == null || result.isEmpty()) { return equalTo(null); } @@ -70,13 +73,13 @@ private static void booleans(List suppliers) { var result = new LinkedHashSet<>(field1); result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.BOOLEAN, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.BOOLEAN, "field2") - ), - "MvUnionBooleanEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.BOOLEAN, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.BOOLEAN, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.BOOLEAN, "field2") + ), + "MvUnionBooleanEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.BOOLEAN, + matchResult(result) ); })); } @@ -88,13 +91,13 @@ private static void ints(List suppliers) { var result = new LinkedHashSet<>(field1); result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.INTEGER, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.INTEGER, "field2") - ), - "MvUnionIntEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.INTEGER, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.INTEGER, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.INTEGER, "field2") + ), + "MvUnionIntEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.INTEGER, + matchResult(result) ); })); } @@ -103,24 +106,25 @@ private static void longs(List suppliers) { addLongTestCase(suppliers, DataType.LONG, ESTestCase::randomLong); addLongTestCase(suppliers, DataType.DATETIME, ESTestCase::randomLong); addLongTestCase(suppliers, DataType.DATE_NANOS, ESTestCase::randomNonNegativeLong); - for (DataType gridType : new DataType[] { DataType.GEOHASH, DataType.GEOTILE, DataType.GEOHEX }) { addLongTestCase(suppliers, gridType, () -> randomGrid(gridType)); } - suppliers.add(new TestCaseSupplier(List.of(DataType.UNSIGNED_LONG, DataType.UNSIGNED_LONG), () -> { + suppliers.add(new TestCaseSupplier(List.of(UNSIGNED_LONG, UNSIGNED_LONG), () -> { List field1 = randomList(1, 10, ESTestCase::randomLong); List field2 = randomList(1, 10, ESTestCase::randomLong); - var result = new LinkedHashSet<>(field1); - result.addAll(field2); + var resultSet = new LinkedHashSet<>(field1); + resultSet.addAll(field2); + var result = resultSet.stream().map(v -> unsignedLongAsBigInteger(v)).toList(); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.UNSIGNED_LONG, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.UNSIGNED_LONG, "field2") - ), - "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.UNSIGNED_LONG, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, UNSIGNED_LONG, "field1"), + new TestCaseSupplier.TypedData(field2, UNSIGNED_LONG, "field2") + ), + "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + UNSIGNED_LONG, + equalTo(result) ); })); } @@ -131,14 +135,15 @@ private static void addLongTestCase(List suppliers, DataType d List field2 = randomList(1, 10, longSupplier); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, dataType, "field1"), - new TestCaseSupplier.TypedData(field2, dataType, "field2") - ), - "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - dataType, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, dataType, "field1"), + new TestCaseSupplier.TypedData(field2, dataType, "field2") + ), + "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + dataType, + matchResult(result) ); })); } @@ -149,14 +154,15 @@ private static void doubles(List suppliers) { List field2 = randomList(1, 10, () -> randomDouble()); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.DOUBLE, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.DOUBLE, "field2") - ), - "MvUnionDoubleEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.DOUBLE, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.DOUBLE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.DOUBLE, "field2") + ), + "MvUnionDoubleEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.DOUBLE, + matchResult(result) ); })); } @@ -169,14 +175,15 @@ private static void bytesRefs(List suppliers) { List field2 = randomList(1, 10, () -> randomLiteral(rhs).value()); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, lhs, "field1"), - new TestCaseSupplier.TypedData(field2, rhs, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.KEYWORD, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, lhs, "field1"), + new TestCaseSupplier.TypedData(field2, rhs, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.KEYWORD, + matchResult(result) ); })); } @@ -187,14 +194,15 @@ private static void bytesRefs(List suppliers) { List field2 = randomList(1, 10, () -> randomLiteral(DataType.IP).value()); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.IP, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.IP, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.IP, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.IP, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.IP, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.IP, + matchResult(result) ); })); @@ -203,14 +211,15 @@ private static void bytesRefs(List suppliers) { List field2 = randomList(1, 10, () -> randomLiteral(DataType.VERSION).value()); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.VERSION, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.VERSION, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.VERSION, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.VERSION, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.VERSION, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.VERSION, + matchResult(result) ); })); @@ -219,14 +228,15 @@ private static void bytesRefs(List suppliers) { List field2 = randomList(1, 10, () -> new BytesRef(GEO.asWkt(GeometryTestUtils.randomPoint()))); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.GEO_POINT, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.GEO_POINT, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.GEO_POINT, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.GEO_POINT, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.GEO_POINT, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.GEO_POINT, + matchResult(result) ); })); @@ -235,14 +245,15 @@ private static void bytesRefs(List suppliers) { List field2 = randomList(1, 10, () -> new BytesRef(CARTESIAN.asWkt(ShapeTestUtils.randomPoint()))); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_POINT, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_POINT, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.CARTESIAN_POINT, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_POINT, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_POINT, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.CARTESIAN_POINT, + matchResult(result) ); })); @@ -251,14 +262,15 @@ private static void bytesRefs(List suppliers) { var field2 = randomList(1, 3, () -> new BytesRef(GEO.asWkt(GeometryTestUtils.randomGeometry(randomBoolean(), 500)))); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.GEO_SHAPE, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.GEO_SHAPE, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.GEO_SHAPE, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.GEO_SHAPE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.GEO_SHAPE, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.GEO_SHAPE, + matchResult(result) ); })); @@ -267,14 +279,15 @@ private static void bytesRefs(List suppliers) { var field2 = randomList(1, 3, () -> new BytesRef(CARTESIAN.asWkt(ShapeTestUtils.randomGeometry(randomBoolean(), 500)))); var result = new LinkedHashSet<>(field1); result.addAll(field2); + return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_SHAPE, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_SHAPE, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.CARTESIAN_SHAPE, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_SHAPE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_SHAPE, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.CARTESIAN_SHAPE, + matchResult(result) ); })); } From e3958550500a8c113e8d5adf40c24116fefaf1e6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 17 Dec 2025 19:13:09 +0000 Subject: [PATCH 04/14] [CI] Auto commit changes from spotless --- .../scalar/multivalue/MvUnionTests.java | 168 +++++++++--------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java index 07ae4882161f8..e9433806bbd36 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java @@ -73,13 +73,13 @@ private static void booleans(List suppliers) { var result = new LinkedHashSet<>(field1); result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.BOOLEAN, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.BOOLEAN, "field2") - ), - "MvUnionBooleanEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.BOOLEAN, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.BOOLEAN, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.BOOLEAN, "field2") + ), + "MvUnionBooleanEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.BOOLEAN, + matchResult(result) ); })); } @@ -91,13 +91,13 @@ private static void ints(List suppliers) { var result = new LinkedHashSet<>(field1); result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.INTEGER, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.INTEGER, "field2") - ), - "MvUnionIntEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.INTEGER, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.INTEGER, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.INTEGER, "field2") + ), + "MvUnionIntEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.INTEGER, + matchResult(result) ); })); } @@ -118,13 +118,13 @@ private static void longs(List suppliers) { var result = resultSet.stream().map(v -> unsignedLongAsBigInteger(v)).toList(); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, UNSIGNED_LONG, "field1"), - new TestCaseSupplier.TypedData(field2, UNSIGNED_LONG, "field2") - ), - "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - UNSIGNED_LONG, - equalTo(result) + List.of( + new TestCaseSupplier.TypedData(field1, UNSIGNED_LONG, "field1"), + new TestCaseSupplier.TypedData(field2, UNSIGNED_LONG, "field2") + ), + "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + UNSIGNED_LONG, + equalTo(result) ); })); } @@ -137,13 +137,13 @@ private static void addLongTestCase(List suppliers, DataType d result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, dataType, "field1"), - new TestCaseSupplier.TypedData(field2, dataType, "field2") - ), - "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - dataType, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, dataType, "field1"), + new TestCaseSupplier.TypedData(field2, dataType, "field2") + ), + "MvUnionLongEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + dataType, + matchResult(result) ); })); } @@ -156,13 +156,13 @@ private static void doubles(List suppliers) { result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.DOUBLE, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.DOUBLE, "field2") - ), - "MvUnionDoubleEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.DOUBLE, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.DOUBLE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.DOUBLE, "field2") + ), + "MvUnionDoubleEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.DOUBLE, + matchResult(result) ); })); } @@ -177,13 +177,13 @@ private static void bytesRefs(List suppliers) { result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, lhs, "field1"), - new TestCaseSupplier.TypedData(field2, rhs, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.KEYWORD, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, lhs, "field1"), + new TestCaseSupplier.TypedData(field2, rhs, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.KEYWORD, + matchResult(result) ); })); } @@ -196,13 +196,13 @@ private static void bytesRefs(List suppliers) { result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.IP, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.IP, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.IP, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.IP, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.IP, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.IP, + matchResult(result) ); })); @@ -213,13 +213,13 @@ private static void bytesRefs(List suppliers) { result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.VERSION, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.VERSION, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.VERSION, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.VERSION, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.VERSION, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.VERSION, + matchResult(result) ); })); @@ -230,13 +230,13 @@ private static void bytesRefs(List suppliers) { result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.GEO_POINT, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.GEO_POINT, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.GEO_POINT, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.GEO_POINT, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.GEO_POINT, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.GEO_POINT, + matchResult(result) ); })); @@ -247,13 +247,13 @@ private static void bytesRefs(List suppliers) { result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_POINT, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_POINT, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.CARTESIAN_POINT, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_POINT, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_POINT, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.CARTESIAN_POINT, + matchResult(result) ); })); @@ -264,13 +264,13 @@ private static void bytesRefs(List suppliers) { result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.GEO_SHAPE, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.GEO_SHAPE, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.GEO_SHAPE, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.GEO_SHAPE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.GEO_SHAPE, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.GEO_SHAPE, + matchResult(result) ); })); @@ -281,13 +281,13 @@ private static void bytesRefs(List suppliers) { result.addAll(field2); return new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_SHAPE, "field1"), - new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_SHAPE, "field2") - ), - "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", - DataType.CARTESIAN_SHAPE, - matchResult(result) + List.of( + new TestCaseSupplier.TypedData(field1, DataType.CARTESIAN_SHAPE, "field1"), + new TestCaseSupplier.TypedData(field2, DataType.CARTESIAN_SHAPE, "field2") + ), + "MvUnionBytesRefEvaluator[field1=Attribute[channel=0], field2=Attribute[channel=1]]", + DataType.CARTESIAN_SHAPE, + matchResult(result) ); })); } From 629fcb5d6767117d7745924b7d15fd7479ac6192 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 17 Dec 2025 19:38:24 +0000 Subject: [PATCH 05/14] import error --- .../esql/expression/function/scalar/multivalue/MvUnion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index 4275042744163..4b3f245d3f95f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -45,7 +45,7 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isRepresentableExceptCountersDenseVectorAggregateMetricDoubleAndExponentialHistogram; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isRepresentableExceptCountersDenseVectorAggregateMetricDoubleAndHistogram; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; /** @@ -204,7 +204,7 @@ protected TypeResolution resolveType() { Expression evaluatedField = left().dataType() == DataType.NULL ? right() : left(); this.dataType = evaluatedField.dataType().noText(); - TypeResolution resolution = isRepresentableExceptCountersDenseVectorAggregateMetricDoubleAndExponentialHistogram( + TypeResolution resolution = isRepresentableExceptCountersDenseVectorAggregateMetricDoubleAndHistogram( evaluatedField, sourceText(), FIRST From b6ff5e04de890db0e95a94265e8806fec164873a Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 17 Dec 2025 21:04:36 +0000 Subject: [PATCH 06/14] updated csv tests --- .../src/main/resources/mv_union.csv-spec | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec index c8a4900127295..9991a23555a8d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec @@ -82,12 +82,12 @@ finalValue:geo_point testMvUnionGeoShape required_capability: fn_mv_union -ROW a = ["POLYGON((-9 -9, -1 -9, -1 -1, -9 -1, -9 -9))","POLYGON((1 1, 9 1, 9 9, 1 9, 1 1))"], b = ["POLYGON((-5 -5, -1 -5, -1 -1, -5 -1, -5 -5))","POLYGON((1 1, 9 1, 9 9, 1 9, 1 1))"] -| EVAL finalValue = MV_UNION(TO_GEOSHAPE(a), TO_GEOSHAPE(b)) +ROW a = "POLYGON((1 1, 9 1, 9 9, 1 9, 1 1))"::geo_shape, b = "POLYGON((1 1, 9 1, 9 9, 1 9, 1 1))"::geo_shape +| EVAL finalValue = MV_UNION(a, b) | KEEP finalValue; finalValue:geo_shape -["POLYGON((-9 -9, -1 -9, -1 -1, -9 -1, -9 -9))", "POLYGON((1 1, 9 1, 9 9, 1 9, 1 1))", "POLYGON((-5 -5, -1 -5, -1 -1, -5 -1, -5 -5))"] +POLYGON ((1 1, 9 1, 9 9, 1 9, 1 1)) ; testMvUnionIp @@ -160,17 +160,19 @@ testMvUnionAgainstAnIndex required_capability: fn_mv_union FROM employees -| WHERE MV_COUNT(MV_UNION(salary_change, [-7.26, 12.09])) > 2 +| EVAL union_result = MV_UNION(salary_change, [-7.23, 11.17]) +| EVAL union_count = MV_COUNT(union_result) +| WHERE union_count > 2 | SORT emp_no | LIMIT 5 | KEEP emp_no, hire_date, salary_change; emp_no:integer | hire_date:datetime | salary_change:double -10001 | 1986-06-26T00:00:00.000Z | [-1.36, 13.48, 14.68] -10003 | 1986-12-01T00:00:00.000Z | [1.08, 2.29, 6.97, 12.4] -10004 | 1986-12-01T00:00:00.000Z | [-1.78, -0.85, 3.6, 5.04] -10006 | 1989-06-02T00:00:00.000Z | [-6.77, -3.43, 1.63, 3.25] -10008 | 1994-09-15T00:00:00.000Z | [-5.96, -2.92, -0.98, 5.39] +10001 | 1986-06-26T00:00:00.000Z | 1.19 +10003 | 1986-08-28T00:00:00.000Z | [12.82, 14.68] +10004 | 1986-12-01T00:00:00.000Z | [-0.35, 1.13, 3.65, 13.48] +10005 | 1989-09-12T00:00:00.000Z | [-2.14, 13.07] +10006 | 1989-06-02T00:00:00.000Z | -3.9 ; testMvUnionNullReturnedWhenFirstArgIsNull From 2b1ae1bf6dba1e4097071d46e5dcb3a63ff7b440 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 19 Dec 2025 15:42:46 +0000 Subject: [PATCH 07/14] Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java Co-authored-by: Liam Thompson --- .../esql/expression/function/scalar/multivalue/MvUnion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index 4b3f245d3f95f..a9ded32192249 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -77,7 +77,7 @@ public class MvUnion extends BinaryScalarFunction implements EvaluatorMapper { "long", "unsigned_long", "version" }, - description = "Returns all unique values from both input fields. Returns `null` if either field is null.", + description = "Returns all unique values from the combined input fields (set union). Returns `null` if either field is null.", preview = true, examples = { @Example(file = "mv_union", tag = "testMvUnionWithIntValues"), From 23c94ac15136cf10e09b913eb4a57fa05217538b Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 19 Dec 2025 15:43:00 +0000 Subject: [PATCH 08/14] Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java Co-authored-by: Liam Thompson --- .../esql/expression/function/scalar/multivalue/MvUnion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index a9ded32192249..b1fbaa081f2de 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -85,7 +85,7 @@ public class MvUnion extends BinaryScalarFunction implements EvaluatorMapper { @Example(file = "mv_union", tag = "testMvUnionWithBooleanValues"), @Example(file = "mv_union", tag = "testMvUnionWithDoubleValues"), @Example(file = "mv_union", tag = "testMvUnionWithBytesRefValues") }, - appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.3.0") } + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.4.0") } ) public MvUnion( Source source, From 8b3c105a0fe56fd122171ae493c5cd3b92239f66 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 19 Dec 2025 16:41:22 +0000 Subject: [PATCH 09/14] Made markdown changes --- .../query-languages/esql/_snippets/lists/mv-functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/reference/query-languages/esql/_snippets/lists/mv-functions.md b/docs/reference/query-languages/esql/_snippets/lists/mv-functions.md index 7c067503cc1d7..6413f7524ce5e 100644 --- a/docs/reference/query-languages/esql/_snippets/lists/mv-functions.md +++ b/docs/reference/query-languages/esql/_snippets/lists/mv-functions.md @@ -16,4 +16,5 @@ * [`MV_SORT`](../../functions-operators/mv-functions.md#esql-mv_sort) * [`MV_SLICE`](../../functions-operators/mv-functions.md#esql-mv_slice) * [`MV_SUM`](../../functions-operators/mv-functions.md#esql-mv_sum) +* [`MV_UNION`](../../functions-operators/mv-functions.md#esql-mv_union) {applies_to}`stack: preview 9.4` {applies_to}`serverless: preview` * [`MV_ZIP`](../../functions-operators/mv-functions.md#esql-mv_zip) From 725dc8130e7412e99c62e37cd9b7d59ae8516dff Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 19 Dec 2025 18:06:06 +0000 Subject: [PATCH 10/14] Update MV_UNION to treat null as empty set --- .../testFixtures/src/main/resources/mv_union.csv-spec | 4 ++-- .../expression/function/scalar/multivalue/MvUnion.java | 10 +++++----- .../function/scalar/multivalue/MvUnionTests.java | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec index 9991a23555a8d..0a0a076f46e58 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_union.csv-spec @@ -183,7 +183,7 @@ ROW a = [1, 2, 3, 4] | KEEP finalValue; finalValue:integer -null +[1, 2, 3, 4] ; testMvUnionNullReturnedWhenSecondArgIsNull @@ -194,7 +194,7 @@ ROW a = [1, 2, 3, 4] | KEEP finalValue; finalValue:integer -null +[1, 2, 3, 4] ; testMvUnionNullReturnedWhenBothArgsAreNull diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index b1fbaa081f2de..01a1b91fd2b3b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -77,7 +77,7 @@ public class MvUnion extends BinaryScalarFunction implements EvaluatorMapper { "long", "unsigned_long", "version" }, - description = "Returns all unique values from the combined input fields (set union). Returns `null` if either field is null.", + description = "Returns all unique values from the combined input fields (set union). Null values are treated as empty sets; returns `null` only if both fields are null.", preview = true, examples = { @Example(file = "mv_union", tag = "testMvUnionWithIntValues"), @@ -110,7 +110,7 @@ public MvUnion( "text", "unsigned_long", "version" }, - description = "Multivalue expression. If null, the function returns null." + description = "Multivalue expression. Null values are treated as empty sets." ) Expression field1, @Param( name = "field2", @@ -133,7 +133,7 @@ public MvUnion( "text", "unsigned_long", "version" }, - description = "Multivalue expression. If null, the function returns null." + description = "Multivalue expression. Null values are treated as empty sets." ) Expression field2 ) { super(source, field1, field2); @@ -274,8 +274,8 @@ static void processUnionSet( int firstValueCount = field1.getValueCount(position); int secondValueCount = field2.getValueCount(position); - // If either field has no values (is null), return null - if (firstValueCount == 0 || secondValueCount == 0) { + // If both field has no values, return null + if (firstValueCount == 0 && secondValueCount == 0) { builder.appendNull(); return; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java index e9433806bbd36..7dc016ef08444 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnionTests.java @@ -48,7 +48,7 @@ public static Iterable parameters() { longs(suppliers); doubles(suppliers); bytesRefs(suppliers); - return parameterSuppliersFromTypedData(anyNullIsNull(true, suppliers)); + return parameterSuppliersFromTypedData(suppliers); } @Override From 18d62321a0d4e88016585d8c7c69a4bf63eb462b Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 19 Dec 2025 18:47:28 +0000 Subject: [PATCH 11/14] updated MV_UNION for null acceptance --- .../functions/description/mv_union.md | 2 +- .../_snippets/functions/layout/mv_union.md | 2 +- .../functions/parameters/mv_union.md | 4 +- .../kibana/definition/functions/mv_union.json | 82 +++++++++---------- .../esql/kibana/docs/functions/mv_union.md | 2 +- .../function/scalar/multivalue/MvUnion.java | 28 ++++++- 6 files changed, 72 insertions(+), 48 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/mv_union.md b/docs/reference/query-languages/esql/_snippets/functions/description/mv_union.md index 891c2c28d5ffd..35ae0b759de6c 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/description/mv_union.md +++ b/docs/reference/query-languages/esql/_snippets/functions/description/mv_union.md @@ -2,5 +2,5 @@ **Description** -Returns all unique values from both input fields. Returns `null` if either field is null. +Returns all unique values from the combined input fields (set union). Null values are treated as empty sets; returns `null` only if both fields are null. diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/mv_union.md b/docs/reference/query-languages/esql/_snippets/functions/layout/mv_union.md index fa4f59bdb6aec..a82b7150c543d 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/layout/mv_union.md +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/mv_union.md @@ -2,7 +2,7 @@ ## `MV_UNION` [esql-mv_union] ```{applies_to} -stack: preview 9.3.0 +stack: preview 9.4.0 serverless: preview ``` diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/mv_union.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/mv_union.md index b32c6ba0cfd92..a0ca1fdc07df2 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/parameters/mv_union.md +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/mv_union.md @@ -3,8 +3,8 @@ **Parameters** `field1` -: Multivalue expression. If null, the function returns null. +: Multivalue expression. Null values are treated as empty sets. `field2` -: Multivalue expression. If null, the function returns null. +: Multivalue expression. Null values are treated as empty sets. diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/mv_union.json b/docs/reference/query-languages/esql/kibana/definition/functions/mv_union.json index b7e64c3d63f7c..ce1aca8b23e80 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/mv_union.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/mv_union.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", "type" : "scalar", "name" : "mv_union", - "description" : "Returns all unique values from both input fields. Returns `null` if either field is null.", + "description" : "Returns all unique values from the combined input fields (set union). Null values are treated as empty sets; returns `null` only if both fields are null.", "signatures" : [ { "params" : [ @@ -10,13 +10,13 @@ "name" : "field1", "type" : "boolean", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "boolean", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -28,13 +28,13 @@ "name" : "field1", "type" : "cartesian_point", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "cartesian_point", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -46,13 +46,13 @@ "name" : "field1", "type" : "cartesian_shape", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "cartesian_shape", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -64,13 +64,13 @@ "name" : "field1", "type" : "date", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "date", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -82,13 +82,13 @@ "name" : "field1", "type" : "date_nanos", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "date_nanos", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -100,13 +100,13 @@ "name" : "field1", "type" : "double", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "double", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -118,13 +118,13 @@ "name" : "field1", "type" : "geo_point", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "geo_point", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -136,13 +136,13 @@ "name" : "field1", "type" : "geo_shape", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "geo_shape", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -154,13 +154,13 @@ "name" : "field1", "type" : "geohash", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "geohash", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -172,13 +172,13 @@ "name" : "field1", "type" : "geohex", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "geohex", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -190,13 +190,13 @@ "name" : "field1", "type" : "geotile", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "geotile", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -208,13 +208,13 @@ "name" : "field1", "type" : "integer", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "integer", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -226,13 +226,13 @@ "name" : "field1", "type" : "ip", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "ip", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -244,13 +244,13 @@ "name" : "field1", "type" : "keyword", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "keyword", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -262,13 +262,13 @@ "name" : "field1", "type" : "keyword", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "text", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -280,13 +280,13 @@ "name" : "field1", "type" : "long", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "long", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -298,13 +298,13 @@ "name" : "field1", "type" : "text", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "keyword", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -316,13 +316,13 @@ "name" : "field1", "type" : "text", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "text", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -334,13 +334,13 @@ "name" : "field1", "type" : "unsigned_long", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "unsigned_long", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, @@ -352,13 +352,13 @@ "name" : "field1", "type" : "version", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." }, { "name" : "field2", "type" : "version", "optional" : false, - "description" : "Multivalue expression. If null, the function returns null." + "description" : "Multivalue expression. Null values are treated as empty sets." } ], "variadic" : false, diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/mv_union.md b/docs/reference/query-languages/esql/kibana/docs/functions/mv_union.md index 758fcd03569e0..17b297a2d654a 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/mv_union.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/mv_union.md @@ -1,7 +1,7 @@ % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. ### MV UNION -Returns all unique values from both input fields. Returns `null` if either field is null. +Returns all unique values from the combined input fields (set union). Null values are treated as empty sets; returns `null` only if both fields are null. ```esql ROW a = [1, 2, 3, 4, 5], b = [2, 3, 4, 5, 6] diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index 01a1b91fd2b3b..7604e4c06ec4a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; +import java.util.ArrayList; +import java.util.List; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; @@ -145,7 +147,26 @@ private MvUnion(StreamInput in) throws IOException { @Override public Object fold(FoldContext ctx) { - return EvaluatorMapper.super.fold(source(), ctx); + Object leftVal = left().fold(ctx); + Object rightVal = right().fold(ctx); + + // If both are null, return null + if (leftVal == null && rightVal == null) { + return null; + } + + // Treat null as empty set + List leftList = leftVal == null ? List.of() : (leftVal instanceof List l ? l : List.of(leftVal)); + List rightList = rightVal == null ? List.of() : (rightVal instanceof List l ? l : List.of(rightVal)); + + // Compute union using LinkedHashSet to maintain order + Set result = new LinkedHashSet<>(leftList); + result.addAll(rightList); + + if (result.isEmpty()) { + return null; + } + return result.size() == 1 ? result.iterator().next() : new ArrayList<>(result); } @Evaluator(extraName = "Boolean") @@ -246,7 +267,10 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvalua @Override public Nullability nullable() { - return Nullability.TRUE; + // Return UNKNOWN to prevent the optimizer from replacing the entire + // expression with null when one argument is null. MV_UNION treats + // null as an empty set - only returns null if BOTH arguments are null. + return Nullability.UNKNOWN; } @Override From 87aeeab34bc93fca7744b30a60ae9f96c0fc2f04 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 19 Dec 2025 19:03:30 +0000 Subject: [PATCH 12/14] [CI] Auto commit changes from spotless --- .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 2 +- .../esql/expression/function/scalar/multivalue/MvUnion.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 3b17233189b9a..51b7269d973bf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1806,7 +1806,7 @@ public enum Cap { * Support for the MV_UNION function which returns the set union of two multivalued fields */ FN_MV_UNION, - + /** * Enables late materialization on node reduce. See also QueryPragmas.NODE_LEVEL_REDUCTION */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index 7604e4c06ec4a..a89716aa43d2b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; -import java.util.ArrayList; -import java.util.List; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; @@ -39,7 +37,9 @@ import org.elasticsearch.xpack.esql.planner.PlannerUtils; import java.io.IOException; +import java.util.ArrayList; import java.util.LinkedHashSet; +import java.util.List; import java.util.Objects; import java.util.Set; import java.util.function.BiFunction; From ec29f3e79a3ab892b54f8cd3e76ed1fc309e564b Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 19 Dec 2025 19:21:16 +0000 Subject: [PATCH 13/14] Line break introduced --- .../esql/expression/function/scalar/multivalue/MvUnion.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index a89716aa43d2b..7142fffb93f6b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -79,7 +79,8 @@ public class MvUnion extends BinaryScalarFunction implements EvaluatorMapper { "long", "unsigned_long", "version" }, - description = "Returns all unique values from the combined input fields (set union). Null values are treated as empty sets; returns `null` only if both fields are null.", + description = "Returns all unique values from the combined input fields (set union). " + + "Null values are treated as empty sets; returns `null` only if both fields are null.", preview = true, examples = { @Example(file = "mv_union", tag = "testMvUnionWithIntValues"), From 8253dd0dbfa6cfb5be6a8cbf3682ce69bbedec2f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 19 Dec 2025 19:30:11 +0000 Subject: [PATCH 14/14] [CI] Auto commit changes from spotless --- .../esql/expression/function/scalar/multivalue/MvUnion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index 7142fffb93f6b..df64b68589666 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -79,8 +79,8 @@ public class MvUnion extends BinaryScalarFunction implements EvaluatorMapper { "long", "unsigned_long", "version" }, - description = "Returns all unique values from the combined input fields (set union). " + - "Null values are treated as empty sets; returns `null` only if both fields are null.", + description = "Returns all unique values from the combined input fields (set union). " + + "Null values are treated as empty sets; returns `null` only if both fields are null.", preview = true, examples = { @Example(file = "mv_union", tag = "testMvUnionWithIntValues"),