From 5635689b4c3f906f482b840a87dacab2ba8ecd2d Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 23 Dec 2025 23:49:50 +0000 Subject: [PATCH 1/3] ES|QL: Refactor MV set operations to use shared helper --- .../scalar/multivalue/MvIntersection.java | 25 +++++++----- .../multivalue/MvSetOperationHelper.java | 40 +++++++++++++++++++ .../function/scalar/multivalue/MvUnion.java | 35 ++++++++-------- .../MvSetOperationHelperTests.java | 0 4 files changed, 73 insertions(+), 27 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelper.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelperTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java index 71a3aae256b93..b512e770235ef 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java @@ -286,33 +286,38 @@ static void processIntersectionSet( ) { int firstValueCount = field1.getValueCount(position); int secondValueCount = field2.getValueCount(position); + + // If either block has no values, there will be no intersection if (firstValueCount == 0 || secondValueCount == 0) { - // if either block has no values, there will be no intersection builder.appendNull(); return; } + // Extract values from first field (LinkedHashSet to preserve order) + Set firstSet = new LinkedHashSet<>(); int firstValueIndex = field1.getFirstValueIndex(position); - int secondValueIndex = field2.getFirstValueIndex(position); - - Set values = new LinkedHashSet<>(); for (int i = 0; i < firstValueCount; i++) { - values.add(getValueFunction.apply(firstValueIndex + i, field1)); + firstSet.add(getValueFunction.apply(firstValueIndex + i, field1)); } - Set secondValues = new HashSet<>(); + // Extract values from second field (HashSet - order doesn't matter for lookup) + Set secondSet = new HashSet<>(); + int secondValueIndex = field2.getFirstValueIndex(position); for (int i = 0; i < secondValueCount; i++) { - secondValues.add(getValueFunction.apply(secondValueIndex + i, field2)); + secondSet.add(getValueFunction.apply(secondValueIndex + i, field2)); } - values.retainAll(secondValues); - if (values.isEmpty()) { + // Compute intersection using helper + Set result = MvSetOperationHelper.intersection(firstSet, secondSet); + + if (result.isEmpty()) { builder.appendNull(); return; } + // Build result builder.beginPositionEntry(); - for (T value : values) { + for (T value : result) { addValueFunction.accept(value); } builder.endPositionEntry(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelper.java new file mode 100644 index 0000000000000..246620a4f7833 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelper.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.multivalue; + +import java.util.LinkedHashSet; +import java.util.Set; + +/** + * Shared set operations for MV functions. + * Preserves insertion order using LinkedHashSet. + */ +public final class MvSetOperationHelper { + + private MvSetOperationHelper() {} + + /** + * Union: returns all unique elements from both sets. + * Order: elements from set1 first, then new elements from set2. + */ + public static Set union(Set set1, Set set2) { + Set result = new LinkedHashSet<>(set1); + result.addAll(set2); + return result; + } + + /** + * Intersection: returns elements present in both sets. + * Order: preserved from set1. + */ + public static Set intersection(Set set1, Set set2) { + Set result = new LinkedHashSet<>(set1); + result.retainAll(set2); + return result; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index df64b68589666..ea38882cfe68c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -289,41 +289,42 @@ public boolean equals(Object obj) { } static void processUnionSet( - Block.Builder builder, - int position, - Block field1, - Block field2, - BiFunction getValueFunction, - Consumer addValueFunction + Block.Builder builder, + int position, + Block field1, + Block field2, + BiFunction getValueFunction, + Consumer addValueFunction ) { int firstValueCount = field1.getValueCount(position); int secondValueCount = field2.getValueCount(position); - // If both field has no values, return null + // If both fields have no values, return null if (firstValueCount == 0 && secondValueCount == 0) { builder.appendNull(); return; } + // Extract values from first field + Set firstSet = new LinkedHashSet<>(); int firstValueIndex = field1.getFirstValueIndex(position); - int secondValueIndex = field2.getFirstValueIndex(position); - - // Use LinkedHashSet to maintain insertion order - Set values = new LinkedHashSet<>(); - - // Add all values from first field for (int i = 0; i < firstValueCount; i++) { - values.add(getValueFunction.apply(firstValueIndex + i, field1)); + firstSet.add(getValueFunction.apply(firstValueIndex + i, field1)); } - // Add all values from second field (duplicates automatically ignored by Set) + // Extract values from second field + Set secondSet = new LinkedHashSet<>(); + int secondValueIndex = field2.getFirstValueIndex(position); for (int i = 0; i < secondValueCount; i++) { - values.add(getValueFunction.apply(secondValueIndex + i, field2)); + secondSet.add(getValueFunction.apply(secondValueIndex + i, field2)); } + // Compute union using helper + Set result = MvSetOperationHelper.union(firstSet, secondSet); + // Build result builder.beginPositionEntry(); - for (T value : values) { + for (T value : result) { addValueFunction.accept(value); } builder.endPositionEntry(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelperTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelperTests.java new file mode 100644 index 0000000000000..e69de29bb2d1d From 5ed5dbec37a9980c2b05319018e010de85bf67a7 Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 23 Dec 2025 23:59:17 +0000 Subject: [PATCH 2/3] cleaned up the PR --- .../expression/function/scalar/multivalue/MvIntersection.java | 3 +-- .../function/scalar/multivalue/MvSetOperationHelperTests.java | 0 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelperTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java index b512e770235ef..4e349ac4865a5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java @@ -287,8 +287,8 @@ static void processIntersectionSet( int firstValueCount = field1.getValueCount(position); int secondValueCount = field2.getValueCount(position); - // If either block has no values, there will be no intersection if (firstValueCount == 0 || secondValueCount == 0) { + // If either block has no values, there will be no intersection builder.appendNull(); return; } @@ -315,7 +315,6 @@ static void processIntersectionSet( return; } - // Build result builder.beginPositionEntry(); for (T value : result) { addValueFunction.accept(value); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelperTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelperTests.java deleted file mode 100644 index e69de29bb2d1d..0000000000000 From 7e83ce5c5134b67e015d849579ec18bdcc905d39 Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 6 Jan 2026 18:14:02 +0000 Subject: [PATCH 3/3] updated code to use operational set --- .../scalar/multivalue/MvIntersection.java | 9 ++--- .../multivalue/MvSetOperationHelper.java | 39 +++++++++++-------- .../function/scalar/multivalue/MvUnion.java | 20 +++++----- 3 files changed, 36 insertions(+), 32 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java index 4e349ac4865a5..c32aacd4690dc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvIntersection.java @@ -38,7 +38,6 @@ import java.io.IOException; import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.Objects; import java.util.Set; import java.util.function.BiFunction; @@ -293,8 +292,8 @@ static void processIntersectionSet( return; } - // Extract values from first field (LinkedHashSet to preserve order) - Set firstSet = new LinkedHashSet<>(); + // Extract values from first field into OperationalSet (preserves order) + MvSetOperationHelper.OperationalSet firstSet = new MvSetOperationHelper.OperationalSet<>(); int firstValueIndex = field1.getFirstValueIndex(position); for (int i = 0; i < firstValueCount; i++) { firstSet.add(getValueFunction.apply(firstValueIndex + i, field1)); @@ -307,8 +306,8 @@ static void processIntersectionSet( secondSet.add(getValueFunction.apply(secondValueIndex + i, field2)); } - // Compute intersection using helper - Set result = MvSetOperationHelper.intersection(firstSet, secondSet); + // Compute intersection in-place + Set result = firstSet.intersect(secondSet); if (result.isEmpty()) { builder.appendNull(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelper.java index 246620a4f7833..e0ce4dfaee377 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSetOperationHelper.java @@ -18,23 +18,28 @@ public final class MvSetOperationHelper { private MvSetOperationHelper() {} - /** - * Union: returns all unique elements from both sets. - * Order: elements from set1 first, then new elements from set2. - */ - public static Set union(Set set1, Set set2) { - Set result = new LinkedHashSet<>(set1); - result.addAll(set2); - return result; - } + public static class OperationalSet extends LinkedHashSet { + + /** + * Performs an in-place union with the given set. + * Adds all elements from the given set to this set. + * @param set the set to union with + * @return this set after the union operation + */ + public Set union(Set set) { + this.addAll(set); + return this; + } - /** - * Intersection: returns elements present in both sets. - * Order: preserved from set1. - */ - public static Set intersection(Set set1, Set set2) { - Set result = new LinkedHashSet<>(set1); - result.retainAll(set2); - return result; + /** + * Performs an in-place intersection with the given set. + * Retains only elements that are present in both sets. + * @param set the set to intersect with + * @return this set after the intersection operation + */ + public Set intersect(Set set) { + this.retainAll(set); + return this; + } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java index ea38882cfe68c..eaaa785c29a53 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvUnion.java @@ -289,12 +289,12 @@ public boolean equals(Object obj) { } static void processUnionSet( - Block.Builder builder, - int position, - Block field1, - Block field2, - BiFunction getValueFunction, - Consumer addValueFunction + Block.Builder builder, + int position, + Block field1, + Block field2, + BiFunction getValueFunction, + Consumer addValueFunction ) { int firstValueCount = field1.getValueCount(position); int secondValueCount = field2.getValueCount(position); @@ -305,8 +305,8 @@ static void processUnionSet( return; } - // Extract values from first field - Set firstSet = new LinkedHashSet<>(); + // Extract values from first field into OperationalSet + MvSetOperationHelper.OperationalSet firstSet = new MvSetOperationHelper.OperationalSet<>(); int firstValueIndex = field1.getFirstValueIndex(position); for (int i = 0; i < firstValueCount; i++) { firstSet.add(getValueFunction.apply(firstValueIndex + i, field1)); @@ -319,8 +319,8 @@ static void processUnionSet( secondSet.add(getValueFunction.apply(secondValueIndex + i, field2)); } - // Compute union using helper - Set result = MvSetOperationHelper.union(firstSet, secondSet); + // Compute union in-place + Set result = firstSet.union(secondSet); // Build result builder.beginPositionEntry();