From cab581d34304e7415050c182c21978218d0d3fa9 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Mon, 12 Jan 2026 12:44:46 +0200 Subject: [PATCH 1/2] wip --- .../optimizer/rules/logical/PruneColumns.java | 51 +++++++++++++-- .../optimizer/LogicalPlanOptimizerTests.java | 64 +++++++++++++++++++ 2 files changed, 109 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java index f80bc973d3fc8..e7c75af45ba7f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java @@ -13,6 +13,7 @@ import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; +import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.util.Holder; @@ -102,7 +103,6 @@ private static LogicalPlan pruneColumns(LogicalPlan plan, AttributeSet.Builder u private static LogicalPlan pruneColumnsInAggregate(Aggregate aggregate, AttributeSet.Builder used, boolean inlineJoin) { LogicalPlan p = aggregate; var remaining = pruneUnusedAndAddReferences(aggregate.aggregates(), used); - if (remaining == null) { return p; } @@ -141,8 +141,8 @@ private static LogicalPlan pruneColumnsInAggregate(Aggregate aggregate, Attribut private static LogicalPlan pruneColumnsInInlineJoinRight(InlineJoin ij, AttributeSet.Builder used, Holder recheck) { LogicalPlan p = ij; - var right = pruneColumns(ij.right(), used, true); + if (right.output().isEmpty() || isLocalEmptyRelation(right)) { p = pruneRightSideAndProject(ij); recheck.set(true); @@ -199,12 +199,43 @@ private static LogicalPlan pruneColumnsInEval(Eval eval, AttributeSet.Builder us } // Note: only run when the Project is a descendent of an InlineJoin. + // Note: it's only applied on Projects on the right-hand side of the InlineJoin. private static LogicalPlan pruneColumnsInProject(Project project, AttributeSet.Builder used, Holder recheck) { LogicalPlan p = project; + LogicalPlan newChild = pruneColumns(project.child(), used, true); + + if (newChild == project.child()) { + return p; + } + List unPrunnableAttrs = List.of(); + // Check to see if there is any aggregation left and, if so, compare the aggregation's groupings with the project's remaining + // used attributes. We don't want to prune all groupings if there is at least one aggregate left, since we need the grouping + // to compute the aggregate. + var aggs = newChild.collectFirstChildren(a -> a instanceof Aggregate); + if (aggs.isEmpty() == false) {// this can happen if the aggregate is completely pruned and replaced by its child + var agg = (Aggregate) aggs.get(0); + + boolean isOneGroupingKept = false; + boolean isOneAggregateKept = false; + var output = newChild.output(); + for (var attr : output) { + if (isOneGroupingKept == false && agg.groupings().contains(attr)) { + isOneGroupingKept = true; + } + if (isOneAggregateKept == false && agg.aggregates().contains(attr) && agg.groupings().contains(attr) == false) { + isOneAggregateKept = true; + } + } - var remaining = pruneUnusedAndAddReferences(project.projections(), used); + if (isOneGroupingKept == false && agg.groupings().isEmpty() == false && isOneAggregateKept) { + // don't let the groupings be completely pruned if there is at least one aggregate left + unPrunnableAttrs = agg.groupings(); + } + } + + var remaining = pruneUnusedAndAddReferences(project.projections(), used, unPrunnableAttrs); if (remaining != null) { - p = remaining.isEmpty() ? emptyLocalRelation(project) : new Project(project.source(), project.child(), remaining); + p = remaining.isEmpty() ? emptyLocalRelation(project) : new Project(project.source(), newChild, remaining); recheck.set(true); } @@ -303,7 +334,11 @@ private static boolean isLocalEmptyRelation(LogicalPlan plan) { * Returns null if no pruning occurred. * As a side effect, the references of the kept attributes are added to the input set (builder) -- irrespective of the return value. */ - private static List pruneUnusedAndAddReferences(List named, AttributeSet.Builder used) { + private static List pruneUnusedAndAddReferences( + List named, + AttributeSet.Builder used, + List exceptions + ) { var clone = new ArrayList<>(named); for (var it = clone.listIterator(clone.size()); it.hasPrevious();) { @@ -311,11 +346,15 @@ private static List pruneUnusedAndAddReferences(L var attr = prev.toAttribute(); if (used.contains(attr)) { used.addAll(prev.references()); - } else { + } else if (exceptions.contains(attr) == false) { it.remove(); } } return clone.size() != named.size() ? clone : null; } + + private static List pruneUnusedAndAddReferences(List named, AttributeSet.Builder used) { + return pruneUnusedAndAddReferences(named, used, List.of()); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index e9b8eeb59419a..a2723f1819725 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -10775,4 +10775,68 @@ public void testDoubleInlineStatsPrunning_With_MV_Functions() { assertThat(mvAvgAlias.child(), instanceOf(MvAvg.class)); as(leftEval.child(), EsRelation.class); } + + /* + * EsqlProject[[salary{f}#15, aaaaa{r}#7]] + * \_Limit[10[INTEGER],false,false] + * \_InlineJoin[LEFT,[languages{f}#13],[languages{r}#13]] + * |_EsqlProject[[languages{f}#13, salary{f}#15]] + * | \_EsRelation[employees][_meta_field{f}#16, emp_no{f}#10, first_name{f}#11, ..] + * \_Project[[aaaaa{r}#7, languages{f}#13]] + * \_Eval[[$$SUM$aaaaa$0{r$}#21 / $$COUNT$aaaaa$1{r$}#22 AS aaaaa#7]] + * \_Aggregate[[languages{f}#13],[SUM(salary{f}#15,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS $$SUM$aaaaa$0#21 + * , COUNT(salary{f}#15,true[BOOLEAN],PT0S[TIME_DURATION]) AS $$COUNT$aaaaa$1#22, languages{f}#13]] + * \_StubRelation[[languages{f}#13, salary{f}#15]] + */ + public void testDropInlineStatsGrouping() { + var query = """ + FROM employees + | KEEP languages, salary + | INLINE STATS aaaaa = AVG(salary) BY languages + | DROP languages + | LIMIT 10 + """; + + var plan = optimizedPlan(query); + } + + public void testKeepExceptInlineStatsGrouping() { + var query = """ + FROM employees + | KEEP languages, salary + | INLINE STATS aaaaa = AVG(salary) BY languages + | KEEP salary, aaaaa + | LIMIT 10 + """; + + var plan = optimizedPlan(query); + } + + public void testDropDoubleInlineStatsGrouping() { + var query = """ + FROM employees + | KEEP languages, salary + | INLINE STATS languages1 = MV_AVG(languages), avg = AVG(salary) BY languages + | INLINE STATS languages2 = SUM(languages1), avg = AVG(salary) BY languages + | DROP languages + | SORT salary + | LIMIT 10 + """; + + var plan = optimizedPlan(query); + } + + public void testDropDoubleInlineStats_PartialMultipleGroupings() { + var query = """ + FROM employees + | KEEP languages, salary, long_noidx, gender + | INLINE STATS languages1 = MV_AVG(long_noidx), avg = AVG(salary) BY languages, long_noidx + | INLINE STATS languages2 = SUM(languages1), avg = AVG(salary) BY languages, gender + | DROP languages + | SORT salary + | LIMIT 10 + """; + + var plan = optimizedPlan(query); + } } From bcfe02d40573b9e8ee33b9696f5542cd6c9dfed0 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Fri, 16 Jan 2026 18:41:04 +0200 Subject: [PATCH 2/2] Tests --- .../src/main/resources/inlinestats.csv-spec | 53 +++++++++ .../xpack/esql/action/EsqlCapabilities.java | 4 + .../optimizer/LogicalPlanOptimizerTests.java | 106 +++++++++++++++++- 3 files changed, 160 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec index be50713930250..69a576ee77dea 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec @@ -5190,3 +5190,56 @@ ROW x = 0 x:integer | a:long | b:long | c:boolean | d:boolean 0 | 0 | 0 | false | true ; + +inlineStatsDroppingGroupingField +required_capability: inline_stats_drop_groupings_fix + +FROM employees +| KEEP languages, salary +| INLINE STATS avg_sal = ROUND(AVG(salary)) BY languages +| DROP languages +| SORT salary +| LIMIT 10 +; + + salary:i | avg_sal:d +25324 |41681.0 +25945 |41681.0 +25976 |50577.0 +26436 |52419.0 +27215 |47733.0 +28035 |50577.0 +28336 |52520.0 +28941 |52419.0 +29175 |48179.0 +30404 |52419.0 +; + +inlineStatsDroppingMultipleCommandsGroupingFields +required_capability: inline_stats_drop_groupings_fix + +FROM employees +| KEEP languages, salary, gender, emp_no, *rehired +| INLINE STATS avg_sal_by_gender = ROUND(AVG(salary)) BY gender +| INLINE STATS avg_sal_by_languages = ROUND(AVG(avg_sal_by_gender)) BY languages +| DROP languages, gender +| EVAL languages = emp_no +| INLINE STATS avg_sal_by_emp_no = ROUND(AVG(salary)) BY no = emp_no % 10, rehired = MV_MIN(is_rehired) +| RENAME no AS NO +| DROP NO, languages +| SORT salary +| LIMIT 10 +; + + salary:i | emp_no:i | is_rehired:boolean | avg_sal_by_gender:d | avg_sal_by_languages:d | avg_sal_by_emp_no:d | rehired:boolean +25324 |10015 |[false, false, false, true]|48761.0 |48779.0 |46801.0 |false +25945 |10035 |false |46861.0 |48779.0 |46801.0 |false +25976 |10092 |[false, false, true, true] |50491.0 |48082.0 |51831.0 |false +26436 |10048 |[true, true] |46861.0 |48142.0 |41230.0 |true +27215 |10057 |null |50491.0 |48177.0 |36905.0 |null +28035 |10084 |false |46861.0 |48082.0 |45508.0 |false +28336 |10026 |[false, true] |46861.0 |47950.0 |51094.0 |false +28941 |10068 |true |46861.0 |48142.0 |41230.0 |true +29175 |10060 |[false, false, false, true]|46861.0 |48116.0 |47571.0 |false +30404 |10042 |null |50491.0 |48142.0 |30404.0 |null +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 73270751347e8..6b41d8e16d417 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1908,6 +1908,10 @@ public enum Cap { */ CONDITIONAL_BLOCK_LOADER_FOR_TEXT_FIELDS, + /** + * Fixes https://github.com/elastic/elasticsearch/issues/139359 + */ + INLINE_STATS_DROP_GROUPINGS_FIX(INLINE_STATS.enabled), // Last capability should still have a comma for fewer merge conflicts when adding new ones :) // This comment prevents the semicolon from being on the previous capability when Spotless formats the file. ; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 53ae6be25bcad..6b60b4733acc8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -10896,10 +10896,10 @@ public void testDoubleInlineStatsPrunning_With_MV_Functions() { } /* - * EsqlProject[[salary{f}#15, aaaaa{r}#7]] + * Project[[salary{f}#15, aaaaa{r}#7]] * \_Limit[10[INTEGER],false,false] * \_InlineJoin[LEFT,[languages{f}#13],[languages{r}#13]] - * |_EsqlProject[[languages{f}#13, salary{f}#15]] + * |_Project[[languages{f}#13, salary{f}#15]] * | \_EsRelation[employees][_meta_field{f}#16, emp_no{f}#10, first_name{f}#11, ..] * \_Project[[aaaaa{r}#7, languages{f}#13]] * \_Eval[[$$SUM$aaaaa$0{r$}#21 / $$COUNT$aaaaa$1{r$}#22 AS aaaaa#7]] @@ -10916,9 +10916,34 @@ public void testDropInlineStatsGrouping() { | LIMIT 10 """; - var plan = optimizedPlan(query); + var plan = as(optimizedPlan(query), Project.class); + var limit = as(plan.child(), Limit.class); + var join = as(limit.child(), InlineJoin.class); + // Left + var leftProject = as(join.left(), Project.class); + assertMap(Expressions.names(leftProject.projections()), is(List.of("languages", "salary"))); + as(leftProject.child(), EsRelation.class); + // Right + var rightProject = as(join.right(), Project.class); + assertMap(Expressions.names(rightProject.projections()), is(List.of("aaaaa", "languages"))); + var rightEval = as(rightProject.child(), Eval.class); + var aggregate = as(rightEval.child(), Aggregate.class); + assertMap(Expressions.names(aggregate.aggregates()), is(List.of("$$SUM$aaaaa$0", "$$COUNT$aaaaa$1", "languages"))); + as(aggregate.child(), StubRelation.class); } + /* + * Project[[salary{f}#16, aaaaa{r}#7]] + * \_Limit[10[INTEGER],false,false] + * \_InlineJoin[LEFT,[languages{f}#14],[languages{r}#14]] + * |_Project[[languages{f}#14, salary{f}#16]] + * | \_EsRelation[employees][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..] + * \_Project[[aaaaa{r}#7, languages{f}#14]] + * \_Eval[[$$SUM$aaaaa$0{r$}#22 / $$COUNT$aaaaa$1{r$}#23 AS aaaaa#7]] + * \_Aggregate[[languages{f}#14],[SUM(salary{f}#16,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS $$SUM$aaaaa$0#22 + * , COUNT(salary{f}#16,true[BOOLEAN],PT0S[TIME_DURATION]) AS $$COUNT$aaaaa$1#23, languages{f}#14]] + * \_StubRelation[[languages{f}#14, salary{f}#16]] + */ public void testKeepExceptInlineStatsGrouping() { var query = """ FROM employees @@ -10929,8 +10954,36 @@ public void testKeepExceptInlineStatsGrouping() { """; var plan = optimizedPlan(query); + Project project = as(plan, Project.class); + var limit = as(project.child(), Limit.class); + var join = as(limit.child(), InlineJoin.class); + // Left + var leftProject = as(join.left(), Project.class); + assertMap(Expressions.names(leftProject.projections()), is(List.of("languages", "salary"))); + as(leftProject.child(), EsRelation.class); + // Right + var rightProject = as(join.right(), Project.class); + assertMap(Expressions.names(rightProject.projections()), is(List.of("aaaaa", "languages"))); + var rightEval = as(rightProject.child(), Eval.class); + var aggregate = as(rightEval.child(), Aggregate.class); + assertMap(Expressions.names(aggregate.aggregates()), is(List.of("$$SUM$aaaaa$0", "$$COUNT$aaaaa$1", "languages"))); + as(aggregate.child(), StubRelation.class); } + /* + * Project[[salary{f}#26, languages1{r}#7, languages2{r}#14, avg{r}#17]] + * \_TopN[[Order[salary{f}#26,ASC,LAST]],10[INTEGER],false] + * \_InlineJoin[LEFT,[languages{f}#24],[languages{r}#24]] + * |_Project[[salary{f}#26, languages1{r}#7, languages{f}#24]] + * | \_Eval[[MVAVG(languages{f}#24) AS languages1#7]] + * | \_EsRelation[employees][_meta_field{f}#27, emp_no{f}#21, first_name{f}#22, ..] + * \_Project[[languages2{r}#14, avg{r}#17, languages{f}#24]] + * \_Eval[[$$SUM$avg$0{r$}#34 / $$COUNT$avg$1{r$}#35 AS avg#17]] + * \_Aggregate[[languages{f}#24],[SUM(languages1{r}#7,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS languages2#14 + * , SUM(salary{f}#26,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS $$SUM$avg$0#34, + * COUNT(salary{f}#26,true[BOOLEAN],PT0S[TIME_DURATION]) AS $$COUNT$avg$1#35, languages{f}#24]] + * \_StubRelation[[salary{f}#26, languages1{r}#7, avg{r}#10, languages{f}#24]] + */ public void testDropDoubleInlineStatsGrouping() { var query = """ FROM employees @@ -10943,8 +10996,37 @@ public void testDropDoubleInlineStatsGrouping() { """; var plan = optimizedPlan(query); + Project project = as(plan, Project.class); + var topN = as(project.child(), TopN.class); + var join = as(topN.child(), InlineJoin.class); + // Left + var leftProject = as(join.left(), Project.class); + assertMap(Expressions.names(leftProject.projections()), is(List.of("salary", "languages1", "languages"))); + var leftEval = as(leftProject.child(), Eval.class); + as(leftEval.child(), EsRelation.class); + // Right + var rightProject = as(join.right(), Project.class); + assertMap(Expressions.names(rightProject.projections()), is(List.of("languages2", "avg", "languages"))); + var rightEval = as(rightProject.child(), Eval.class); + var aggregate = as(rightEval.child(), Aggregate.class); + assertMap(Expressions.names(aggregate.aggregates()), is(List.of("languages2", "$$SUM$avg$0", "$$COUNT$avg$1", "languages"))); + as(aggregate.child(), StubRelation.class); } + /* + * Project[[salary{f}#30, languages1{r}#9, long_noidx{f}#35, languages2{r}#17, avg{r}#20, gender{f}#27]] + * \_TopN[[Order[salary{f}#30,ASC,LAST]],10[INTEGER],false] + * \_InlineJoin[LEFT,[languages{f}#28, gender{f}#27],[languages{r}#28, gender{r}#27]] + * |_Project[[salary{f}#30, gender{f}#27, languages1{r}#9, languages{f}#28, long_noidx{f}#35]] + * | \_Eval[[MVAVG(long_noidx{f}#35) AS languages1#9]] + * | \_EsRelation[employees][_meta_field{f}#31, emp_no{f}#25, first_name{f}#26, ..] + * \_Project[[languages2{r}#17, avg{r}#20, languages{f}#28, gender{f}#27]] + * \_Eval[[$$SUM$avg$0{r$}#38 / $$COUNT$avg$1{r$}#39 AS avg#20]] + * \_Aggregate[[languages{f}#28, gender{f}#27],[SUM(languages1{r}#9,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS + * languages2#17, SUM(salary{f}#30,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS $$SUM$avg$0#38, + * COUNT(salary{f}#30,true[BOOLEAN],PT0S[TIME_DURATION]) AS $$COUNT$avg$1#39, languages{f}#28, gender{f}#27]] + * \_StubRelation[[salary{f}#30, gender{f}#27, languages1{r}#9, avg{r}#12, languages{f}#28, long_noidx{f}#35]] + */ public void testDropDoubleInlineStats_PartialMultipleGroupings() { var query = """ FROM employees @@ -10957,5 +11039,23 @@ public void testDropDoubleInlineStats_PartialMultipleGroupings() { """; var plan = optimizedPlan(query); + Project project = as(plan, Project.class); + var topN = as(project.child(), TopN.class); + var join = as(topN.child(), InlineJoin.class); + // Left + var leftProject = as(join.left(), Project.class); + assertMap(Expressions.names(leftProject.projections()), is(List.of("salary", "gender", "languages1", "languages", "long_noidx"))); + var leftEval = as(leftProject.child(), Eval.class); + var leftRelation = as(leftEval.child(), EsRelation.class); + // Right + var rightProject = as(join.right(), Project.class); + assertMap(Expressions.names(rightProject.projections()), is(List.of("languages2", "avg", "languages", "gender"))); + var rightEval = as(rightProject.child(), Eval.class); + var aggregate = as(rightEval.child(), Aggregate.class); + assertMap( + Expressions.names(aggregate.aggregates()), + is(List.of("languages2", "$$SUM$avg$0", "$$COUNT$avg$1", "languages", "gender")) + ); + as(aggregate.child(), StubRelation.class); } }