From cc1540872ecd111634af21057f1792485db2ca23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nacho=20Cord=C3=B3n?= Date: Tue, 18 Nov 2025 10:20:58 +0100 Subject: [PATCH 1/2] Fixes esql class cast bug in STATS at planning level (#137511) --- docs/changelog/137511.yaml | 5 + .../src/main/resources/inlinestats.csv-spec | 17 + .../src/main/resources/stats.csv-spec | 71 ++ .../xpack/esql/action/EsqlCapabilities.java | 7 + .../esql/optimizer/LogicalPlanOptimizer.java | 9 + .../rules/logical/DeduplicateAggs.java | 21 + ...ReplaceAggregateAggExpressionWithEval.java | 28 +- .../optimizer/LogicalPlanOptimizerTests.java | 31 +- .../rules/logical/DeduplicateAggsTests.java | 616 ++++++++++++++++++ 9 files changed, 786 insertions(+), 19 deletions(-) create mode 100644 docs/changelog/137511.yaml create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java diff --git a/docs/changelog/137511.yaml b/docs/changelog/137511.yaml new file mode 100644 index 0000000000000..14340bda606e9 --- /dev/null +++ b/docs/changelog/137511.yaml @@ -0,0 +1,5 @@ +pr: 137511 +summary: Fixes esql class cast bug in STATS at planning level +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec index 30ed34d9ec611..5eadaf5b43f49 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec @@ -1001,3 +1001,20 @@ FROM hosts METADATA _index description:text | host:keyword | ip0:ip | _index:keyword | x:ip | ip1:long | host_group:text | card:keyword alpha db server |alpha |127.0.0.1 |hosts |127.0.0.1|1 |DB servers |eth0 ; + +fixClassCastBugWithSeveralCounts +required_capability: inline_stats +required_capability: fix_stats_classcast_exception + +FROM sample_data, sample_data_str +| EVAL one_ip = client_ip::ip +| INLINE STATS count1=count(client_ip::ip), count2=count(one_ip) +| KEEP count1, count2 +| LIMIT 3 +; + +count1:long |count2:long +14 |14 +14 |14 +14 |14 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 22f1e7199fc54..49faf97ac6685 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -3344,3 +3344,74 @@ VALUES(color):keyword | color:keyword yellow | yellow // end::mv-group-values-expand-result[] ; + +fixClassCastBugWithCountDistinct +required_capability: fix_stats_classcast_exception + +from airports +| rename scalerank AS x +| stats a = count(x), b = count(x) + count(x), c = count_distinct(x) +; + +a:long | b:long | c:long +891 | 1782 | 8 +; + +fixClassCastBugWithValuesFn +required_capability: fix_stats_classcast_exception + +ROW x = [1,2,3] +| STATS a = MV_COUNT(VALUES(x)), b = VALUES(x), c = SUM(x) +; + +a:integer | b:integer | c:long +3 | [1, 2, 3] | 6 +; + +fixClassCastBugWithSeveralCountDistincts +required_capability: fix_stats_classcast_exception + +ROW x = 1 +| STATS a = 2*COUNT_DISTINCT(x), b = COUNT_DISTINCT(x), c = MAX(x) +; + +a:long | b:long | c:integer +2 | 1 | 1 +; + +fixClassCastBugWithMedianPlusCountDistinct +required_capability: fix_stats_classcast_exception + +FROM sample_data_ts_long +| EVAL sym1 = 0, sym5 = 1 +| STATS sym2 = median(sym5) + 0, sym3 = median(sym5), sym4 = count_distinct(sym1) +; + +sym2:double |sym3:double | sym4:long +1.0 | 1.0 | 1 +; + +fixClassCastBugWithFoldableLiterals +required_capability: fix_stats_classcast_exception + +from airports +| rename scalerank AS x +| stats a = count(x), b = count(x) + count(x), c = count_distinct(x, 10), d = count_distinct(x, 10 + 1 - 1) +; + +a:long | b:long | c:long | d:long +891 | 1782 | 8 | 8 +; + +fixClassCastBugWithSurrogateExpressions +required_capability: fix_stats_classcast_exception + +from airports +| rename scalerank AS x +| stats a = median(x), b = percentile(x, 50), c = count_distinct(x) +; + +a:double | b:double | c:long +6.0 | 6.0 | 8 +; + diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index e2c3b7cd1beda..4c0bc0ff6af40 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1282,6 +1282,13 @@ public enum Cap { */ STATS_WITH_FILTERED_SURROGATE_FIXED, + /** + * Fix for ClassCastException in STATS + * https://github.com/elastic/elasticsearch/issues/133992 + * https://github.com/elastic/elasticsearch/issues/136598 + */ + FIX_STATS_CLASSCAST_EXCEPTION, + /** * {@link org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAliasingEvalWithProject} did not fully account for shadowing. * https://github.com/elastic/elasticsearch/issues/137019. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index 14a858f85fd2a..cfee578df349e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineEvals; import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineProjections; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ConstantFolding; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggs; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ExtractAggregateCommonFilter; import org.elasticsearch.xpack.esql.optimizer.rules.logical.FoldNull; import org.elasticsearch.xpack.esql.optimizer.rules.logical.LiteralsOnTheRight; @@ -171,6 +172,14 @@ protected static Batch operators(boolean local) { new SplitInWithFoldableValue(), new PropagateEvalFoldables(), new ConstantFolding(), + /* Then deduplicate aggregations + We need this after the constant folding + because we could have expressions like + count_distinct(_, 9 + 1) + count_distinct(_, 10) + which are semantically identical + */ + new DeduplicateAggs(), new PartiallyFoldCase(), // boolean new BooleanSimplification(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java new file mode 100644 index 0000000000000..170f87f86f8ff --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.logical; + +/** + * This rule handles duplicate aggregate functions to avoid duplicate compute + * stats a = min(x), b = min(x), c = count(*), d = count() by g + * becomes + * stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g + */ +public final class DeduplicateAggs extends ReplaceAggregateAggExpressionWithEval { + + public DeduplicateAggs() { + super(false); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java index 075d8676abde4..52e99fd0bf89c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java @@ -43,9 +43,17 @@ * becomes * stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g */ -public final class ReplaceAggregateAggExpressionWithEval extends OptimizerRules.OptimizerRule { +public class ReplaceAggregateAggExpressionWithEval extends OptimizerRules.OptimizerRule { + private final boolean replaceNestedExpressions; + + public ReplaceAggregateAggExpressionWithEval(boolean replaceNestedExpressions) { + super(OptimizerRules.TransformDirection.UP); + this.replaceNestedExpressions = replaceNestedExpressions; + } + public ReplaceAggregateAggExpressionWithEval() { super(OptimizerRules.TransformDirection.UP); + this.replaceNestedExpressions = true; } @Override @@ -88,7 +96,7 @@ protected LogicalPlan rule(Aggregate aggregate) { // common case - handle duplicates if (child instanceof AggregateFunction af) { // canonical representation, with resolved aliases - AggregateFunction canonical = (AggregateFunction) af.canonical().transformUp(e -> aliases.resolve(e, e)); + AggregateFunction canonical = getCannonical(af, aliases); Alias found = rootAggs.get(canonical); // aggregate is new @@ -106,14 +114,15 @@ protected LogicalPlan rule(Aggregate aggregate) { } // nested expression over aggregate function or groups // replace them with reference and move the expression into a follow-up eval - else { + else if (replaceNestedExpressions) { changed.set(true); Expression aggExpression = child.transformUp(AggregateFunction.class, af -> { - AggregateFunction canonical = (AggregateFunction) af.canonical(); + // canonical representation, with resolved aliases + AggregateFunction canonical = getCannonical(af, aliases); Alias alias = rootAggs.get(canonical); if (alias == null) { - // create synthetic alias ove the found agg function - alias = new Alias(af.source(), syntheticName(canonical, child, counter[0]++), canonical, null, true); + // create synthetic alias over the found agg function + alias = new Alias(af.source(), syntheticName(canonical, child, counter[0]++), af.canonical(), null, true); // and remember it to remove duplicates rootAggs.put(canonical, alias); // add it to the list of aggregates and continue @@ -132,6 +141,9 @@ protected LogicalPlan rule(Aggregate aggregate) { Alias alias = as.replaceChild(aggExpression); newEvals.add(alias); newProjections.add(alias.toAttribute()); + } else { + newAggs.add(agg); + newProjections.add(agg.toAttribute()); } } // not an alias (e.g. grouping field) @@ -155,6 +167,10 @@ protected LogicalPlan rule(Aggregate aggregate) { return plan; } + private static AggregateFunction getCannonical(AggregateFunction af, AttributeMap aliases) { + return (AggregateFunction) af.canonical().transformUp(e -> aliases.resolve(e, e)); + } + private static String syntheticName(Expression expression, Expression af, int counter) { return TemporaryNameUtils.temporaryName(expression, af, counter); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 2cd28b817a184..3ff2c3f8be2e8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -178,6 +178,13 @@ import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GTE; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LT; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LTE; +<<<<<<< HEAD +======= +import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aggFieldName; +import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aliased; +import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.DOWN; +import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.UP; +>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511)) import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -473,6 +480,7 @@ public void testAggsWithOverridingInputAndGrouping() throws Exception { } /** +<<<<<<< HEAD * Project[[s{r}#4 AS d, s{r}#4, last_name{f}#21, first_name{f}#18]] * \_Limit[1000[INTEGER]] * \_Aggregate[[last_name{f}#21, first_name{f}#18],[SUM(salary{f}#22) AS s, last_name{f}#21, first_name{f}#18]] @@ -495,6 +503,9 @@ public void testCombineProjectionWithDuplicateAggregation() { } /** +======= + *
{@code
+>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Limit[1000[INTEGER]]
      * \_Aggregate[STANDARD,[],[SUM(salary{f}#12,true[BOOLEAN]) AS sum(salary), SUM(salary{f}#12,last_name{f}#11 == [44 6f 65][KEYW
      * ORD]) AS sum(salary) WheRe last_name ==   "Doe"]]
@@ -3809,6 +3820,7 @@ public void testPruneRenameOnAggBy() {
     }
 
     /**
+<<<<<<< HEAD
      * Expects
      * Project[[c1{r}#2, c2{r}#4, cs{r}#6, cm{r}#8, cexp{r}#10]]
      * \_Eval[[c1{r}#2 AS c2, c1{r}#2 AS cs, c1{r}#2 AS cm, c1{r}#2 AS cexp]]
@@ -3971,6 +3983,8 @@ public void testEliminateDuplicateRenamedGroupings() {
     }
 
     /**
+=======
+>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Expected
      * Limit[2[INTEGER]]
      * \_Filter[a{r}#6 > 2[INTEGER]]
@@ -4061,19 +4075,6 @@ public void testLimitZeroUsesLocalRelation() {
         assertThat(plan, instanceOf(LocalRelation.class));
     }
 
-    private  T aliased(Expression exp, Class clazz) {
-        var alias = as(exp, Alias.class);
-        return as(alias.child(), clazz);
-    }
-
-    private  void aggFieldName(Expression exp, Class aggType, String fieldName) {
-        var alias = as(exp, Alias.class);
-        var af = as(alias.child(), aggType);
-        var field = af.field();
-        var name = field.foldable() ? BytesRefs.toString(field.fold(FoldContext.small())) : Expressions.name(field);
-        assertThat(name, is(fieldName));
-    }
-
     /**
      * Expects
      * Limit[1000[INTEGER]]
@@ -4831,6 +4832,7 @@ public void testStatsExpOverAggsWithScalars() {
 
     /**
      * Expects
+<<<<<<< HEAD
      * Project[[a{r}#5, b{r}#9, $$max(salary)_+_3>$COUNT$2{r}#46 AS d, $$count(salary)_->$MIN$3{r}#47 AS e, $$avg(salary)_+_m
      * >$MAX$1{r}#45 AS g]]
      * \_Eval[[$$$$avg(salary)_+_m>$AVG$0$SUM$0{r}#48 / $$max(salary)_+_3>$COUNT$2{r}#46 AS $$avg(salary)_+_m>$AVG$0, $$avg(
@@ -4902,6 +4904,9 @@ public void testStatsExpOverAggsWithScalarAndDuplicateAggs() {
 
     /**
      * Expects
+=======
+     * 
{@code
+>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Project[[a{r}#5, a{r}#5 AS b, w{r}#12]]
      * \_Limit[1000[INTEGER]]
      *   \_Aggregate[[w{r}#12],[SUM($$salary_/_2_+_la>$SUM$0{r}#26) AS a, w{r}#12]]
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
new file mode 100644
index 0000000000000..e656cc364691a
--- /dev/null
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
@@ -0,0 +1,616 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.optimizer.rules.logical;
+
+import org.elasticsearch.common.lucene.BytesRefs;
+import org.elasticsearch.xpack.esql.core.expression.Alias;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.Expressions;
+import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
+import org.elasticsearch.xpack.esql.core.expression.FoldContext;
+import org.elasticsearch.xpack.esql.core.expression.Literal;
+import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.CountDistinct;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.Max;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.Min;
+import org.elasticsearch.xpack.esql.expression.function.aggregate.Sum;
+import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
+import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div;
+import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul;
+import org.elasticsearch.xpack.esql.optimizer.AbstractLogicalPlanOptimizerTests;
+import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
+import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
+import org.elasticsearch.xpack.esql.plan.logical.Eval;
+import org.elasticsearch.xpack.esql.plan.logical.Limit;
+import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
+import org.elasticsearch.xpack.esql.plan.logical.Project;
+import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
+
+import static org.elasticsearch.xpack.esql.EsqlTestUtils.as;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.is;
+
+//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug")
+public class DeduplicateAggsTests extends AbstractLogicalPlanOptimizerTests {
+    public static  void aggFieldName(Expression exp, Class aggType, String fieldName) {
+        var alias = as(exp, Alias.class);
+        var af = as(alias.child(), aggType);
+        var field = af.field();
+        var name = field.foldable() ? BytesRefs.toString(field.fold(FoldContext.small())) : Expressions.name(field);
+        assertThat(name, is(fieldName));
+    }
+
+    public static  T aliased(Expression exp, Class clazz) {
+        var alias = as(exp, Alias.class);
+        return as(alias.child(), clazz);
+    }
+
+    /**
+     * 
{@code
+     * Project[[s{r}#4 AS d, s{r}#4, last_name{f}#21, first_name{f}#18]]
+     * \_Limit[1000[INTEGER]]
+     *   \_Aggregate[[last_name{f}#21, first_name{f}#18],[SUM(salary{f}#22) AS s, last_name{f}#21, first_name{f}#18]]
+     *     \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..]
+     * }
+ */ + public void testCombineProjectionWithDuplicateAggregation() { + var plan = plan(""" + from test + | stats s = sum(salary), d = sum(salary), c = sum(salary) by last_name, first_name + | keep d, s, last_name, first_name + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("d", "s", "last_name", "first_name")); + var limit = as(project.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + assertThat(Expressions.names(agg.aggregates()), contains("s", "last_name", "first_name")); + assertThat(Alias.unwrap(agg.aggregates().get(0)), instanceOf(Sum.class)); + assertThat(Expressions.names(agg.groupings()), contains("last_name", "first_name")); + } + + /** + * Expects + *
{@code
+     * Project[[c1{r}#2, c2{r}#4, cs{r}#6, cm{r}#8, cexp{r}#10]]
+     * \_Eval[[c1{r}#2 AS c2, c1{r}#2 AS cs, c1{r}#2 AS cm, c1{r}#2 AS cexp]]
+     *   \_Limit[1000[INTEGER]]
+     *     \_Aggregate[[],[COUNT([2a][KEYWORD]) AS c1]]
+     *       \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..]
+     * }
+ */ + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634") + public void testEliminateDuplicateAggsCountAll() { + var plan = plan(""" + from test + | stats c1 = count(1), c2 = count(2), cs = count(*), cm = count(), cexp = count("123") + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("c1", "c2", "cs", "cm", "cexp")); + var eval = as(project.child(), Eval.class); + var fields = eval.fields(); + assertThat(Expressions.names(fields), contains("c2", "cs", "cm", "cexp")); + for (Alias field : fields) { + assertThat(Expressions.name(field.child()), is("c1")); + } + var limit = as(eval.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + assertThat(Expressions.names(aggs), contains("c1")); + aggFieldName(aggs.get(0), Count.class, "*"); + var source = as(agg.child(), EsRelation.class); + } + + /** + * Expects + *
{@code
+     * Project[[a{r}#5, b{r}#9, $$max(salary)_+_3>$COUNT$2{r}#46 AS d, $$count(salary)_->$MIN$3{r}#47 AS e, $$avg(salary)_+_m
+     * >$MAX$1{r}#45 AS g]]
+     * \_Eval[[$$$$avg(salary)_+_m>$AVG$0$SUM$0{r}#48 / $$max(salary)_+_3>$COUNT$2{r}#46 AS $$avg(salary)_+_m>$AVG$0, $$avg(
+     * salary)_+_m>$AVG$0{r}#44 + $$avg(salary)_+_m>$MAX$1{r}#45 AS a, $$avg(salary)_+_m>$MAX$1{r}#45 + 3[INTEGER] +
+     * 3.141592653589793[DOUBLE] + $$max(salary)_+_3>$COUNT$2{r}#46 AS b]]
+     *   \_Limit[1000[INTEGER]]
+     *     \_Aggregate[[w{r}#28],[SUM(salary{f}#39) AS $$$$avg(salary)_+_m>$AVG$0$SUM$0, MAX(salary{f}#39) AS $$avg(salary)_+_m>$MAX$1
+     * , COUNT(salary{f}#39) AS $$max(salary)_+_3>$COUNT$2, MIN(salary{f}#39) AS $$count(salary)_->$MIN$3]]
+     *       \_Eval[[languages{f}#37 % 2[INTEGER] AS w]]
+     *         \_EsRelation[test][_meta_field{f}#40, emp_no{f}#34, first_name{f}#35, ..]
+     * }
+ */ + public void testStatsExpOverAggsWithScalarAndDuplicateAggs() { + var plan = optimizedPlan(""" + from test + | stats a = avg(salary) + max(salary), + b = max(salary) + 3 + PI() + count(salary), + c = count(salary) - min(salary), + d = count(salary), + e = min(salary), + f = max(salary), + g = max(salary) + by w = languages % 2 + | keep a, b, d, e, g + """); + + var project = as(plan, Project.class); + var projections = project.projections(); + assertThat(Expressions.names(projections), contains("a", "b", "d", "e", "g")); + var refA = Alias.unwrap(projections.get(0)); + var refB = Alias.unwrap(projections.get(1)); + var refD = Alias.unwrap(projections.get(2)); + var refE = Alias.unwrap(projections.get(3)); + var refG = Alias.unwrap(projections.get(4)); + + var eval = as(project.child(), Eval.class); + var fields = eval.fields(); + // avg = Sum/Count + assertThat(Expressions.name(fields.get(0)), containsString("AVG")); + assertThat(Alias.unwrap(fields.get(0)), instanceOf(Div.class)); + // avg + max + assertThat(Expressions.name(fields.get(1)), is("a")); + var add = as(Alias.unwrap(fields.get(1)), Add.class); + var max_salary = add.right(); + assertThat(Expressions.attribute(fields.get(1)), is(Expressions.attribute(refA))); + + assertThat(Expressions.name(fields.get(2)), is("b")); + assertThat(Expressions.attribute(fields.get(2)), is(Expressions.attribute(refB))); + + add = as(Alias.unwrap(fields.get(2)), Add.class); + add = as(add.left(), Add.class); + add = as(add.left(), Add.class); + assertThat(Expressions.attribute(max_salary), is(Expressions.attribute(add.left()))); + + var limit = as(eval.child(), Limit.class); + + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + var sum = as(Alias.unwrap(aggs.get(0)), Sum.class); + + assertThat(Expressions.attribute(aggs.get(1)), is(Expressions.attribute(max_salary))); + var max = as(Alias.unwrap(aggs.get(1)), Max.class); + var count = as(Alias.unwrap(aggs.get(2)), Count.class); + var min = as(Alias.unwrap(aggs.get(3)), Min.class); + + eval = as(agg.child(), Eval.class); + fields = eval.fields(); + assertThat(Expressions.name(fields.get(0)), is("w")); + } + + /** + * Expects + *
{@code
+     * Project[[c1{r}#7, cx{r}#10, cs{r}#12, cy{r}#15]]
+     * \_Eval[[c1{r}#7 AS cx, c1{r}#7 AS cs, c1{r}#7 AS cy]]
+     *   \_Limit[1000[INTEGER]]
+     *     \_Aggregate[[],[COUNT([2a][KEYWORD]) AS c1]]
+     *       \_EsRelation[test][_meta_field{f}#22, emp_no{f}#16, first_name{f}#17, ..]
+     * }
+ */ + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634") + public void testEliminateDuplicateAggsWithAliasedFields() { + var plan = plan(""" + from test + | eval x = 1 + | eval y = x + | stats c1 = count(1), cx = count(x), cs = count(*), cy = count(y) + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("c1", "cx", "cs", "cy")); + var eval = as(project.child(), Eval.class); + var fields = eval.fields(); + assertThat(Expressions.names(fields), contains("cx", "cs", "cy")); + for (Alias field : fields) { + assertThat(Expressions.name(field.child()), is("c1")); + } + var limit = as(eval.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + assertThat(Expressions.names(aggs), contains("c1")); + aggFieldName(aggs.get(0), Count.class, "*"); + var source = as(agg.child(), EsRelation.class); + } + + /** + * Expects + *
{@code
+     * Project[[min{r}#1385, max{r}#1388, min{r}#1385 AS min2, max{r}#1388 AS max2, gender{f}#1398]]
+     * \_Limit[1000[INTEGER]]
+     *   \_Aggregate[[gender{f}#1398],[MIN(salary{f}#1401) AS min, MAX(salary{f}#1401) AS max, gender{f}#1398]]
+     *     \_EsRelation[test][_meta_field{f}#1402, emp_no{f}#1396, first_name{f}#..]
+     * }
+ */ + public void testEliminateDuplicateAggsMixed() { + var plan = plan(""" + from test + | stats min = min(salary), max = max(salary), min2 = min(salary), max2 = max(salary) by gender + """); + + var project = as(plan, Project.class); + var projections = project.projections(); + assertThat(Expressions.names(projections), contains("min", "max", "min2", "max2", "gender")); + as(projections.get(0), ReferenceAttribute.class); + as(projections.get(1), ReferenceAttribute.class); + assertThat(Expressions.name(aliased(projections.get(2), ReferenceAttribute.class)), is("min")); + assertThat(Expressions.name(aliased(projections.get(3), ReferenceAttribute.class)), is("max")); + + var limit = as(project.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + assertThat(Expressions.names(aggs), contains("min", "max", "gender")); + aggFieldName(aggs.get(0), Min.class, "salary"); + aggFieldName(aggs.get(1), Max.class, "salary"); + var source = as(agg.child(), EsRelation.class); + } + + /** + * Expects + *
{@code
+     * Project[[max(x){r}#11, max(x){r}#11 AS max(y), max(x){r}#11 AS max(z)]]
+     * \_Limit[1000[INTEGER]]
+     *   \_Aggregate[[],[MAX(salary{f}#21) AS max(x)]]
+     *     \_EsRelation[test][_meta_field{f}#22, emp_no{f}#16, first_name{f}#17, ..]
+     * }
+ */ + public void testEliminateDuplicateAggsNonCount() { + var plan = plan(""" + from test + | eval x = salary + | eval y = x + | eval z = y + | stats max(x), max(y), max(z) + """); + + var project = as(plan, Project.class); + var projections = project.projections(); + assertThat(Expressions.names(projections), contains("max(x)", "max(y)", "max(z)")); + as(projections.get(0), ReferenceAttribute.class); + assertThat(Expressions.name(aliased(projections.get(1), ReferenceAttribute.class)), is("max(x)")); + assertThat(Expressions.name(aliased(projections.get(2), ReferenceAttribute.class)), is("max(x)")); + + var limit = as(project.child(), Limit.class); + var agg = as(limit.child(), Aggregate.class); + var aggs = agg.aggregates(); + assertThat(Expressions.names(aggs), contains("max(x)")); + aggFieldName(aggs.get(0), Max.class, "salary"); + var source = as(agg.child(), EsRelation.class); + } + + /** + * Expects + *
{@code
+     * Limit[1000[INTEGER]]
+     * \_Aggregate[[salary{f}#12],[salary{f}#12, salary{f}#12 AS x]]
+     *   \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..]
+     * }
+ */ + public void testEliminateDuplicateRenamedGroupings() { + var plan = plan(""" + from test + | eval x = salary + | stats by salary, x + """); + + var limit = as(plan, Limit.class); + var agg = as(limit.child(), Aggregate.class); + var relation = as(agg.child(), EsRelation.class); + + assertThat(Expressions.names(agg.groupings()), contains("salary")); + assertThat(Expressions.names(agg.aggregates()), contains("salary", "x")); + } + + /** + * Expects + *
{@code
+     * EsqlProject[[a{r}#5, c{r}#8]]
+     * \_Eval[[null[INTEGER] AS x]]
+     *   \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..]
+     * }
+ */ + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634") + public void testEliminateDuplicateAggWithNull() { + var plan = plan(""" + from test + | eval x = null + 1 + | stats a = avg(x), c = count(x) + """); + fail("Awaits fix"); + } + + /** + *
{@code
+     * Project[[a{r}#8, b{r}#12, c{r}#15, c{r}#15 AS d#18]]
+     * \_Eval[[a{r}#8 + a{r}#8 AS b#12]]
+     *   \_Limit[1000[INTEGER],false,false]
+     *     \_Aggregate[[],[COUNT(scalerank{f}#21,true[BOOLEAN],PT0S[TIME_DURATION]) AS a#8,
+     *     COUNTDISTINCT(scalerank{f}#21,true[BOOLEAN],PT0S[TIME_DURATION],10[INTEGER]) AS c#15]]
+     *       \_EsRelation[airports][abbrev{f}#19, city{f}#25, city_location{f}#26, coun..]
+     * }
+ */ + public void testAggsDeduplication() { + String query = """ + FROM airports + | rename scalerank AS x + | stats a = count(x), b = count(x) + count(x), c = count_distinct(x, 10), d = count_distinct(x, 10 + 1 - 1) + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(4)); + var a = as(projections.get(0), ReferenceAttribute.class); + var b = as(projections.get(1), ReferenceAttribute.class); + var c = as(projections.get(2), ReferenceAttribute.class); + var d = as(projections.get(3), Alias.class); + + assertEquals("a", a.name()); + assertEquals("b", b.name()); + assertEquals("c", c.name()); + assertEquals("d", d.name()); + assertEquals("c", as(d.child(), ReferenceAttribute.class).name()); + + Eval eval = as(project.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var bEval = as(eval.fields().getFirst(), Alias.class); + var bAdd = as(bEval.child(), Add.class); + + assertEquals("a", as(bAdd.left(), ReferenceAttribute.class).name()); + assertEquals("a", as(bAdd.right(), ReferenceAttribute.class).name()); + assertEquals("b", bEval.name()); + + Limit limit = as(eval.child(), Limit.class); + Aggregate agg = as(limit.child(), Aggregate.class); + + assertThat(agg.aggregates(), hasSize(2)); + var countAlias = as(agg.aggregates().get(0), Alias.class); + var countDistinctAliasFirst = as(agg.aggregates().get(1), Alias.class); + + var count = as(countAlias.child(), Count.class); + var countDistinct = as(countDistinctAliasFirst.child(), CountDistinct.class); + + assertEquals("a", countAlias.name()); + assertEquals("c", countDistinctAliasFirst.name()); + assertEquals("scalerank", as(count.field(), FieldAttribute.class).name()); + assertEquals("scalerank", as(countDistinct.field(), FieldAttribute.class).name()); + } + + /** + *
{@code
+     * Project[[MAX(y){r}#16, 2* MAX(a){r}#18]]
+     * \_Eval[[MAX(y){r}#16 * 2[INTEGER] AS 2* MAX(a)#18]]
+     *   \_Limit[1000[INTEGER],false,false]
+     *     \_Aggregate[[],[MAX(scalerank{f}#21,true[BOOLEAN],PT0S[TIME_DURATION]) AS MAX(y)#16]]
+     *       \_EsRelation[airports][abbrev{f}#19, city{f}#25, city_location{f}#26, coun..]
+     * }
+ */ + public void testAggsDeduplicationWithComplicatedAliasChains() { + String query = """ + FROM airports + | RENAME scalerank AS x + | EVAL y = x, z = x + | RENAME z AS a + | STATS MAX(y), 2* MAX(a) + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(2)); + + var firstResult = as(projections.get(0), ReferenceAttribute.class); + var secondResult = as(projections.get(1), ReferenceAttribute.class); + assertEquals("MAX(y)", firstResult.name()); + assertEquals("2* MAX(a)", secondResult.name()); + + var eval = as(project.child(), Eval.class); + var outputs = eval.output(); + assertThat(outputs, hasSize(2)); + + assertEquals("MAX(y)", as(outputs.get(0), ReferenceAttribute.class).name()); + assertEquals("2* MAX(a)", as(outputs.get(1), ReferenceAttribute.class).name()); + assertThat(eval.fields(), hasSize(1)); + var twoMax = as(eval.fields().getFirst(), Alias.class); + var mul = as(twoMax.child(), Mul.class); + assertEquals("MAX(y)", as(mul.left(), ReferenceAttribute.class).name()); + assertEquals(2, as(mul.right(), Literal.class).value()); + assertEquals("2* MAX(a)", twoMax.name()); + + var aggregates = as(as(eval.child(), Limit.class).child(), Aggregate.class).aggregates(); + assertThat(aggregates, hasSize(1)); + var agg = as(aggregates.getFirst(), Alias.class); + assertEquals("MAX(y)", agg.name()); + var maxY = as(agg.child(), Max.class); + assertEquals("scalerank", as(maxY.field(), FieldAttribute.class).name()); + } + + /** + *
{@code
+     * Project[[COUNT(y){r}#7, 2 * COUNT(scalerank){r}#9, y{r}#5]]
+     * \_Eval[[COUNT(y){r}#7 * 2[INTEGER] AS 2 * COUNT(scalerank)#9]]
+     *   \_Limit[1000[INTEGER],false,false]
+     *     \_Aggregate[[scalerank{f}#13],[COUNT(scalerank{f}#13,true[BOOLEAN],PT0S[TIME_DURATION]) AS COUNT(y)#7, scalerank{f}#13 AS y
+     * #5]]
+     *       \_EsRelation[airports][abbrev{f}#11, city{f}#17, city_location{f}#18, coun..]
+     * }
+ */ + public void testAggsDeduplicationInByClauses() { + String query = """ + FROM airports + | STATS COUNT(y), 2 * COUNT(scalerank) BY y = scalerank + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(3)); + + assertEquals("COUNT(y)", as(projections.get(0), ReferenceAttribute.class).name()); + assertEquals("2 * COUNT(scalerank)", as(projections.get(1), ReferenceAttribute.class).name()); + assertEquals("y", as(projections.get(2), ReferenceAttribute.class).name()); + + var eval = as(project.child(), Eval.class); + + assertThat(eval.fields(), hasSize(1)); + var evalAlias = as(eval.fields().getFirst(), Alias.class); + var mul = as(evalAlias.child(), Mul.class); + assertEquals("COUNT(y)", as(mul.left(), ReferenceAttribute.class).name()); + assertEquals(2, as(mul.right(), Literal.class).value()); + assertEquals("2 * COUNT(scalerank)", evalAlias.name()); + + var aggregates = as(as(eval.child(), Limit.class).child(), Aggregate.class).aggregates(); + assertThat(aggregates, hasSize(2)); + + var countAlias = as(aggregates.getFirst(), Alias.class); + assertEquals("COUNT(y)", countAlias.name()); + var countField = as(countAlias.child(), Count.class).field(); + assertEquals("scalerank", as(countField, FieldAttribute.class).name()); + assertEquals("scalerank", as(as(aggregates.get(1), Alias.class).child(), FieldAttribute.class).name()); + } + + /** + *
{@code
+     * Project[[a{r}#5, b{r}#9, c{r}#13]]
+     * \_Eval[[a{r}#5 * 2[INTEGER] AS b#9]]
+     *   \_Limit[1000[INTEGER],false,false]
+     *     \_Aggregate[[],[COUNT(scalerank{f}#16,scalerank{f}#16 > 7[INTEGER],PT0S[TIME_DURATION]) AS a#5, COUNTDISTINCT(scalerank{f}#
+     * 16,true[BOOLEAN],PT0S[TIME_DURATION]) AS c#13]]
+     *       \_EsRelation[airports][abbrev{f}#14, city{f}#20, city_location{f}#21, coun..]
+     * }
+ */ + public void testDuplicatedAggsWithSameCannonicalizationInWhereCondition() { + String query = """ + FROM airports + | STATS a = COUNT(scalerank) WHERE scalerank > 7, + b = 2*COUNT(scalerank) WHERE 7 < scalerank, + c = COUNT_DISTINCT(scalerank) + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(3)); + + assertEquals("a", as(projections.get(0), ReferenceAttribute.class).name()); + assertEquals("b", as(projections.get(1), ReferenceAttribute.class).name()); + assertEquals("c", as(projections.get(2), ReferenceAttribute.class).name()); + + var eval = as(project.child(), Eval.class); + + assertThat(eval.fields(), hasSize(1)); + var evalAlias = as(eval.fields().getFirst(), Alias.class); + var mul = as(evalAlias.child(), Mul.class); + assertEquals("a", as(mul.left(), ReferenceAttribute.class).name()); + assertEquals(2, as(mul.right(), Literal.class).value()); + assertEquals("b", evalAlias.name()); + + var aggregates = as(as(eval.child(), Limit.class).child(), Aggregate.class).aggregates(); + assertThat(aggregates, hasSize(2)); + + var a = as(aggregates.get(0), Alias.class); + var c = as(aggregates.get(1), Alias.class); + + assertEquals("a", a.name()); + var aField = as(a.child(), Count.class).field(); + assertEquals("scalerank", as(aField, FieldAttribute.class).name()); + + assertEquals("c", c.name()); + var cField = as(c.child(), CountDistinct.class).field(); + assertEquals("scalerank", as(cField, FieldAttribute.class).name()); + } + + /** + * Project[[a{r}#5, b{r}#8, c{r}#11]] + * \_Eval[[$$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[INTEGER] AS a#5, $$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[ + * INTEGER] AS b#8, $$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[INTEGER] AS c#11]] + * \_Limit[1000[INTEGER],false,false] + * \_Aggregate[[],[COUNTDISTINCT(scalerank{f}#14,true[BOOLEAN],PT0S[TIME_DURATION],100[INTEGER]) AS $$COUNTDISTINCT$2*COUNT_DI + * STINC>$0#20]] + * \_EsRelation[airports][abbrev{f}#12, city{f}#18, city_location{f}#19, coun..] + */ + public void testDuplicatedAggWithFoldableIdenticalExpressions() { + String query = """ + FROM airports + | STATS a = 2*COUNT_DISTINCT(scalerank, 100), + b = 2*COUNT_DISTINCT(scalerank, 220 - 150 + 30), + c = 2*COUNT_DISTINCT(scalerank, 1 + 200 - 80 - 20 - 1) + """; + + LogicalPlan plan = planAirports(query); + Project project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(3)); + + assertEquals("a", as(projections.get(0), ReferenceAttribute.class).name()); + assertEquals("b", as(projections.get(1), ReferenceAttribute.class).name()); + assertEquals("c", as(projections.get(2), ReferenceAttribute.class).name()); + + var eval = as(project.child(), Eval.class); + var aggregates = as(as(eval.child(), Limit.class).child(), Aggregate.class).aggregates(); + + assertThat(eval.fields(), hasSize(3)); + var firstEvalField = as(as(eval.fields().get(0), Alias.class).child(), Mul.class); + var secondEvalField = as(as(eval.fields().get(1), Alias.class).child(), Mul.class); + var thirdEvalField = as(as(eval.fields().get(2), Alias.class).child(), Mul.class); + assertEquals(firstEvalField, secondEvalField); + assertEquals(secondEvalField, thirdEvalField); + + assertThat(aggregates, hasSize(1)); + var countDistinct = as(as(aggregates.get(0), Alias.class).child(), CountDistinct.class); + assertEquals("scalerank", as(countDistinct.field(), FieldAttribute.class).name()); + } + + /** + * Limit[1000[INTEGER],false,false] + * \_InlineJoin[LEFT,[],[]] + * |_EsRelation[airports][abbrev{f}#12, city{f}#18, city_location{f}#19, coun..] + * \_Project[[a{r}#5, b{r}#8, c{r}#11]] + * \_Eval[[$$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[INTEGER] AS a#5, $$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[ + * INTEGER] AS b#8, $$COUNTDISTINCT$2*COUNT_DISTINC>$0{r$}#20 * 2[INTEGER] AS c#11]] + * \_Aggregate[[],[COUNTDISTINCT(scalerank{f}#14,true[BOOLEAN],PT0S[TIME_DURATION],100[INTEGER]) AS $$COUNTDISTINCT$2*COUNT_DI + * STINC>$0#20]] + * \_StubRelation[[abbrev{f}#12, city{f}#18, city_location{f}#19, country{f}#17, location{f}#16, name{f}#13, scalerank{f}#14, ty + * pe{f}#15]] + */ + public void testDuplicatedInlineAggWithFoldableIdenticalExpressions() { + String query = """ + FROM airports + | INLINE STATS a = 2*COUNT_DISTINCT(scalerank, 100), + b = 2*COUNT_DISTINCT(scalerank, 220 - 150 + 30), + c = 2*COUNT_DISTINCT(scalerank, 1 + 200 - 80 - 20 - 1) + """; + + LogicalPlan plan = planAirports(query); + var limit = as(plan, Limit.class); + var inlineJoin = as(limit.child(), InlineJoin.class); + + var project = as(inlineJoin.right(), Project.class); + var projections = project.projections(); + assertThat(projections, hasSize(3)); + + assertEquals("a", as(projections.get(0), ReferenceAttribute.class).name()); + assertEquals("b", as(projections.get(1), ReferenceAttribute.class).name()); + assertEquals("c", as(projections.get(2), ReferenceAttribute.class).name()); + + var eval = as(project.child(), Eval.class); + var aggregates = as(eval.child(), Aggregate.class).aggregates(); + assertThat(eval.fields(), hasSize(3)); + var firstEvalField = as(as(eval.fields().get(0), Alias.class).child(), Mul.class); + var secondEvalField = as(as(eval.fields().get(1), Alias.class).child(), Mul.class); + var thirdEvalField = as(as(eval.fields().get(2), Alias.class).child(), Mul.class); + assertEquals(firstEvalField, secondEvalField); + assertEquals(secondEvalField, thirdEvalField); + + assertThat(aggregates, hasSize(1)); + var countDistinct = as(as(aggregates.get(0), Alias.class).child(), CountDistinct.class); + assertEquals("scalerank", as(countDistinct.field(), FieldAttribute.class).name()); + } +} From 4d70093452a7acca0095dbb58769f2d7569ce541 Mon Sep 17 00:00:00 2001 From: ncordon Date: Fri, 5 Dec 2025 16:29:14 +0100 Subject: [PATCH 2/2] Nits for 9.1 --- docs/changelog/137511.yaml | 4 +- .../src/main/resources/inlinestats.csv-spec | 5 +- .../optimizer/LocalLogicalPlanOptimizer.java | 7 + .../rules/logical/DeduplicateAggs.java | 2 +- .../rules/logical/OptimizerRules.java | 19 ++ .../optimizer/LogicalPlanOptimizerTests.java | 271 ------------------ .../rules/logical/DeduplicateAggsTests.java | 2 +- 7 files changed, 34 insertions(+), 276 deletions(-) diff --git a/docs/changelog/137511.yaml b/docs/changelog/137511.yaml index 14340bda606e9..c31f05da0eb2b 100644 --- a/docs/changelog/137511.yaml +++ b/docs/changelog/137511.yaml @@ -2,4 +2,6 @@ pr: 137511 summary: Fixes esql class cast bug in STATS at planning level area: ES|QL type: bug -issues: [] +issues: + - 133992 + - 136598 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec index 5eadaf5b43f49..ab18157558971 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec @@ -1003,12 +1003,13 @@ alpha db server |alpha |127.0.0.1 |hosts |127.0.0.1|1 ; fixClassCastBugWithSeveralCounts -required_capability: inline_stats +required_capability: inlinestats_v7 required_capability: fix_stats_classcast_exception FROM sample_data, sample_data_str | EVAL one_ip = client_ip::ip -| INLINE STATS count1=count(client_ip::ip), count2=count(one_ip) +| INLINESTATS count1=count(client_ip::ip), count2=count(one_ip) +| SORT @timestamp | KEEP count1, count2 | LIMIT 3 ; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index dab2b35025aa6..af161a473b83f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.optimizer; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEmptyRelation; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStatsFilteredAggWithEval; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStringCasingWithInsensitiveRegexMatch; @@ -70,6 +71,12 @@ private static Batch localOperators() { // skip it: once a fragment contains an Agg, this can no longer be pruned, which the rule can do case ReplaceStatsFilteredAggWithEval ignoredReplace -> { } + case OptimizerRules.LocalAware localAware -> { + Rule local = localAware.local(); + if (local != null) { + newRules.add(local); + } + } default -> newRules.add(r); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java index 170f87f86f8ff..ff01b5255a493 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggs.java @@ -13,7 +13,7 @@ * becomes * stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g */ -public final class DeduplicateAggs extends ReplaceAggregateAggExpressionWithEval { +public final class DeduplicateAggs extends ReplaceAggregateAggExpressionWithEval implements OptimizerRules.CoordinatorOnly { public DeduplicateAggs() { super(false); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java index a32bf3a720088..a73340fa49b5c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/OptimizerRules.java @@ -109,4 +109,23 @@ public final LogicalPlan apply(LogicalPlan plan, P context) { protected abstract LogicalPlan rule(SubPlan plan, P context); } + + /** + * Rule that has a different implementation when applied to a local plan. + */ + public interface LocalAware { + /** + * the local version of the rule. {@code null} if the rule should not be applied locally. + */ + Rule local(); + } + + /** + * This rule should only be applied on the coordinator plan, not for a local plan. + */ + public interface CoordinatorOnly extends LocalAware { + default Rule local() { + return null; + } + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 3ff2c3f8be2e8..c06d200742bee 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -178,13 +178,8 @@ import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GTE; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LT; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LTE; -<<<<<<< HEAD -======= import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aggFieldName; import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aliased; -import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.DOWN; -import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.UP; ->>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511)) import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -480,32 +475,6 @@ public void testAggsWithOverridingInputAndGrouping() throws Exception { } /** -<<<<<<< HEAD - * Project[[s{r}#4 AS d, s{r}#4, last_name{f}#21, first_name{f}#18]] - * \_Limit[1000[INTEGER]] - * \_Aggregate[[last_name{f}#21, first_name{f}#18],[SUM(salary{f}#22) AS s, last_name{f}#21, first_name{f}#18]] - * \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..] - */ - public void testCombineProjectionWithDuplicateAggregation() { - var plan = plan(""" - from test - | stats s = sum(salary), d = sum(salary), c = sum(salary) by last_name, first_name - | keep d, s, last_name, first_name - """); - - var project = as(plan, Project.class); - assertThat(Expressions.names(project.projections()), contains("d", "s", "last_name", "first_name")); - var limit = as(project.child(), Limit.class); - var agg = as(limit.child(), Aggregate.class); - assertThat(Expressions.names(agg.aggregates()), contains("s", "last_name", "first_name")); - assertThat(Alias.unwrap(agg.aggregates().get(0)), instanceOf(Sum.class)); - assertThat(Expressions.names(agg.groupings()), contains("last_name", "first_name")); - } - - /** -======= - *
{@code
->>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Limit[1000[INTEGER]]
      * \_Aggregate[STANDARD,[],[SUM(salary{f}#12,true[BOOLEAN]) AS sum(salary), SUM(salary{f}#12,last_name{f}#11 == [44 6f 65][KEYW
      * ORD]) AS sum(salary) WheRe last_name ==   "Doe"]]
@@ -3820,171 +3789,6 @@ public void testPruneRenameOnAggBy() {
     }
 
     /**
-<<<<<<< HEAD
-     * Expects
-     * Project[[c1{r}#2, c2{r}#4, cs{r}#6, cm{r}#8, cexp{r}#10]]
-     * \_Eval[[c1{r}#2 AS c2, c1{r}#2 AS cs, c1{r}#2 AS cm, c1{r}#2 AS cexp]]
-     *   \_Limit[1000[INTEGER]]
-     *     \_Aggregate[[],[COUNT([2a][KEYWORD]) AS c1]]
-     *       \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..]
-     */
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634")
-    public void testEliminateDuplicateAggsCountAll() {
-        var plan = plan("""
-              from test
-            | stats c1 = count(1), c2 = count(2), cs = count(*), cm = count(), cexp = count("123")
-            """);
-
-        var project = as(plan, Project.class);
-        assertThat(Expressions.names(project.projections()), contains("c1", "c2", "cs", "cm", "cexp"));
-        var eval = as(project.child(), Eval.class);
-        var fields = eval.fields();
-        assertThat(Expressions.names(fields), contains("c2", "cs", "cm", "cexp"));
-        for (Alias field : fields) {
-            assertThat(Expressions.name(field.child()), is("c1"));
-        }
-        var limit = as(eval.child(), Limit.class);
-        var agg = as(limit.child(), Aggregate.class);
-        var aggs = agg.aggregates();
-        assertThat(Expressions.names(aggs), contains("c1"));
-        aggFieldName(aggs.get(0), Count.class, "*");
-        var source = as(agg.child(), EsRelation.class);
-    }
-
-    /**
-     * Expects
-     * Project[[c1{r}#7, cx{r}#10, cs{r}#12, cy{r}#15]]
-     * \_Eval[[c1{r}#7 AS cx, c1{r}#7 AS cs, c1{r}#7 AS cy]]
-     *   \_Limit[1000[INTEGER]]
-     *     \_Aggregate[[],[COUNT([2a][KEYWORD]) AS c1]]
-     *       \_EsRelation[test][_meta_field{f}#22, emp_no{f}#16, first_name{f}#17, ..]
-     */
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634")
-    public void testEliminateDuplicateAggsWithAliasedFields() {
-        var plan = plan("""
-              from test
-            | eval x = 1
-            | eval y = x
-            | stats c1 = count(1), cx = count(x), cs = count(*), cy = count(y)
-            """);
-
-        var project = as(plan, Project.class);
-        assertThat(Expressions.names(project.projections()), contains("c1", "cx", "cs", "cy"));
-        var eval = as(project.child(), Eval.class);
-        var fields = eval.fields();
-        assertThat(Expressions.names(fields), contains("cx", "cs", "cy"));
-        for (Alias field : fields) {
-            assertThat(Expressions.name(field.child()), is("c1"));
-        }
-        var limit = as(eval.child(), Limit.class);
-        var agg = as(limit.child(), Aggregate.class);
-        var aggs = agg.aggregates();
-        assertThat(Expressions.names(aggs), contains("c1"));
-        aggFieldName(aggs.get(0), Count.class, "*");
-        var source = as(agg.child(), EsRelation.class);
-    }
-
-    /**
-     * Expects
-     * Project[[min{r}#1385, max{r}#1388, min{r}#1385 AS min2, max{r}#1388 AS max2, gender{f}#1398]]
-     * \_Limit[1000[INTEGER]]
-     *   \_Aggregate[[gender{f}#1398],[MIN(salary{f}#1401) AS min, MAX(salary{f}#1401) AS max, gender{f}#1398]]
-     *     \_EsRelation[test][_meta_field{f}#1402, emp_no{f}#1396, first_name{f}#..]
-     */
-    public void testEliminateDuplicateAggsMixed() {
-        var plan = plan("""
-              from test
-            | stats min = min(salary), max = max(salary), min2 = min(salary), max2 = max(salary) by gender
-            """);
-
-        var project = as(plan, Project.class);
-        var projections = project.projections();
-        assertThat(Expressions.names(projections), contains("min", "max", "min2", "max2", "gender"));
-        as(projections.get(0), ReferenceAttribute.class);
-        as(projections.get(1), ReferenceAttribute.class);
-        assertThat(Expressions.name(aliased(projections.get(2), ReferenceAttribute.class)), is("min"));
-        assertThat(Expressions.name(aliased(projections.get(3), ReferenceAttribute.class)), is("max"));
-
-        var limit = as(project.child(), Limit.class);
-        var agg = as(limit.child(), Aggregate.class);
-        var aggs = agg.aggregates();
-        assertThat(Expressions.names(aggs), contains("min", "max", "gender"));
-        aggFieldName(aggs.get(0), Min.class, "salary");
-        aggFieldName(aggs.get(1), Max.class, "salary");
-        var source = as(agg.child(), EsRelation.class);
-    }
-
-    /**
-     * Expects
-     * EsqlProject[[a{r}#5, c{r}#8]]
-     * \_Eval[[null[INTEGER] AS x]]
-     *   \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..]
-     */
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100634")
-    public void testEliminateDuplicateAggWithNull() {
-        var plan = plan("""
-              from test
-            | eval x = null + 1
-            | stats a = avg(x), c = count(x)
-            """);
-        fail("Awaits fix");
-    }
-
-    /**
-     * Expects
-     * Project[[max(x){r}#11, max(x){r}#11 AS max(y), max(x){r}#11 AS max(z)]]
-     * \_Limit[1000[INTEGER]]
-     *   \_Aggregate[[],[MAX(salary{f}#21) AS max(x)]]
-     *     \_EsRelation[test][_meta_field{f}#22, emp_no{f}#16, first_name{f}#17, ..]
-     */
-    public void testEliminateDuplicateAggsNonCount() {
-        var plan = plan("""
-            from test
-            | eval x = salary
-            | eval y = x
-            | eval z = y
-            | stats max(x), max(y), max(z)
-            """);
-
-        var project = as(plan, Project.class);
-        var projections = project.projections();
-        assertThat(Expressions.names(projections), contains("max(x)", "max(y)", "max(z)"));
-        as(projections.get(0), ReferenceAttribute.class);
-        assertThat(Expressions.name(aliased(projections.get(1), ReferenceAttribute.class)), is("max(x)"));
-        assertThat(Expressions.name(aliased(projections.get(2), ReferenceAttribute.class)), is("max(x)"));
-
-        var limit = as(project.child(), Limit.class);
-        var agg = as(limit.child(), Aggregate.class);
-        var aggs = agg.aggregates();
-        assertThat(Expressions.names(aggs), contains("max(x)"));
-        aggFieldName(aggs.get(0), Max.class, "salary");
-        var source = as(agg.child(), EsRelation.class);
-    }
-
-    /**
-     * Expects
-     * Limit[1000[INTEGER]]
-     * \_Aggregate[[salary{f}#12],[salary{f}#12, salary{f}#12 AS x]]
-     *   \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..]
-     */
-    public void testEliminateDuplicateRenamedGroupings() {
-        var plan = plan("""
-            from test
-            | eval x = salary
-            | stats by salary, x
-            """);
-
-        var limit = as(plan, Limit.class);
-        var agg = as(limit.child(), Aggregate.class);
-        var relation = as(agg.child(), EsRelation.class);
-
-        assertThat(Expressions.names(agg.groupings()), contains("salary"));
-        assertThat(Expressions.names(agg.aggregates()), contains("salary", "x"));
-    }
-
-    /**
-=======
->>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Expected
      * Limit[2[INTEGER]]
      * \_Filter[a{r}#6 > 2[INTEGER]]
@@ -4832,81 +4636,6 @@ public void testStatsExpOverAggsWithScalars() {
 
     /**
      * Expects
-<<<<<<< HEAD
-     * Project[[a{r}#5, b{r}#9, $$max(salary)_+_3>$COUNT$2{r}#46 AS d, $$count(salary)_->$MIN$3{r}#47 AS e, $$avg(salary)_+_m
-     * >$MAX$1{r}#45 AS g]]
-     * \_Eval[[$$$$avg(salary)_+_m>$AVG$0$SUM$0{r}#48 / $$max(salary)_+_3>$COUNT$2{r}#46 AS $$avg(salary)_+_m>$AVG$0, $$avg(
-     * salary)_+_m>$AVG$0{r}#44 + $$avg(salary)_+_m>$MAX$1{r}#45 AS a, $$avg(salary)_+_m>$MAX$1{r}#45 + 3[INTEGER] +
-     * 3.141592653589793[DOUBLE] + $$max(salary)_+_3>$COUNT$2{r}#46 AS b]]
-     *   \_Limit[1000[INTEGER]]
-     *     \_Aggregate[[w{r}#28],[SUM(salary{f}#39) AS $$$$avg(salary)_+_m>$AVG$0$SUM$0, MAX(salary{f}#39) AS $$avg(salary)_+_m>$MAX$1
-     * , COUNT(salary{f}#39) AS $$max(salary)_+_3>$COUNT$2, MIN(salary{f}#39) AS $$count(salary)_->$MIN$3]]
-     *       \_Eval[[languages{f}#37 % 2[INTEGER] AS w]]
-     *         \_EsRelation[test][_meta_field{f}#40, emp_no{f}#34, first_name{f}#35, ..]
-     */
-    public void testStatsExpOverAggsWithScalarAndDuplicateAggs() {
-        var plan = optimizedPlan("""
-            from test
-            | stats a = avg(salary) + max(salary),
-                    b = max(salary) + 3 + PI() + count(salary),
-                    c = count(salary) - min(salary),
-                    d = count(salary),
-                    e = min(salary),
-                    f = max(salary),
-                    g = max(salary)
-                    by w = languages % 2
-            | keep a, b, d, e, g
-            """);
-
-        var project = as(plan, Project.class);
-        var projections = project.projections();
-        assertThat(Expressions.names(projections), contains("a", "b", "d", "e", "g"));
-        var refA = Alias.unwrap(projections.get(0));
-        var refB = Alias.unwrap(projections.get(1));
-        var refD = Alias.unwrap(projections.get(2));
-        var refE = Alias.unwrap(projections.get(3));
-        var refG = Alias.unwrap(projections.get(4));
-
-        var eval = as(project.child(), Eval.class);
-        var fields = eval.fields();
-        // avg = Sum/Count
-        assertThat(Expressions.name(fields.get(0)), containsString("AVG"));
-        assertThat(Alias.unwrap(fields.get(0)), instanceOf(Div.class));
-        // avg + max
-        assertThat(Expressions.name(fields.get(1)), is("a"));
-        var add = as(Alias.unwrap(fields.get(1)), Add.class);
-        var max_salary = add.right();
-        assertThat(Expressions.attribute(fields.get(1)), is(Expressions.attribute(refA)));
-
-        assertThat(Expressions.name(fields.get(2)), is("b"));
-        assertThat(Expressions.attribute(fields.get(2)), is(Expressions.attribute(refB)));
-
-        add = as(Alias.unwrap(fields.get(2)), Add.class);
-        add = as(add.left(), Add.class);
-        add = as(add.left(), Add.class);
-        assertThat(Expressions.attribute(max_salary), is(Expressions.attribute(add.left())));
-
-        var limit = as(eval.child(), Limit.class);
-
-        var agg = as(limit.child(), Aggregate.class);
-        var aggs = agg.aggregates();
-        var sum = as(Alias.unwrap(aggs.get(0)), Sum.class);
-
-        assertThat(Expressions.attribute(aggs.get(1)), is(Expressions.attribute(max_salary)));
-        var max = as(Alias.unwrap(aggs.get(1)), Max.class);
-        var count = as(Alias.unwrap(aggs.get(2)), Count.class);
-        var min = as(Alias.unwrap(aggs.get(3)), Min.class);
-
-        eval = as(agg.child(), Eval.class);
-        fields = eval.fields();
-        assertThat(Expressions.name(fields.get(0)), is("w"));
-    }
-
-    /**
-     * Expects
-=======
-     * 
{@code
->>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
      * Project[[a{r}#5, a{r}#5 AS b, w{r}#12]]
      * \_Limit[1000[INTEGER]]
      *   \_Aggregate[[w{r}#12],[SUM($$salary_/_2_+_la>$SUM$0{r}#26) AS a, w{r}#12]]
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
index e656cc364691a..4f3672548e45f 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/DeduplicateAggsTests.java
@@ -583,7 +583,7 @@ public void testDuplicatedAggWithFoldableIdenticalExpressions() {
     public void testDuplicatedInlineAggWithFoldableIdenticalExpressions() {
         String query = """
                 FROM airports
-                | INLINE STATS a = 2*COUNT_DISTINCT(scalerank, 100),
+                | INLINESTATS a = 2*COUNT_DISTINCT(scalerank, 100),
                 b = 2*COUNT_DISTINCT(scalerank, 220 - 150 + 30),
                 c = 2*COUNT_DISTINCT(scalerank, 1 + 200 - 80 - 20 - 1)
             """;