From 5159109568662a0f7176bc6645cfd0c6cf4e015d Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Mon, 8 Jul 2024 10:42:02 +0300 Subject: [PATCH 01/16] Add separate rule for dealing with nulls in aggregations Duplicate SubstituteSurrogate in "Operator Optimization" batch Many more tests Add tests for mad Add mv handling to top function --- .../src/main/resources/meta.csv-spec | 2 +- .../src/main/resources/row.csv-spec | 38 ++++-- .../src/main/resources/spatial.csv-spec | 9 ++ .../src/main/resources/stats.csv-spec | 70 +++++++++++ .../src/main/resources/stats_mad.csv-spec | 9 ++ .../main/resources/stats_percentile.csv-spec | 19 +++ .../src/main/resources/stats_top.csv-spec | 49 ++++++++ .../function/aggregate/CountDistinct.java | 6 + .../expression/function/aggregate/Top.java | 20 +++- .../expression/function/aggregate/Values.java | 8 +- .../function/scalar/multivalue/MvSlice.java | 2 +- .../esql/optimizer/LogicalPlanOptimizer.java | 3 + .../rules/PropagateEvalFoldables.java | 18 +++ .../esql/optimizer/rules/PruneColumns.java | 2 +- .../rules/ReplaceAggregatesWithNull.java | 111 ++++++++++++++++++ .../optimizer/rules/SubstituteSurrogates.java | 15 ++- .../optimizer/LogicalPlanOptimizerTests.java | 2 +- 17 files changed, 357 insertions(+), 26 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceAggregatesWithNull.java diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec index f1f66a9cb990c..4d535851ce194 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec @@ -304,7 +304,7 @@ mv_median |Converts a multivalued field into a single valued field containin mv_min |Converts a multivalued expression into a single valued column containing the minimum value. mv_percentile |Converts a multivalued field into a single valued field containing the value at which a certain percentage of observed values occur. mv_pseries_wei|Converts a multivalued expression into a single-valued column by multiplying every element on the input list by its corresponding term in P-Series and computing the sum. -mv_slice |Returns a subset of the multivalued field using the start and end index values. +mv_slice |Returns a subset of the multivalued field using the start and end index values. The function uses 0-based indexing. mv_sort |Sorts a multivalued field in lexicographical order. mv_sum |Converts a multivalued field into a single valued field containing the sum of all of the values. mv_zip |Combines the values from two multivalued fields with a delimiter that joins them together. diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index da640b6306299..02e58832c5528 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -81,10 +81,10 @@ a:integer | b:integer | c:integer ; evalRowWithNull -row a = 1, b = 2, c = null | eval z = c+b+a; +row a = 1, b = 2, c = null | eval z = c+b+a, t = a + null, u = c + null; -a:integer | b:integer | c:null | z:integer -1 | 2 | null | null +a:integer | b:integer | c:null | z:integer | t:integer | u:null +1 | 2 | null | null | null | null ; evalRowWithNull2 @@ -96,10 +96,10 @@ a:integer | b:integer | c:null | null:null | z:integer ; evalRowWithNull3 -row a = 1, b = 2, x = round(null) | eval z = a+b+x; +row a = 1, b = 2, x = round(null) | eval z = a+b+x, t = a + round(null); -a:integer | b:integer | x:null | z:integer -1 | 2 | null | null +a:integer | b:integer | x:null | z:integer | t:integer +1 | 2 | null | null | null ; evalRowWithRound @@ -251,6 +251,13 @@ avg:double | min(x):integer | max(x):integer | count(x):long | avg(x):double | a 8.0 | 8 | 8 | 1 | 8.0 | 5.0 | 4.0 ; +rowWithMultipleStats2 +row a = 1+3 | eval a = 1 + a | stats avg = avg(1+3), min(5*2), max(3*3), count(123-123), avg(1-123), avg(a+123), count_distinct(5+6); + +avg:double | min(5*2):integer | max(3*3):integer |count(123-123):long |avg(1-123):double |avg(a+123):double |count_distinct(5+6):long +4.0 |10 |9 |1 |-122.0 |128.0 |1 +; + rowWithMultipleStatsOverNull row x=1, y=2 | eval tot = null + y + x | stats c=count(tot), a=avg(tot), mi=min(tot), ma=max(tot), s=sum(tot); @@ -258,20 +265,27 @@ c:long | a:double | mi:integer | ma:integer | s:long 0 | null | null | null | null ; +rowWithMultipleStatsOverNull2 +row x=1, y=2 | stats c=count(null + 1), c_d=count_distinct(1 + null), a=avg(null + x), mi=min(y + null), ma=max(y + x * null), s=sum(null); + + c:long | c_d:long | a:double | mi:integer | ma:integer | s:double +0 |0 |null |null |null |null +; + min -row l=1, d=1.0, ln=1 + null, dn=1.0 + null | stats min(l), min(d), min(ln), min(dn); +row l=1, d=1.0, ln=1 + null, dn=1.0 + null, n=null | stats min(l), min(d), min(ln), min(dn), n1=min(null), n2=min(null+123), n3=min(n); -min(l):integer | min(d):double | min(ln):integer | min(dn):double - 1 | 1.0 | null | null +min(l):integer | min(d):double | min(ln):integer | min(dn):double | n1:null | n2:integer | n3:null + 1 | 1.0 | null | null | null | null | null ; sum -row l=1, d=1.0, ln=1 + null, dn=1.0 + null | stats sum(l), sum(d), sum(ln), sum(dn); +row l=1, d=1.0, ln=1 + null, dn=1.0 + null, n=null | stats sum(l), sum(d), sum(ln), sum(dn), s1=sum(null), s2=sum(123-null), s3=sum(n); -sum(l):long | sum(d):double | sum(ln):long | sum(dn):double - 1 | 1.0 | null | null +sum(l):long | sum(d):double | sum(ln):long | sum(dn):double | s1:double | s2:long | s3:double + 1 | 1.0 | null | null | null | null | null ; boolean diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index 35416c7945128..1f71eff925174 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -37,6 +37,14 @@ wkt:keyword ["POINT(42.97109630194 14.7552534413725)", "POINT(75.8092915005895 22.727749187571)"] |[POINT(42.97109630194 14.7552534413725), POINT(75.8092915005895 22.727749187571)] ; +centroidOfNull +FROM airports | eval z = TO_GEOPOINT(null) | STATS centroidNull = ST_CENTROID_AGG(null), centroidExpNull = ST_CENTROID_AGG(TO_GEOPOINT(null::string)), centroidEvalNull = ST_CENTROID_AGG(z); + +centroidNull:null|centroidExpNull:geo_point|centroidEvalNull:geo_point +null |null |null +; + +########### failing :-( with InvalidArgumentException: Does not support yet aggregations over constants centroidFromStringNested required_capability: st_centroid_agg @@ -1304,6 +1312,7 @@ wkt:keyword |pt:cartesian_point ["POINT(4297.11 -1475.53)", "POINT(7580.93 2272.77)"] |[POINT(4297.11 -1475.53), POINT(7580.93 2272.77)] ; +########### failing :-( with InvalidArgumentException: Does not support yet aggregations over constants centroidCartesianFromStringNested required_capability: st_centroid_agg diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 3be846630d5b8..81bf222008b32 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -1991,6 +1991,67 @@ s2point1:l | s_mv:l | s_param:l | s_expr:l | s_expr_null:l | languages:i 1 | 4 | 4 | 1 | 0 | null ; +emptyStatsBy1 +from employees | eval x = [1,2,3] | stats by x | eval z = case(null is null, null, 5); + +x:integer |z:integer +1 |null +2 |null +3 |null +; + +emptyStatsBy2 +from employees | eval x = [1,2,3], y = null | stats max(y) by x; + +max(y):null |x:integer +null |1 +null |2 +null |3 +; + +statsOfPropagateableConst +row foo="unused" | eval mv=[1,2,3] | stats avg = avg(mv), avg([5,6]), min(mv), min([5,6]), max(mv), max([5,6]), count(mv), count([5,6]), count_distinct(mv), count_distinct([5,5,6]); + + avg:double |avg([5,6]):double|min(mv):integer|min([5,6]):integer|max(mv):integer|max([5,6]):integer|count(mv):long |count([5,6]):long|count_distinct(mv):long|count_distinct([5,5,6]):long +2.0 |5.5 |1 |5 |3 |6 |3 |2 |3 |2 +; + +statsOfPropagateableConstWithGrouping_Count +ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a; + +COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer +0 |0 |0 |1 +0 |0 |0 |2 +0 |0 |0 |3 +; + +statsOfPropagateableConstWithGrouping_DistinctCount +ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT_DISTINCT(c), COUNT_DISTINCT(null), COUNT_DISTINCT(null - 1) BY a; + +COUNT_DISTINCT(c):long|COUNT_DISTINCT(null):long|COUNT_DISTINCT(null - 1):long|a:integer +0 |0 |0 |1 +0 |0 |0 |2 +0 |0 |0 |3 +; + +countDistinctWithGrouping +from employees | EVAL c = 5 + 6, d = 6 + null, e = [1,2,2] | STATS `cd(5+6)`=COUNT_DISTINCT(c), `cd(null)`=COUNT_DISTINCT(null), `cd(null-1)`=COUNT_DISTINCT(null - 1), `cd(eval_null)`=COUNT_DISTINCT(d), `cd(mv)`=COUNT_DISTINCT(e) BY gender | sort gender; + +cd(5+6):long |cd(null):long |cd(null-1):long|cd(eval_null):long|cd(mv):long |gender:keyword +1 |0 |0 |0 |2 |F +1 |0 |0 |0 |2 |M +1 |0 |0 |0 |2 |null +; + +countWithGrouping +from employees | EVAL c = 5 + 6, d = 6 + null, e = [1,2,2] | STATS `cd(5+6)`=COUNT(c), `cd(null)`=COUNT(null), `cd(null-1)`=COUNT(null - 1), `cd(eval_null)`=COUNT(d), `cd(mv)`=COUNT(e) BY gender | sort gender; + +cd(5+6):long |cd(null):long |cd(null-1):long|cd(eval_null):long|cd(mv):long |gender:keyword +33 |0 |0 |0 |99 |F +57 |0 |0 |0 |171 |M +10 |0 |0 |0 |30 |null +; + evalOverridingKey#[skip:-8.13.1,reason:fixed in 8.13.2] FROM employees | EVAL k = languages @@ -2290,3 +2351,12 @@ from employees m:integer |a:double |x:integer 74999 |48249.0 |0 ; + +valuesOfConstants +row mv_int = [1,2,3], string = "abc", mv_string = ["bar","foo"], null_exp = 234 + null, to_be_casted_ip = "127.0.0.1", row_null = null +| stats values(null), values(mv_int), values(string), values(mv_string), values(null_exp), values(123 + null), values(to_be_casted_ip::ip), values(row_null) +; + +values(null):null|values(mv_int):integer|values(string):keyword|values(mv_string):keyword|values(null_exp):integer|values(123 + null):integer|values(to_be_casted_ip::ip):ip|values(row_null):null +null |[1, 2, 3] |abc |[bar, foo] |null |null |127.0.0.1 |null +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec new file mode 100644 index 0000000000000..3977325ab3cb2 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec @@ -0,0 +1,9 @@ +madOfNulls +row x = null::integer, y = null, z = 123 - null, a = [1,2,3], b = 5 | stats x = MEDIAN_ABSOLUTE_DEVIATION(x), y = MEDIAN_ABSOLUTE_DEVIATION(y), z = MEDIAN_ABSOLUTE_DEVIATION(null), madnull = MEDIAN_ABSOLUTE_DEVIATION(2+null), madnullEvalNull = MEDIAN_ABSOLUTE_DEVIATION(z) by a, b; + +x:double|y:double |z:double |madnull:double |madnullEvalNull:double|a:integer |b:integer +null |null |null |null |null |1 |5 +null |null |null |null |null |2 |5 +null |null |null |null |null |3 |5 +; + diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec index 2ac7a0cf6217a..73a8fc0e2c704 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec @@ -1,3 +1,22 @@ +percentileOfNull +row x = null::integer, y = null, z = 123 - null | stats percIntNull = percentile(y, 90), percNull = percentile(y, 90), percentile(null, 90), percentile(null+2, 90), percentile(z, 90); + + percIntNull:double|percNull:double|percentile(null, 90):double|percentile(null+2, 90):double|percentile(z, 90):double +null |null |null |null |null +; + +percentileOfNullsOnRealIndex +from employees | eval x = null::integer, y = null, z = 123 - null | stats percIntNull = percentile(y, 90), percNull = percentile(y, 90), percentile(null, 90), percentile(null+2, 90), percentile(z, 90) by languages | sort languages desc; + + percIntNull:double|percNull:double|percentile(null, 90):double|percentile(null+2, 90):double|percentile(z, 90):double|languages:integer +null |null |null |null |null |null +null |null |null |null |null |5 +null |null |null |null |null |4 +null |null |null |null |null |3 +null |null |null |null |null |2 +null |null |null |null |null |1 +; + percentileOfLong from employees | stats p0 = percentile(salary_change.long, 0), p50 = percentile(salary_change.long, 50), p99 = percentile(salary_change.long, 99); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec index 86f91adf506d1..b4162b8b7a925 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec @@ -40,6 +40,55 @@ date:date | double:double | integer:integer | long:long [1999-04-30T00:00:00.000Z,1997-05-19T00:00:00.000Z] | [14.74,14.68] | [74999,74970] | [14,14] ; +topRowWithEval +required_capability: agg_top +ROW d=-9.81, i=25324, l=TO_LONG(-9) | EVAL x=d+1, y=i/2, z=l+3 | STATS double = TOP(x, 2, "asc"), integer = TOP(y, 2, "asc"), long = TOP(z, 2, "asc") | keep double, integer, long +; + + double:double |integer:integer| long:long +-8.81 |12662 |-6 +; + +topRowWithNull +required_capability: agg_top +ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null | EVAL x=d+1+null, y=i/2+n, z=l+3, t=null | STATS double = TOP(x, 2, "asc"), integer = TOP(y, 5, "desc"), long = TOP(z, 8, "desc"), null1 = TOP(n, 1000, "asc"), null2 = TOP(t, 444, "desc") | keep double, long, integer, null1, null2 +; + +double:double |long:long |integer:integer| null1:null |null2:null +null |-6 |null |null |null +; + +topRowWithNull_WithBY +required_capability: agg_top +ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null, `by` = [1,2,3] | EVAL x=d+1+null, y=i/2+n, z=l+3, t=null | STATS double = TOP(x, 2, "asc"), integer = TOP(y, 5, "desc"), long = TOP(z, 8, "desc"), null1 = TOP(n, 1000, "asc"), null2 = TOP(t, 444, "desc") BY `by` | keep double, long, integer, null1, null2, `by` +; + +double:double |long:long |integer:integer| null1:null |null2:null | by:integer +null |-6 |null |null |null |1 +null |-6 |null |null |null |2 +null |-6 |null |null |null |3 +; + +topRowWithMultiValues +required_capability: agg_top +row x = [1,2,3,4,5], z = [1,2,3,4,5] | eval y = mv_slice(x, 1, 3) | stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), y2a=top(y, 2, "asc"), y2d=top(y, 2, "desc") +; + +z2a:integer|z3d:integer|y2a:integer|y2d:integer +[1, 2] |[5, 4, 3] |[2, 3] |[4, 3] +; + +topRowWithMultiValues_WithBY +required_capability: agg_top +row x = [1,2,3,4,5], z = [1,2,3,4,5] | eval y = mv_slice(x, 1, 3) | stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), y2a=top(y, 2, "asc"), y2d=top(y, 2, "desc") by y +; + +z2a:integer|z3d:integer|y2a:integer|y2d:integer| y:integer +[1, 2] |[5, 4, 3] |[2, 3] |[4, 3] | 2 +[1, 2] |[5, 4, 3] |[2, 3] |[4, 3] | 3 +[1, 2] |[5, 4, 3] |[2, 3] |[4, 3] | 4 +; + topAllTypesRow required_capability: agg_top ROW diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index 858c6e659449c..dfde67985635f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -19,6 +19,7 @@ import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -151,6 +152,11 @@ public DataType dataType() { return DataType.LONG; } + @Override + public Nullability nullable() { + return Nullability.FALSE; + } + @Override protected TypeResolution resolveType() { if (childrenResolved() == false) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java index 4927acc3e1cd9..63753d11d8d17 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java @@ -19,6 +19,7 @@ import org.elasticsearch.compute.aggregation.TopLongAggregatorFunctionSupplier; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -26,6 +27,10 @@ import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMax; +import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMin; +import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSlice; +import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSort; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.planner.ToAggregator; @@ -197,11 +202,18 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { public Expression surrogate() { var s = source(); - if (limitValue() == 1) { - if (orderValue()) { - return new Min(s, field()); + if (field().foldable()) { + if (limitValue() == 1) { + if (orderValue()) { + return new MvMin(s, field()); + } else { + return new MvMax(s, field()); + } } else { - return new Max(s, field()); + var start = new Literal(s, 0, DataType.INTEGER); + var end = new Literal(s, limitValue() - 1, DataType.INTEGER); + MvSort sort = new MvSort(s, field(), new Literal(s, orderValue() ? ORDER_ASC : ORDER_DESC, DataType.KEYWORD)); + return new MvSlice(s, sort, start, end); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java index 136e1233601f9..867efb44f7338 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java @@ -21,6 +21,7 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.SurrogateExpression; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; @@ -32,7 +33,7 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; -public class Values extends AggregateFunction implements ToAggregator { +public class Values extends AggregateFunction implements ToAggregator, SurrogateExpression { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Values", Values::new); @FunctionInfo( @@ -115,4 +116,9 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { // TODO cartesian_point, geo_point throw EsqlIllegalArgumentException.illegalDataType(type); } + + @Override + public Expression surrogate() { + return field().foldable() ? field() : null; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java index c332e94b20049..f06059f007fa5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java @@ -69,7 +69,7 @@ public class MvSlice extends EsqlScalarFunction implements OptionalArgument, Eva "long", "text", "version" }, - description = "Returns a subset of the multivalued field using the start and end index values.", + description = "Returns a subset of the multivalued field using the start and end index values. The function uses 0-based indexing.", examples = { @Example(file = "ints", tag = "mv_slice_positive"), @Example(file = "ints", tag = "mv_slice_negative") } ) public MvSlice( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index 282f46e0de7bb..cb7c86622c61a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -54,6 +54,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.PushDownEval; import org.elasticsearch.xpack.esql.optimizer.rules.PushDownRegexExtract; import org.elasticsearch.xpack.esql.optimizer.rules.RemoveStatsOverride; +import org.elasticsearch.xpack.esql.optimizer.rules.ReplaceAggregatesWithNull; import org.elasticsearch.xpack.esql.optimizer.rules.ReplaceAliasingEvalWithProject; import org.elasticsearch.xpack.esql.optimizer.rules.ReplaceLimitAndSortAsTopN; import org.elasticsearch.xpack.esql.optimizer.rules.ReplaceLookupWithJoin; @@ -198,9 +199,11 @@ protected static Batch operators() { new PropagateEmptyRelation(), new ConvertStringToByteRef(), new FoldNull(), + new ReplaceAggregatesWithNull(), new SplitInWithFoldableValue(), new PropagateEvalFoldables(), new ConstantFolding(), + new SubstituteSurrogates(), new PartiallyFoldCase(), // boolean new BooleanSimplification(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PropagateEvalFoldables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PropagateEvalFoldables.java index 9231105c9b663..a7bf8694f3e89 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PropagateEvalFoldables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PropagateEvalFoldables.java @@ -11,12 +11,18 @@ import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.rule.Rule; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Filter; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import java.util.ArrayList; +import java.util.List; + /** * Replace any reference attribute with its source, if it does not affect the result. * This avoids ulterior look-ups between attributes and its source across nodes. @@ -52,6 +58,18 @@ public LogicalPlan apply(LogicalPlan plan) { // C.f. https://github.com/elastic/elasticsearch/issues/100634 if (p instanceof Filter || p instanceof Eval) { p = p.transformExpressionsOnly(ReferenceAttribute.class, replaceReference); + } else if (p instanceof Aggregate agg) { + List newAggs = new ArrayList<>(agg.aggregates().size()); + agg.aggregates().forEach(e -> { + if (Alias.unwrap(e) instanceof AggregateFunction) { + newAggs.add((NamedExpression) e.transformUp(ReferenceAttribute.class, replaceReference)); + } else { + newAggs.add(e); + } + }); + if (agg.aggregates().equals(newAggs) == false) { + p = new Aggregate(agg.source(), agg.child(), agg.aggregateType(), agg.groupings(), newAggs); + } } return p; }); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PruneColumns.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PruneColumns.java index baeabb534aa3c..8d59163381126 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PruneColumns.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PruneColumns.java @@ -116,7 +116,7 @@ public LogicalPlan apply(LogicalPlan plan) { /** * Prunes attributes from the list not found in the given set. - * Returns null if no changed occurred. + * Returns null if no changes occurred. */ private static List removeUnused(List named, AttributeSet used) { var clone = new ArrayList<>(named); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceAggregatesWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceAggregatesWithNull.java new file mode 100644 index 0000000000000..77b5e563befc1 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceAggregatesWithNull.java @@ -0,0 +1,111 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules; + +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.aggregate.Count; +import org.elasticsearch.xpack.esql.expression.function.aggregate.CountDistinct; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; +import org.elasticsearch.xpack.esql.plan.logical.Eval; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.Project; +import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; +import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier; +import org.elasticsearch.xpack.esql.planner.PlannerUtils; + +import java.util.ArrayList; +import java.util.List; + +/** + * Looks for any aggregates functions (max(field), min(field)) that act on a foldable-to-null expression. + * Except COUNT_DISTINCT and COUNT (which should return 0), all other aggregate functions should return null. + * + * This applies to eval x = null | stats max(x) but also max(null) or max(2 + null). + * All aggregate functions that are also nullable (COUNT_DISTINCT and COUNT are exceptions), will get a NULL + * field replacement by the FoldNull rule, COUNT_DISTINCT will benefit from PropagateEvalFoldables. + */ +public final class ReplaceAggregatesWithNull extends OptimizerRules.OptimizerRule { + + public ReplaceAggregatesWithNull() { + super(OptimizerRules.TransformDirection.UP); + } + + @Override + protected LogicalPlan rule(Aggregate aggregate) { + var aggs = aggregate.aggregates(); + List newAggs = new ArrayList<>(aggs.size()); + boolean changed = false; + List transientEval = new ArrayList<>(); + + for (NamedExpression agg : aggs) { + Expression e = Alias.unwrap(agg); + Object value = null; + boolean isNewAgg = false; + if (e instanceof AggregateFunction af + && af.field().foldable() + && (DataType.isNull(af.field().dataType()) + || af.field() == null + || af.field() instanceof Literal lit && lit.value() == null)) { + isNewAgg = true; + if (af instanceof CountDistinct || af instanceof Count) { + value = 0L; + } + } else if (e instanceof Literal literal && literal.value() == null) { + isNewAgg = true; + } + + if (isNewAgg) { + /* + * Add an eval for every null (even if they are all "null"s, they can have different data types, + * depending on the Aggregate function return type). + * Also, copy the original alias id so that other nodes using it down stream (e.g. eval referring to the original agg) + * don't have to be updated. PruneColumns makes use of the Attribute ids to decide if unused references can be removed. + */ + var aliased = new Alias(agg.source(), agg.name(), Literal.of(agg, value), agg.toAttribute().id(), true); + transientEval.add(aliased); + changed = true; + } else { + newAggs.add(agg); + } + } + + LogicalPlan plan = aggregate; + if (changed) { + var source = aggregate.source(); + if (newAggs.isEmpty() == false) { + plan = new Aggregate(source, aggregate.child(), aggregate.aggregateType(), aggregate.groupings(), newAggs); + } else { + // All aggs actually have been optimized away + // \_Aggregate[[],[AVG([NULL][NULL]) AS s]] + // Replace by a local relation with one row, followed by an eval, e.g. + // \_Eval[[MVAVG([NULL][NULL]) AS s]] + // \_LocalRelation[[{e}#21],[ConstantNullBlock[positions=1]]] + plan = new LocalRelation( + source, + List.of(new EmptyAttribute(source)), + LocalSupplier.of(new Block[] { BlockUtils.constantBlock(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, null, 1) }) + ); + } + if (transientEval.isEmpty() == false) { + plan = new Eval(source, plan, transientEval); + plan = new Project(source, plan, Expressions.asAttributes(aggs)); + } + } + + return plan; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java index 4c5a04ff8353c..f42b1ddbd3560 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java @@ -34,7 +34,6 @@ import java.util.Map; public final class SubstituteSurrogates extends OptimizerRules.OptimizerRule { - // TODO: currently this rule only works for aggregate functions (AVG) public SubstituteSurrogates() { super(OptimizerRules.TransformDirection.UP); @@ -44,21 +43,27 @@ public SubstituteSurrogates() { protected LogicalPlan rule(Aggregate aggregate) { var aggs = aggregate.aggregates(); List newAggs = new ArrayList<>(aggs.size()); - // existing aggregate and their respective attributes - Map aggFuncToAttr = new HashMap<>(); - // surrogate functions eval - List transientEval = new ArrayList<>(); + Map aggFuncToAttr = new HashMap<>(); // existing aggregate and their respective attributes + List transientEval = new ArrayList<>(); // surrogate functions eval boolean changed = false; + boolean hasSurrogates = false; // first pass to check existing aggregates (to avoid duplication and alias waste) for (NamedExpression agg : aggs) { if (Alias.unwrap(agg) instanceof AggregateFunction af) { if ((af instanceof SurrogateExpression se && se.surrogate() != null) == false) { aggFuncToAttr.put(af, agg.toAttribute()); + } else { + hasSurrogates = true; } } } + // no surrogates, return early + if (hasSurrogates == false) { + return aggregate; + } + int[] counter = new int[] { 0 }; // 0. check list of surrogate expressions for (NamedExpression agg : aggs) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 74f95e3defbd3..3ecdb4634938b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -5091,7 +5091,7 @@ public void testNullFoldingDoesNotApplyOnAggregate() throws Exception { CountDistinct countd = new CountDistinct(EMPTY, getFieldAttribute("a"), getFieldAttribute("a")); assertEquals(countd, rule.rule(countd)); countd = new CountDistinct(EMPTY, NULL, NULL); - assertEquals(new Literal(EMPTY, null, LONG), rule.rule(countd)); + assertEquals(countd, rule.rule(countd)); Median median = new Median(EMPTY, getFieldAttribute("a")); assertEquals(median, rule.rule(median)); From 74129a0df03ccd14a467ed41388765c96b0e54cf Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Fri, 30 Aug 2024 16:25:24 +0300 Subject: [PATCH 02/16] One more test from one of the reported bugs --- .../esql/qa/testFixtures/src/main/resources/row.csv-spec | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index 02e58832c5528..77668939a14b2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -272,6 +272,13 @@ row x=1, y=2 | stats c=count(null + 1), c_d=count_distinct(1 + null), a=avg(null 0 |0 |null |null |null |null ; +rowWithMultipleStatsOverNull3 +ROW a = 1, b = "two", c = null | STATS `a_count` = COUNT(`a`), `a_cardinality` = COUNT_DISTINCT(`a`),`b_count` = COUNT(`b`), `b_cardinality` = COUNT_DISTINCT(`b`),`c_count` = COUNT(`c`), `c_cardinality` = COUNT_DISTINCT(`c`) +; + +a_count:long |a_cardinality:long |b_count:long |b_cardinality:long |c_count:long | c_cardinality:long +1 |1 |1 |1 |0 |0 +; min row l=1, d=1.0, ln=1 + null, dn=1.0 + null, n=null | stats min(l), min(d), min(ln), min(dn), n1=min(null), n2=min(null+123), n3=min(n); From ef3960e403700c91731e063831be2711a94bbac8 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Mon, 9 Sep 2024 11:03:37 +0300 Subject: [PATCH 03/16] Addressing reviews --- .../src/main/resources/meta.csv-spec | 2 +- .../src/main/resources/stats.csv-spec | 52 +++++++++++++++++-- .../main/resources/stats_percentile.csv-spec | 18 ++++++- .../src/main/resources/stats_top.csv-spec | 18 +++++-- .../function/aggregate/Percentile.java | 14 ++++- .../expression/function/aggregate/Values.java | 3 +- .../function/scalar/multivalue/MvSlice.java | 8 +-- .../esql/optimizer/LogicalPlanOptimizer.java | 2 + .../rules/PropagateEvalFoldables.java | 2 - .../rules/ReplaceAggregatesWithNull.java | 8 +-- .../xpack/esql/analysis/VerifierTests.java | 4 +- 11 files changed, 106 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec index 4d535851ce194..c2f650fd89087 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec @@ -180,7 +180,7 @@ mv_median |number |"double|integer|long|unsigne mv_min |field |"boolean|date|date_nanos|double|integer|ip|keyword|long|text|unsigned_long|version" |Multivalue expression. mv_percentile |[number, percentile] |["double|integer|long", "double|integer|long"] |[Multivalue expression., The percentile to calculate. Must be a number between 0 and 100. Numbers out of range will return a null instead.] mv_pseries_wei|[number, p] |[double, double] |[Multivalue expression., It is a constant number that represents the 'p' parameter in the P-Series. It impacts every element's contribution to the weighted sum.] -mv_slice |[field, start, end] |["boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version", integer, integer]|[Multivalue expression. If `null`\, the function returns `null`., Start position. If `null`\, the function returns `null`. The start argument can be negative. An index of -1 is used to specify the last value in the list., End position(included). Optional; if omitted\, the position at `start` is returned. The end argument can be negative. An index of -1 is used to specify the last value in the list.] +mv_slice |[field, start, end] |["boolean|cartesian_point|cartesian_shape|date|double|geo_point|geo_shape|integer|ip|keyword|long|text|version", integer, integer]|[Multivalue expression. If `null`\, the function returns `null`., Start position (inclusive). If `null`\, the function returns `null`. The start argument can be negative. An index of -1 is used to specify the last value in the list., End position (inclusive). Optional; if omitted\, the position at `start` is returned. The end argument can be negative. An index of -1 is used to specify the last value in the list.] mv_sort |[field, order] |["boolean|date|double|integer|ip|keyword|long|text|version", keyword] |[Multivalue expression. If `null`\, the function returns `null`., Sort order. The valid options are ASC and DESC\, the default is ASC.] mv_sum |number |"double|integer|long|unsigned_long" |Multivalue expression. mv_zip |[string1, string2, delim] |["keyword|text", "keyword|text", "keyword|text"] |[Multivalue expression., Multivalue expression., Delimiter. Optional; if omitted\, `\,` is used as a default delimiter.] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 81bf222008b32..a68eb5ee86802 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2016,6 +2016,13 @@ row foo="unused" | eval mv=[1,2,3] | stats avg = avg(mv), avg([5,6]), min(mv), m 2.0 |5.5 |1 |5 |3 |6 |3 |2 |3 |2 ; +statsOfPropagateableConstOnIndex +from employees | eval mv=[1,2,3] | stats avg = avg(mv), avg([5,6]), min(mv), min([5,6]), max(mv), max([5,6]), count(mv), count([5,6]), count_distinct(mv), count_distinct([5,5,6]); + + avg:double |avg([5,6]):double|min(mv):integer|min([5,6]):integer|max(mv):integer|max([5,6]):integer|count(mv):long |count([5,6]):long|count_distinct(mv):long|count_distinct([5,5,6]):long +2.0 |5.5 |1 |5 |3 |6 |300 |200 |3 |2 +; + statsOfPropagateableConstWithGrouping_Count ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a; @@ -2025,6 +2032,13 @@ COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer 0 |0 |0 |3 ; +statsOfPropagateableConstWithGroupingByFoldableNull +ROW a = 1+null, c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a; + +COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer +0 |0 |0 |null +; + statsOfPropagateableConstWithGrouping_DistinctCount ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT_DISTINCT(c), COUNT_DISTINCT(null), COUNT_DISTINCT(null - 1) BY a; @@ -2044,9 +2058,9 @@ cd(5+6):long |cd(null):long |cd(null-1):long|cd(eval_null):long|cd(mv):long | ; countWithGrouping -from employees | EVAL c = 5 + 6, d = 6 + null, e = [1,2,2] | STATS `cd(5+6)`=COUNT(c), `cd(null)`=COUNT(null), `cd(null-1)`=COUNT(null - 1), `cd(eval_null)`=COUNT(d), `cd(mv)`=COUNT(e) BY gender | sort gender; +from employees | EVAL a = 5 + 6, b = 6 + null, e = [1,2,2] | STATS `c(5+6)`=COUNT(a), `c(null)`=COUNT(null), `c(null-1)`=COUNT(null - 1), `c(eval_null)`=COUNT(b), `c(mv)`=COUNT(e) BY gender | sort gender; -cd(5+6):long |cd(null):long |cd(null-1):long|cd(eval_null):long|cd(mv):long |gender:keyword +c(5+6):long |c(null):long |c(null-1):long |c(eval_null):long |c(mv):long |gender:keyword 33 |0 |0 |0 |99 |F 57 |0 |0 |0 |171 |M 10 |0 |0 |0 |30 |null @@ -2353,10 +2367,42 @@ m:integer |a:double |x:integer ; valuesOfConstants -row mv_int = [1,2,3], string = "abc", mv_string = ["bar","foo"], null_exp = 234 + null, to_be_casted_ip = "127.0.0.1", row_null = null +row mv_int = [1,2,3,2], string = "abc", mv_string = ["bar","foo"], null_exp = 234 + null, to_be_casted_ip = "127.0.0.1", row_null = null | stats values(null), values(mv_int), values(string), values(mv_string), values(null_exp), values(123 + null), values(to_be_casted_ip::ip), values(row_null) ; values(null):null|values(mv_int):integer|values(string):keyword|values(mv_string):keyword|values(null_exp):integer|values(123 + null):integer|values(to_be_casted_ip::ip):ip|values(row_null):null null |[1, 2, 3] |abc |[bar, foo] |null |null |127.0.0.1 |null ; + +byFoldableValueFromEval +from employees | eval y = 5 + 6 | stats max(salary) by y +; + +max(salary):integer|y:integer +74999 |11 +; + +byFoldableValueInline +from employees | stats max(salary) by 5+6 +; + +max(salary):integer| 5+6:integer +74999 |11 +; + +byFoldableValueInlineWithAlias +from employees | stats max(salary) by y=5+6 +; + +max(salary):integer| y:integer +74999 |11 +; + +byAndMaxFoldableValues +from employees | eval x = [1,2,3], y = 5 + 6 | stats max(x) by y +; + +max(x):integer|y:integer +3 |11 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec index 73a8fc0e2c704..c70f458adf2e2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec @@ -5,8 +5,24 @@ row x = null::integer, y = null, z = 123 - null | stats percIntNull = percentile null |null |null |null |null ; +percentileWithPropagateablePercentileArgument +row value200 = 200, x = null::integer, y = null, z = 123 - null, p90 = 80+10 | eval p100 = p90 + 10, p3_null = null + 2, p50 = 40+10 | stats percentile(value200, p90), percentile(value200, p100), percentile(value200, p3_null), percentile(value200, p50), percentile(p90, p90), percentile(p90, p50), percentile(p3_null, p3_null), percentile(p100, p3_null); + +percentile(value200, p90):double|percentile(value200, p100):double|percentile(value200, p3_null):double|percentile(value200, p50):double|percentile(p90, p90):double|percentile(p90, p50):double|percentile(p3_null, p3_null):double|percentile(p100, p3_null):double +200.0 |200.0 |null |200.0 |90.0 |90.0 |null |null +; + +percentileWithPropagateablePercentileArgumentOnRealIndex +from employees | eval value200 = 200, x = null::integer, y = null, z = 123 - null, p90 = 80+10 | eval p100 = p90 + 10, p3_null = null + 2, p50 = 40+10 | stats percentile(value200, p90), percentile(value200, p100), percentile(value200, p3_null), percentile(value200, p50), percentile(p90, p90), percentile(p90, p50), percentile(p3_null, p3_null), percentile(p100, p3_null) by gender | sort gender; + +percentile(value200, p90):double|percentile(value200, p100):double|percentile(value200, p3_null):double|percentile(value200, p50):double|percentile(p90, p90):double|percentile(p90, p50):double|percentile(p3_null, p3_null):double|percentile(p100, p3_null):double|gender:keyword +200.0 |200.0 |null |200.0 |90.0 |90.0 |null |null |F +200.0 |200.0 |null |200.0 |90.0 |90.0 |null |null |M +200.0 |200.0 |null |200.0 |90.0 |90.0 |null |null |null +; + percentileOfNullsOnRealIndex -from employees | eval x = null::integer, y = null, z = 123 - null | stats percIntNull = percentile(y, 90), percNull = percentile(y, 90), percentile(null, 90), percentile(null+2, 90), percentile(z, 90) by languages | sort languages desc; +from employees | eval x = null::integer, y = null, z = 123 - null | stats percIntNull = percentile(x, 90), percNull = percentile(y, 90), percentile(null, 90), percentile(null+2, 90), percentile(z, 90) by languages | sort languages desc; percIntNull:double|percNull:double|percentile(null, 90):double|percentile(null+2, 90):double|percentile(z, 90):double|languages:integer null |null |null |null |null |null diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec index b4162b8b7a925..758be8a3d9cfe 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec @@ -51,11 +51,11 @@ ROW d=-9.81, i=25324, l=TO_LONG(-9) | EVAL x=d+1, y=i/2, z=l+3 | STATS double = topRowWithNull required_capability: agg_top -ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null | EVAL x=d+1+null, y=i/2+n, z=l+3, t=null | STATS double = TOP(x, 2, "asc"), integer = TOP(y, 5, "desc"), long = TOP(z, 8, "desc"), null1 = TOP(n, 1000, "asc"), null2 = TOP(t, 444, "desc") | keep double, long, integer, null1, null2 +ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null | EVAL x=d+1+null, y=i/2+n, z=l+3, t=null | STATS double = TOP(x, 2, "asc"), integer = TOP(y, 5, "desc"), long = TOP(z, 8, "desc"), null1 = TOP(n, 1000, "asc"), null2 = TOP(t, 444, "desc"), null3 = TOP(null, 10, "desc") | keep double, long, integer, null* ; -double:double |long:long |integer:integer| null1:null |null2:null -null |-6 |null |null |null +double:double |long:long |integer:integer| null1:null |null2:null |null3:null +null |-6 |null |null |null |null ; topRowWithNull_WithBY @@ -89,6 +89,18 @@ z2a:integer|z3d:integer|y2a:integer|y2d:integer| y:integer [1, 2] |[5, 4, 3] |[2, 3] |[4, 3] | 4 ; +topRowWithDuplicatesInMultiValues +row x = [1,2,3,2,4,5,5], z = [1,2,3,4,5] | stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), x5a=top(x, 5, "asc"), x5d=top(x, 5, "desc") by x | sort x desc +; + + z2a:integer | z3d:integer | x5a:integer | x5d:integer | x:integer +[1, 2] |[5, 4, 3] |[1, 2, 2, 3, 4]|[5, 5, 4, 3, 2]|5 +[1, 2] |[5, 4, 3] |[1, 2, 2, 3, 4]|[5, 5, 4, 3, 2]|4 +[1, 2] |[5, 4, 3] |[1, 2, 2, 3, 4]|[5, 5, 4, 3, 2]|3 +[1, 2] |[5, 4, 3] |[1, 2, 2, 3, 4]|[5, 5, 4, 3, 2]|2 +[1, 2] |[5, 4, 3] |[1, 2, 2, 3, 4]|[5, 5, 4, 3, 2]|1 +; + topAllTypesRow required_capability: agg_top ROW diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java index 0d5dd4b66501c..3fffb7f6f862b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.expression.function.aggregate; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -14,11 +15,15 @@ import org.elasticsearch.compute.aggregation.PercentileDoubleAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.PercentileIntAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.PercentileLongAggregatorFunctionSupplier; +import org.elasticsearch.xpack.esql.capabilities.Validatable; +import org.elasticsearch.xpack.esql.common.Failure; +import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.SurrogateExpression; +import org.elasticsearch.xpack.esql.expression.Validations; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; @@ -34,7 +39,7 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; -public class Percentile extends NumericAggregate implements SurrogateExpression { +public class Percentile extends NumericAggregate implements SurrogateExpression, Validatable { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "Percentile", @@ -134,7 +139,12 @@ protected TypeResolution resolveType() { sourceText(), SECOND, "numeric except unsigned_long" - ).and(isFoldable(percentile, sourceText(), SECOND)); + ); + } + + @Override + public void validate(Failures failures) { + failures.add(Validations.isFoldable(percentile, sourceText(), SECOND)); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java index 867efb44f7338..efa42c8a2423e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java @@ -25,6 +25,7 @@ import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvDedupe; import org.elasticsearch.xpack.esql.planner.ToAggregator; import java.io.IOException; @@ -119,6 +120,6 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { @Override public Expression surrogate() { - return field().foldable() ? field() : null; + return field().foldable() ? new MvDedupe(this.source(), field()) : null; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java index f06059f007fa5..d1b374a424193 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java @@ -95,13 +95,13 @@ public MvSlice( @Param( name = "start", type = { "integer" }, - description = "Start position. If `null`, the function returns `null`. " + description = "Start position (inclusive). If `null`, the function returns `null`. " + "The start argument can be negative. An index of -1 is used to specify the last value in the list." ) Expression start, @Param( name = "end", type = { "integer" }, - description = "End position(included). Optional; if omitted, the position at `start` is returned. " + description = "End position (inclusive). Optional; if omitted, the position at `start` is returned. " + "The end argument can be negative. An index of -1 is used to specify the last value in the list.", optional = true ) Expression end @@ -243,10 +243,10 @@ static int adjustIndex(int oldOffset, int fieldValueCount, int first) { static void checkStartEnd(int start, int end) throws InvalidArgumentException { if (start > end) { - throw new InvalidArgumentException("Start offset is greater than end offset"); + throw new InvalidArgumentException("Start offset [{}] is greater than end offset [{}]", start, end); } if (start < 0 && end >= 0) { - throw new InvalidArgumentException("Start and end offset have different signs"); + throw new InvalidArgumentException("Start [{}] and end [{}] offsets have different signs", start, end); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index cb7c86622c61a..bc5c98f8b262c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -198,8 +198,10 @@ protected static Batch operators() { new PruneEmptyPlans(), new PropagateEmptyRelation(), new ConvertStringToByteRef(), + // nulls in expressions new FoldNull(), new ReplaceAggregatesWithNull(), + // constant folding related new SplitInWithFoldableValue(), new PropagateEvalFoldables(), new ConstantFolding(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PropagateEvalFoldables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PropagateEvalFoldables.java index a7bf8694f3e89..7e4523b82db5e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PropagateEvalFoldables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PropagateEvalFoldables.java @@ -54,8 +54,6 @@ public LogicalPlan apply(LogicalPlan plan) { plan = plan.transformUp(p -> { // Apply the replacement inside Filter and Eval (which shouldn't make a difference) - // TODO: also allow aggregates once aggs on constants are supported. - // C.f. https://github.com/elastic/elasticsearch/issues/100634 if (p instanceof Filter || p instanceof Eval) { p = p.transformExpressionsOnly(ReferenceAttribute.class, replaceReference); } else if (p instanceof Aggregate agg) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceAggregatesWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceAggregatesWithNull.java index 77b5e563befc1..6ee2dc54c9042 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceAggregatesWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceAggregatesWithNull.java @@ -55,16 +55,12 @@ protected LogicalPlan rule(Aggregate aggregate) { Expression e = Alias.unwrap(agg); Object value = null; boolean isNewAgg = false; - if (e instanceof AggregateFunction af - && af.field().foldable() - && (DataType.isNull(af.field().dataType()) - || af.field() == null - || af.field() instanceof Literal lit && lit.value() == null)) { + if (e instanceof AggregateFunction af && Expressions.isNull(af.field())) { isNewAgg = true; if (af instanceof CountDistinct || af instanceof Count) { value = 0L; } - } else if (e instanceof Literal literal && literal.value() == null) { + } else if (Expressions.isNull(e)) {// FoldNull can replace an entire aggregate function with a null Literal isNewAgg = true; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index b50b801785a9f..c7597a1c4a69f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -295,14 +295,14 @@ public void testAggsExpressionsInStatsAggs() { + " found value [first_name] type [keyword]", error("from test | stats count(avg(first_name)) by first_name") ); - assertEquals( + /*assertEquals( "1:23: second argument of [percentile(languages, languages)] must be a constant, received [languages]", error("from test | stats x = percentile(languages, languages) by emp_no") ); assertEquals( "1:23: second argument of [count_distinct(languages, languages)] must be a constant, received [languages]", error("from test | stats x = count_distinct(languages, languages) by emp_no") - ); + );*/ // no agg function assertEquals("1:19: expected an aggregate function but found [5]", error("from test | stats 5 by emp_no")); From c2db11cba4b09ccba70ac766bc0da43082ad81f3 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Wed, 18 Sep 2024 19:53:48 +0300 Subject: [PATCH 04/16] Make count_distinct deal with Validateable interface --- .../function/aggregate/CountDistinct.java | 13 ++++++++-- .../xpack/esql/analysis/VerifierTests.java | 8 ------ .../optimizer/LogicalPlanOptimizerTests.java | 26 +++++++++++++++++++ 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index dfde67985635f..7ad04a2fba413 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -17,6 +17,8 @@ import org.elasticsearch.compute.aggregation.CountDistinctIntAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.CountDistinctLongAggregatorFunctionSupplier; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; +import org.elasticsearch.xpack.esql.capabilities.Validatable; +import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.Nullability; @@ -25,6 +27,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.EsqlTypeResolutions; import org.elasticsearch.xpack.esql.expression.SurrogateExpression; +import org.elasticsearch.xpack.esql.expression.Validations; import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; @@ -45,7 +48,7 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isWholeNumber; -public class CountDistinct extends AggregateFunction implements OptionalArgument, ToAggregator, SurrogateExpression { +public class CountDistinct extends AggregateFunction implements OptionalArgument, ToAggregator, SurrogateExpression, Validatable { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, "CountDistinct", @@ -177,7 +180,13 @@ protected TypeResolution resolveType() { if (resolution.unresolved() || precision == null) { return resolution; } - return isWholeNumber(precision, sourceText(), SECOND).and(isFoldable(precision, sourceText(), SECOND)); + return isWholeNumber(precision, sourceText(), SECOND); + } + + public void validate(Failures failures) { + if (precision != null) { + failures.add(Validations.isFoldable(precision, sourceText(), SECOND)); + } } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index c7597a1c4a69f..650a0d3481d9d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -295,14 +295,6 @@ public void testAggsExpressionsInStatsAggs() { + " found value [first_name] type [keyword]", error("from test | stats count(avg(first_name)) by first_name") ); - /*assertEquals( - "1:23: second argument of [percentile(languages, languages)] must be a constant, received [languages]", - error("from test | stats x = percentile(languages, languages) by emp_no") - ); - assertEquals( - "1:23: second argument of [count_distinct(languages, languages)] must be a constant, received [languages]", - error("from test | stats x = count_distinct(languages, languages) by emp_no") - );*/ // no agg function assertEquals("1:19: expected an aggregate function but found [5]", error("from test | stats 5 by emp_no")); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 3ecdb4634938b..491671c911c67 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -5844,6 +5844,32 @@ public void testMvSortInvalidOrder() { assertEquals("Invalid order value in [mv_sort(v, o)], expected one of [ASC, DESC] but got [dsc]", iae.getMessage()); } + public void testCheckPercentileFoldableSecondArgument() { + VerificationException e = expectThrows(VerificationException.class, () -> plan(""" + from test + | stats x = percentile(languages, languages) by emp_no + """)); + assertTrue(e.getMessage().startsWith("Found ")); + final String header = "Found 1 problem\nline "; + assertEquals( + "2:35: second argument of [percentile(languages, languages)] must be a constant, received [languages]", + e.getMessage().substring(header.length()) + ); + } + + public void testCheckCountDistinctFoldableSecondArgument() { + VerificationException e = expectThrows(VerificationException.class, () -> plan(""" + from test + | stats x = count_distinct(languages, languages) by emp_no + """)); + assertTrue(e.getMessage().startsWith("Found ")); + final String header = "Found 1 problem\nline "; + assertEquals( + "2:39: second argument of [count_distinct(languages, languages)] must be a constant, received [languages]", + e.getMessage().substring(header.length()) + ); + } + private Literal nullOf(DataType dataType) { return new Literal(Source.EMPTY, null, dataType); } From aedad55bf8d5edd7894612e4d400c2e1e39f041a Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Thu, 19 Sep 2024 15:00:42 +0300 Subject: [PATCH 05/16] More count_distinct fixes, more tests for percentile's foldable second argument --- .../resources/stats_count_distinct.csv-spec | 58 +++++++++++++++++++ .../main/resources/stats_percentile.csv-spec | 38 +++++++++++- .../function/aggregate/CountDistinct.java | 6 +- .../function/aggregate/Percentile.java | 3 - .../esql/optimizer/LogicalPlanOptimizer.java | 1 + 5 files changed, 100 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec index b4f6a701ec272..ffedaedf770ce 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec @@ -177,3 +177,61 @@ m:long | languages:i 20 | 5 10 | null ; + +foldablePrecision +from employees | eval x = 82+8 | stats count_distinct(salary, x*100); + +count_distinct(salary, x*100):long +100 +; + +foldablePrecisionWithGroupings +from employees | eval x = 82+8 | stats cd1=count_distinct(salary, x*100), cd2=count_distinct(salary, 10+10) by gender, languages | sort gender, languages; + + cd1:long | cd2:long |gender:keyword |languages:integer +4 |4 |F |1 +5 |5 |F |2 +6 |6 |F |3 +6 |6 |F |4 +9 |9 |F |5 +3 |3 |F |null +9 |9 |M |1 +11 |11 |M |2 +11 |11 |M |3 +11 |11 |M |4 +8 |8 |M |5 +7 |7 |M |null +2 |2 |null |1 +3 |3 |null |2 +1 |1 |null |4 +4 |4 |null |5 +; + +foldableNullPrecision +from employees | eval x = 82+null | stats count_distinct(salary, x*100), count_distinct(salary, null), count_distinct(salary, null + 1); + +count_distinct(salary, x*100):long|count_distinct(salary, null):long|count_distinct(salary, null + 1):long +100 |100 |100 +; + +foldableNullPrecisionWithGroupings +from employees | eval x = 82+null | stats cdNullEval=count_distinct(salary, x*100), cdNull=count_distinct(salary, null), cdFoldedNull=count_distinct(salary, null + 1) by gender,languages | sort gender,languages; + +cdNullEval:long| cdNull:long |cdFoldedNull:long|gender:keyword | languages:integer +4 |4 |4 |F |1 +5 |5 |5 |F |2 +6 |6 |6 |F |3 +6 |6 |6 |F |4 +9 |9 |9 |F |5 +3 |3 |3 |F |null +9 |9 |9 |M |1 +11 |11 |11 |M |2 +11 |11 |11 |M |3 +11 |11 |11 |M |4 +8 |8 |8 |M |5 +7 |7 |7 |M |null +2 |2 |2 |null |1 +3 |3 |3 |null |2 +1 |1 |1 |null |4 +4 |4 |4 |null |5 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec index 4b7697ac7443d..607258a3a7cb0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec @@ -156,7 +156,7 @@ m:double | p50:double | job_positions:keyword medianOfDoubleByKeyword#[skip:-8.12.99,reason:ReplaceStatsAggExpressionWithEval breaks bwc gh-103765] -from employees | stats m = median(salary_change), p50 = percentile(salary_change, 50)by job_positions | sort m desc | limit 4; +from employees | stats m = median(salary_change), p50 = percentile(salary_change, 50) by job_positions | sort m desc | limit 4; m:double | p50:double | job_positions:keyword 5.94 | 5.94 | "Accountant" @@ -240,3 +240,39 @@ row a=0 constant_single:double 5 ; + +foldablePercentileValue +from employees | eval x = 82+8 | stats percentile(salary, x), percentile(salary, 90); + +percentile(salary, x):double|percentile(salary, 90):double +68442.6 |68442.6 +; + + +foldablePercentileValue_WithGrouping +from employees | eval x = 82+8 | stats percentile(salary, x), percentile(salary, 90) by languages | sort languages desc; + +percentile(salary, x):double|percentile(salary, 90):double|languages:integer +73965.8 |73965.8 |null +54518.0 |54518.0 |5 +66286.2 |66286.2 |4 +68961.2 |68961.2 |3 +63067.2 |63067.2 |2 +69425.4 |69425.4 |1 +; + +nullPercentileValue +from employees | eval x = null + 1 | stats pNull = percentile(salary_change, null), pNullFolding = percentile(salary_change, null + 1), pNullEvalFolding = percentile(salary_change, x); + + pNull:double |pNullFolding:double|pNullEvalFolding:double +null |null |null +; + +nullPercentileValue_WithGrouping +from employees | eval x = null + 1 | stats pNull = percentile(salary_change, null), pNullFolding = percentile(salary_change, null + 1), pNullEvalFolding = percentile(salary_change, x), p = percentile(salary_change, 75) by gender | sort p; + + pNull:double |pNullFolding:double|pNullEvalFolding:double| p:double |gender:keyword +null |null |null |7.34 |M +null |null |null |8.3375 |F +null |null |null |8.985 |null +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index 7ad04a2fba413..f16d1cc3806e0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -44,7 +44,6 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isWholeNumber; @@ -192,7 +191,10 @@ public void validate(Failures failures) { @Override public AggregatorFunctionSupplier supplier(List inputChannels) { DataType type = field().dataType(); - int precision = this.precision == null ? DEFAULT_PRECISION : ((Number) this.precision.fold()).intValue(); + // folding and checking with null here after the folding constants rules do their job, otherwise there could be a NPE + int precision = (this.precision == null || this.precision.fold() == null) + ? DEFAULT_PRECISION + : ((Number) this.precision.fold()).intValue(); if (type == DataType.BOOLEAN) { // Booleans ignore the precision because there are only two possible values anyway return new CountDistinctBooleanAggregatorFunctionSupplier(inputChannels); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java index 3fffb7f6f862b..2afa241d819ae 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.expression.function.aggregate; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -16,7 +15,6 @@ import org.elasticsearch.compute.aggregation.PercentileIntAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.PercentileLongAggregatorFunctionSupplier; import org.elasticsearch.xpack.esql.capabilities.Validatable; -import org.elasticsearch.xpack.esql.common.Failure; import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -36,7 +34,6 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; public class Percentile extends NumericAggregate implements SurrogateExpression, Validatable { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index 8fecce542c44e..a26404a6b28d2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -170,6 +170,7 @@ protected static Batch operators() { new CombineBinaryComparisons(), new CombineDisjunctions(), new SimplifyComparisonsArithmetics(DataType::areCompatible), + // new ReplaceStatsAggExpressionWithEval(), // prune/elimination new PruneFilters(), new PruneColumns(), From 4c03ce2878eab9cfba186abb82b534dd05666ac8 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Thu, 3 Oct 2024 12:32:43 +0300 Subject: [PATCH 06/16] Add more tests Introduce isConstantFoldable() for aggregate functions --- .../src/main/resources/spatial.csv-spec | 4 +- .../function/aggregate/AggregateFunction.java | 4 + .../aggregate/SpatialAggregateFunction.java | 13 +- .../rules/logical/PropagateEvalFoldables.java | 2 +- .../optimizer/LogicalPlanOptimizerTests.java | 326 +++++++++++++++++- 5 files changed, 338 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index 1f71eff925174..45814dc65ec4a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -41,10 +41,9 @@ centroidOfNull FROM airports | eval z = TO_GEOPOINT(null) | STATS centroidNull = ST_CENTROID_AGG(null), centroidExpNull = ST_CENTROID_AGG(TO_GEOPOINT(null::string)), centroidEvalNull = ST_CENTROID_AGG(z); centroidNull:null|centroidExpNull:geo_point|centroidEvalNull:geo_point -null |null |null +POINT (NaN NaN) |null |POINT (NaN NaN) ; -########### failing :-( with InvalidArgumentException: Does not support yet aggregations over constants centroidFromStringNested required_capability: st_centroid_agg @@ -1312,7 +1311,6 @@ wkt:keyword |pt:cartesian_point ["POINT(4297.11 -1475.53)", "POINT(7580.93 2272.77)"] |[POINT(4297.11 -1475.53), POINT(7580.93 2272.77)] ; -########### failing :-( with InvalidArgumentException: Does not support yet aggregations over constants centroidCartesianFromStringNested required_capability: st_centroid_agg diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AggregateFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AggregateFunction.java index f0acac0e9744e..7c4f336fe1210 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AggregateFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AggregateFunction.java @@ -81,6 +81,10 @@ public List parameters() { return parameters; } + public boolean isConstantFoldable() { + return true; + } + /** * Returns the input expressions used in aggregation. * Defaults to a list containing the only the input field. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialAggregateFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialAggregateFunction.java index 5cb7edf2581d5..144f9878f8b0d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialAggregateFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/SpatialAggregateFunction.java @@ -35,6 +35,15 @@ protected SpatialAggregateFunction(StreamInput in, boolean useDocValues) throws public abstract SpatialAggregateFunction withDocValues(); + public boolean useDocValues() { + return useDocValues; + } + + @Override + public boolean isConstantFoldable() { + return false; + } + @Override public int hashCode() { // NB: the hashcode is currently used for key generation so @@ -52,8 +61,4 @@ public boolean equals(Object obj) { } return false; } - - public boolean useDocValues() { - return useDocValues; - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java index 27c8a32ba762d..2f3fe744bb638 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java @@ -59,7 +59,7 @@ public LogicalPlan apply(LogicalPlan plan) { } else if (p instanceof Aggregate agg) { List newAggs = new ArrayList<>(agg.aggregates().size()); agg.aggregates().forEach(e -> { - if (Alias.unwrap(e) instanceof AggregateFunction) { + if (Alias.unwrap(e) instanceof AggregateFunction af && af.isConstantFoldable()) { newAggs.add((NamedExpression) e.transformUp(ReferenceAttribute.class, replaceReference)); } else { newAggs.add(e); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 6ff6cb4c477c5..f59018d96886f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.optimizer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.Build; import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.common.lucene.BytesRefs; @@ -38,7 +39,6 @@ import org.elasticsearch.xpack.esql.core.expression.predicate.logical.Or; import org.elasticsearch.xpack.esql.core.expression.predicate.nulls.IsNotNull; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; -import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.util.Holder; @@ -5591,7 +5591,327 @@ public void testCheckCountDistinctFoldableSecondArgument() { ); } - private Literal nullOf(DataType dataType) { - return new Literal(Source.EMPTY, null, dataType); + /** + * eval x = null + 1 | .... | stats avg(x) + * and + * avg(null + 1) + * + * Expects + * Project[[s{r}#10, y{r}#6]] + * \_TopN[[Order[y{r}#6,ASC,LAST]],10000[INTEGER]] + * \_Eval[[null[DOUBLE] AS s]] + * \_Aggregate[STANDARD,[y{r}#6],[y{r}#6]] + * \_Eval[[salary{f}#17 / 1000[INTEGER] AS y]] + * \_Limit[5[INTEGER]] + * \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..] + */ + public void testAvg_Of_FoldableAndPropagateableNull() { + String[] queries = new String[] { """ + from test + | eval x = null + 1, y = salary / 1000 + | limit 5 + | stats s = avg(x) by y + | sort y + """, """ + from test + | eval y = salary / 1000 + | limit 5 + | stats s = avg(null + 1) by y + | sort y + """ }; + + for (String query : queries) { + var plan = optimizedPlan(query); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("s", "y")); + var sAttribute = Expressions.attribute(project.projections().get(0)); + var yAttribute = Expressions.attribute(project.projections().get(1)); + + var topN = as(project.child(), TopN.class); + assertThat(topN.order().size(), is(1)); + + var order = as(topN.order().get(0), Order.class); + assertThat(order.direction(), equalTo(Order.OrderDirection.ASC)); + assertThat(order.nullsPosition(), equalTo(Order.NullsPosition.LAST)); + // same "y" for project and sort + assertThat(Expressions.attribute(order.child()), is(yAttribute)); + + var eval = as(topN.child(), Eval.class); + assertThat(eval.fields().size(), equalTo(1)); + var evalAttribute = eval.fields().get(0); + // same "s" for project and eval + assertThat(sAttribute, is(Expressions.attribute(evalAttribute))); + assertTrue(evalAttribute.child().foldable()); + assertTrue(evalAttribute.child().fold() == null); + assertThat(evalAttribute.dataType(), equalTo(DOUBLE)); + + var aggregate = as(eval.child(), Aggregate.class); + assertThat(aggregate.aggregates().size(), equalTo(1)); + assertThat(aggregate.groupings().size(), equalTo(1)); + // same "y" for project and sort AND aggregations + assertThat(Expressions.attribute(aggregate.aggregates().get(0)), is(yAttribute)); + assertThat(Expressions.attribute(aggregate.groupings().get(0)), is(yAttribute)); + + eval = as(aggregate.child(), Eval.class); + assertThat(eval.fields().size(), equalTo(1)); + evalAttribute = eval.fields().get(0); + assertThat(yAttribute, is(Expressions.attribute(evalAttribute))); + assertThat(evalAttribute.child(), instanceOf(Div.class)); + assertThat(Expressions.names(evalAttribute.child().references()), contains("salary")); + + var limit = as(eval.child(), Limit.class); + assertThat(limit.limit().fold(), equalTo(5)); + + as(limit.child(), EsRelation.class); + } + } + + /** + * Expects + * Project[[s{r}#10, x{r}#3]] + * \_TopN[[Order[x{r}#3,ASC,LAST]],10000[INTEGER]] + * \_Eval[[$$SUM$s$0{r$}#22 / $$COUNT$s$1{r$}#23 AS s]] + * \_Aggregate[STANDARD,[x{r}#3],[SUM(y{r}#6) AS $$SUM$s$0, COUNT(y{r}#6) AS $$COUNT$s$1, x{r}#3]] + * \_Eval[[null[INTEGER] AS x, salary{f}#17 / 1000[INTEGER] AS y]] + * \_Limit[5[INTEGER]] + * \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..] + */ + public void testAvg_By_FoldableAndPropagateableNull() { + var plan = optimizedPlan(""" + from test + | eval x = null + 1, y = salary / 1000 + | limit 5 + | stats s = avg(y) by x + | sort x + """); + + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("s", "x")); + var sAttribute = Expressions.attribute(project.projections().get(0)); + var xAttribute = Expressions.attribute(project.projections().get(1)); + + var topN = as(project.child(), TopN.class); + assertThat(topN.order().size(), is(1)); + + var order = as(topN.order().get(0), Order.class); + assertThat(order.direction(), equalTo(Order.OrderDirection.ASC)); + assertThat(order.nullsPosition(), equalTo(Order.NullsPosition.LAST)); + // same "x" for project and sort + assertThat(Expressions.attribute(order.child()), is(xAttribute)); + + var eval = as(topN.child(), Eval.class); + assertThat(eval.fields().size(), equalTo(1)); + var evalAttribute = eval.fields().get(0); + // same "s" for project and eval + assertThat(sAttribute, is(Expressions.attribute(evalAttribute))); + assertFalse(evalAttribute.child().foldable()); + var div = as(evalAttribute.child(), Div.class); + assertThat(Expressions.name(div.left()), startsWith("$$SUM$s$0")); + assertThat(Expressions.name(div.right()), startsWith("$$COUNT$s$1")); + assertThat(evalAttribute.dataType(), equalTo(DOUBLE)); + + var aggregate = as(eval.child(), Aggregate.class); + assertThat(aggregate.aggregates().size(), equalTo(3)); + assertThat(aggregate.groupings().size(), equalTo(1)); + // same "x" for project and sort AND groupings + assertThat(Expressions.attribute(aggregate.groupings().get(0)), is(xAttribute)); + + // average made of sum and count + // checking the sum first + var aliasSum = as(aggregate.aggregates().get(0), Alias.class); + assertThat(aliasSum.name(), equalTo("$$SUM$s$0")); + var sum = as(Alias.unwrap(aliasSum), Sum.class); + var yAttribute = Expressions.attribute(sum.field()); + assertThat(yAttribute.name(), equalTo("y")); + + // then the count + var aliasCount = as(aggregate.aggregates().get(1), Alias.class); + assertThat(aliasCount.name(), equalTo("$$COUNT$s$1")); + var count = as(Alias.unwrap(aliasCount), Count.class); + assertThat(count.field(), is(yAttribute)); // sum and count over the same field + + // and the grouping which is referenced in the aggregates as well + assertThat(Expressions.attribute(aggregate.aggregates().get(2)), is(xAttribute)); + + eval = as(aggregate.child(), Eval.class); + assertThat(eval.fields().size(), equalTo(2)); + + // x = null because x = null + 1 => x = null after folding + evalAttribute = eval.fields().get(0); + assertTrue(evalAttribute.child().foldable()); + assertTrue(evalAttribute.child().fold() == null); + assertThat(evalAttribute.dataType(), equalTo(INTEGER)); + assertThat(Expressions.attribute(evalAttribute), is(xAttribute)); + + // y = salary / 1000 + evalAttribute = eval.fields().get(1); + assertThat(yAttribute, is(Expressions.attribute(evalAttribute))); + assertThat(evalAttribute.child(), instanceOf(Div.class)); + assertThat(Expressions.names(evalAttribute.child().references()), contains("salary")); + + var limit = as(eval.child(), Limit.class); + assertThat(limit.limit().fold(), equalTo(5)); + + as(limit.child(), EsRelation.class); + } + + /** + * Expects + * Project[[s{r}#10, y{r}#6]] + * \_TopN[[Order[y{r}#6,ASC,LAST]],10000[INTEGER]] + * \_Eval[[$$COUNT$$$SUM$s$0$0{r$}#24 * 10[INTEGER] AS $$SUM$s$0, $$SUM$s$0{r$}#22 / $$COUNT$s$1{r$}#23 AS s]] + * \_Aggregate[STANDARD,[y{r}#6],[COUNT([*][KEYWORD]) AS $$COUNT$$$SUM$s$0$0, COUNT(10[INTEGER]) AS $$COUNT$s$1, y{r}#6]] + * \_Eval[[salary{f}#17 / 1000[INTEGER] AS y]] + * \_Limit[5[INTEGER]] + * \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..] + */ + public void testAvg_Of_FoldableAndPropagateable_NonNull() { + var query = """ + from test + | eval x = 9 + 1, y = salary / 1000 + | limit 5 + | stats s = avg(x) by y + | sort y + """; + + // There are multiple rules applied here. Below the simplified order of them being applied + // + // surrogates const folding surrogates const folding + // avg(x) => sum(x)/count(x) => sum(10)/count(10) => mv_sum(10)*count(*)/count(10) => 10*count(*)/count(10) + + var plan = optimizedPlan(query); + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("s", "y")); + var sAttribute = Expressions.attribute(project.projections().get(0)); + var yAttribute = Expressions.attribute(project.projections().get(1)); + + var topN = as(project.child(), TopN.class); + assertThat(topN.order().size(), is(1)); + + var order = as(topN.order().get(0), Order.class); + assertThat(order.direction(), equalTo(Order.OrderDirection.ASC)); + assertThat(order.nullsPosition(), equalTo(Order.NullsPosition.LAST)); + // same "y" for project and sort + assertThat(Expressions.attribute(order.child()), is(yAttribute)); + + var eval = as(topN.child(), Eval.class); + assertThat(eval.fields().size(), equalTo(2)); + + // 10 * count(*) + Alias evalAttribute = eval.fields().get(0); + assertThat(evalAttribute.name(), equalTo("$$SUM$s$0")); + var mul = as(evalAttribute.child(), Mul.class); + var literal = as(mul.right(), Literal.class); + assertThat(literal.fold(), equalTo(10)); + assertThat(Expressions.name(mul.left()), equalTo("$$COUNT$$$SUM$s$0$0")); + Attribute countAll = Expressions.attribute(mul.left()); + + // expression / count(10) + evalAttribute = eval.fields().get(1); + // same "s" for project and eval + assertThat(sAttribute, is(Expressions.attribute(evalAttribute))); + var div = as(evalAttribute.child(), Div.class); + assertThat(Expressions.name(div.left()), equalTo("$$SUM$s$0")); + assertThat(Expressions.name(div.right()), equalTo("$$COUNT$s$1")); + Attribute sum10 = Expressions.attribute(div.left()); + Attribute count10 = Expressions.attribute(div.right()); + + var aggregate = as(eval.child(), Aggregate.class); + assertThat(aggregate.aggregates().size(), equalTo(3)); + + // same "y" for project and sort AND aggregations + assertThat(Expressions.attribute(aggregate.aggregates().get(2)), is(yAttribute)); + + // this is count(10) coming from sum's surrogate expression and after constant folding + Alias a = (Alias) aggregate.aggregates().get(1); + assertThat(Expressions.attribute(a), is(count10)); + Count c = as(a.child(), Count.class); + literal = as(c.field(), Literal.class); + assertThat(literal.fold(), equalTo(10)); + + // this is count(*) coming from sum's surrogate expression + a = (Alias) aggregate.aggregates().get(0); + assertThat(Expressions.attribute(a), is(countAll)); + c = as(a.child(), Count.class); + literal = as(c.field(), Literal.class); + assertThat(((BytesRef) literal.fold()).utf8ToString(), equalTo("*")); + + assertThat(aggregate.groupings().size(), equalTo(1)); + assertThat(Expressions.attribute(aggregate.groupings().get(0)), is(yAttribute)); + + eval = as(aggregate.child(), Eval.class); + assertThat(eval.fields().size(), equalTo(1)); + evalAttribute = eval.fields().get(0); + assertThat(yAttribute, is(Expressions.attribute(evalAttribute))); + assertThat(evalAttribute.child(), instanceOf(Div.class)); + assertThat(Expressions.names(evalAttribute.child().references()), contains("salary")); + + var limit = as(eval.child(), Limit.class); + assertThat(limit.limit().fold(), equalTo(5)); + + as(limit.child(), EsRelation.class); + } + + /** + * Expects + * Project[[s{r}#7, y{r}#4]] + * \_TopN[[Order[y{r}#4,ASC,LAST]],10000[INTEGER]] + * \_Eval[[10.0[DOUBLE] AS s]] + * \_Aggregate[STANDARD,[y{r}#4],[y{r}#4]] + * \_Eval[[salary{f}#14 / 1000[INTEGER] AS y]] + * \_Limit[5[INTEGER]] + * \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..] + */ + public void testAvg_Of_Foldable_NonNull() { + var query = """ + from test + | eval y = salary / 1000 + | limit 5 + | stats s = avg(9 + 1) by y + | sort y + """; + + var plan = optimizedPlan(query); + var project = as(plan, Project.class); + assertThat(Expressions.names(project.projections()), contains("s", "y")); + var sAttribute = Expressions.attribute(project.projections().get(0)); + var yAttribute = Expressions.attribute(project.projections().get(1)); + + var topN = as(project.child(), TopN.class); + assertThat(topN.order().size(), is(1)); + + var order = as(topN.order().get(0), Order.class); + assertThat(order.direction(), equalTo(Order.OrderDirection.ASC)); + assertThat(order.nullsPosition(), equalTo(Order.NullsPosition.LAST)); + // same "y" for project and sort + assertThat(Expressions.attribute(order.child()), is(yAttribute)); + + var eval = as(topN.child(), Eval.class); + assertThat(eval.fields().size(), equalTo(1)); + Alias evalAttribute = eval.fields().get(0); + assertThat(sAttribute, is(Expressions.attribute(evalAttribute))); + Literal l = as(evalAttribute.child(), Literal.class); + assertThat(l.value(), equalTo(10.0d)); + assertThat(l.dataType(), equalTo(DOUBLE)); + + var aggregate = as(eval.child(), Aggregate.class); + assertThat(aggregate.aggregates().size(), equalTo(1)); + assertThat(aggregate.groupings().size(), equalTo(1)); + // same "y" for project and sort AND aggregations + assertThat(Expressions.attribute(aggregate.aggregates().get(0)), is(yAttribute)); + assertThat(Expressions.attribute(aggregate.groupings().get(0)), is(yAttribute)); + + eval = as(aggregate.child(), Eval.class); + assertThat(eval.fields().size(), equalTo(1)); + evalAttribute = eval.fields().get(0); + assertThat(yAttribute, is(Expressions.attribute(evalAttribute))); + assertThat(evalAttribute.child(), instanceOf(Div.class)); + assertThat(Expressions.names(evalAttribute.child().references()), contains("salary")); + + var limit = as(eval.child(), Limit.class); + assertThat(limit.limit().fold(), equalTo(5)); + + as(limit.child(), EsRelation.class); } } From 33a8587d7cc473a0a5c8105c17e17212e4c97ea7 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Thu, 3 Oct 2024 12:54:45 +0300 Subject: [PATCH 07/16] Update docs/changelog/112392.yaml --- docs/changelog/112392.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 docs/changelog/112392.yaml diff --git a/docs/changelog/112392.yaml b/docs/changelog/112392.yaml new file mode 100644 index 0000000000000..a6c60f5ae5929 --- /dev/null +++ b/docs/changelog/112392.yaml @@ -0,0 +1,8 @@ +pr: 112392 +summary: "ES|QL: Improve aggregation over constants handling" +area: ES|QL +type: bug +issues: + - 110257 + - 100170 + - 104430 From 636a6e05466220f5322519bb090eb728d6284440 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Thu, 3 Oct 2024 15:20:21 +0300 Subject: [PATCH 08/16] Address feedback Add capability --- .../src/main/resources/row.csv-spec | 17 ++++- .../src/main/resources/spatial.csv-spec | 6 +- .../src/main/resources/stats.csv-spec | 48 +++++++++++-- .../resources/stats_count_distinct.csv-spec | 10 ++- .../src/main/resources/stats_mad.csv-spec | 7 +- .../main/resources/stats_percentile.csv-spec | 67 +++++++++++++++++-- .../src/main/resources/stats_top.csv-spec | 36 ++++++++-- .../xpack/esql/action/EsqlCapabilities.java | 7 +- .../esql/optimizer/LogicalPlanOptimizer.java | 1 + .../logical/ReplaceAggregatesWithNull.java | 32 ++++----- 10 files changed, 193 insertions(+), 38 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index 77668939a14b2..b1218bcc91ef0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -266,14 +266,27 @@ c:long | a:double | mi:integer | ma:integer | s:long ; rowWithMultipleStatsOverNull2 -row x=1, y=2 | stats c=count(null + 1), c_d=count_distinct(1 + null), a=avg(null + x), mi=min(y + null), ma=max(y + x * null), s=sum(null); +row x=1, y=2 +| stats c=count(null + 1), + c_d=count_distinct(1 + null), + a=avg(null + x), + mi=min(y + null), + ma=max(y + x * null), + s=sum(null); c:long | c_d:long | a:double | mi:integer | ma:integer | s:double 0 |0 |null |null |null |null ; rowWithMultipleStatsOverNull3 -ROW a = 1, b = "two", c = null | STATS `a_count` = COUNT(`a`), `a_cardinality` = COUNT_DISTINCT(`a`),`b_count` = COUNT(`b`), `b_cardinality` = COUNT_DISTINCT(`b`),`c_count` = COUNT(`c`), `c_cardinality` = COUNT_DISTINCT(`c`) +required_capability: extend_aggs_on_constants_support +ROW a = 1, b = "two", c = null +| STATS `a_count` = COUNT(`a`), + `a_cardinality` = COUNT_DISTINCT(`a`), + `b_count` = COUNT(`b`), + `b_cardinality` = COUNT_DISTINCT(`b`), + `c_count` = COUNT(`c`), + `c_cardinality` = COUNT_DISTINCT(`c`) ; a_count:long |a_cardinality:long |b_count:long |b_cardinality:long |c_count:long | c_cardinality:long diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index 45814dc65ec4a..4bce53d3589b6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -38,7 +38,11 @@ wkt:keyword ; centroidOfNull -FROM airports | eval z = TO_GEOPOINT(null) | STATS centroidNull = ST_CENTROID_AGG(null), centroidExpNull = ST_CENTROID_AGG(TO_GEOPOINT(null::string)), centroidEvalNull = ST_CENTROID_AGG(z); +FROM airports +| eval z = TO_GEOPOINT(null) +| STATS centroidNull = ST_CENTROID_AGG(null), + centroidExpNull = ST_CENTROID_AGG(TO_GEOPOINT(null::string)), + centroidEvalNull = ST_CENTROID_AGG(z); centroidNull:null|centroidExpNull:geo_point|centroidEvalNull:geo_point POINT (NaN NaN) |null |POINT (NaN NaN) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 247d04cc1103b..ada8ceb0629cb 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2001,6 +2001,7 @@ x:integer |z:integer ; emptyStatsBy2 +required_capability: extend_aggs_on_constants_support from employees | eval x = [1,2,3], y = null | stats max(y) by x; max(y):null |x:integer @@ -2010,14 +2011,36 @@ null |3 ; statsOfPropagateableConst -row foo="unused" | eval mv=[1,2,3] | stats avg = avg(mv), avg([5,6]), min(mv), min([5,6]), max(mv), max([5,6]), count(mv), count([5,6]), count_distinct(mv), count_distinct([5,5,6]); +row foo="unused" +| eval mv=[1,2,3] +| stats avg = avg(mv), + avg([5,6]), + min(mv), + min([5,6]), + max(mv), + max([5,6]), + count(mv), + count([5,6]), + count_distinct(mv), + count_distinct([5,5,6]); avg:double |avg([5,6]):double|min(mv):integer|min([5,6]):integer|max(mv):integer|max([5,6]):integer|count(mv):long |count([5,6]):long|count_distinct(mv):long|count_distinct([5,5,6]):long 2.0 |5.5 |1 |5 |3 |6 |3 |2 |3 |2 ; statsOfPropagateableConstOnIndex -from employees | eval mv=[1,2,3] | stats avg = avg(mv), avg([5,6]), min(mv), min([5,6]), max(mv), max([5,6]), count(mv), count([5,6]), count_distinct(mv), count_distinct([5,5,6]); +from employees +| eval mv=[1,2,3] +| stats avg = avg(mv), + avg([5,6]), + min(mv), + min([5,6]), + max(mv), + max([5,6]), + count(mv), + count([5,6]), + count_distinct(mv), + count_distinct([5,5,6]); avg:double |avg([5,6]):double|min(mv):integer|min([5,6]):integer|max(mv):integer|max([5,6]):integer|count(mv):long |count([5,6]):long|count_distinct(mv):long|count_distinct([5,5,6]):long 2.0 |5.5 |1 |5 |3 |6 |300 |200 |3 |2 @@ -2049,7 +2072,14 @@ COUNT_DISTINCT(c):long|COUNT_DISTINCT(null):long|COUNT_DISTINCT(null - 1):long|a ; countDistinctWithGrouping -from employees | EVAL c = 5 + 6, d = 6 + null, e = [1,2,2] | STATS `cd(5+6)`=COUNT_DISTINCT(c), `cd(null)`=COUNT_DISTINCT(null), `cd(null-1)`=COUNT_DISTINCT(null - 1), `cd(eval_null)`=COUNT_DISTINCT(d), `cd(mv)`=COUNT_DISTINCT(e) BY gender | sort gender; +from employees +| EVAL c = 5 + 6, d = 6 + null, e = [1,2,2] +| STATS `cd(5+6)`=COUNT_DISTINCT(c), + `cd(null)`=COUNT_DISTINCT(null), + `cd(null-1)`=COUNT_DISTINCT(null - 1), + `cd(eval_null)`=COUNT_DISTINCT(d), + `cd(mv)`=COUNT_DISTINCT(e) BY gender +| sort gender; cd(5+6):long |cd(null):long |cd(null-1):long|cd(eval_null):long|cd(mv):long |gender:keyword 1 |0 |0 |0 |2 |F @@ -2058,7 +2088,16 @@ cd(5+6):long |cd(null):long |cd(null-1):long|cd(eval_null):long|cd(mv):long | ; countWithGrouping -from employees | EVAL a = 5 + 6, b = 6 + null, e = [1,2,2] | STATS `c(5+6)`=COUNT(a), `c(null)`=COUNT(null), `c(null-1)`=COUNT(null - 1), `c(eval_null)`=COUNT(b), `c(mv)`=COUNT(e) BY gender | sort gender; +from employees +| EVAL a = 5 + 6, + b = 6 + null, + e = [1,2,2] +| STATS `c(5+6)`=COUNT(a), + `c(null)`=COUNT(null), + `c(null-1)`=COUNT(null - 1), + `c(eval_null)`=COUNT(b), + `c(mv)`=COUNT(e) BY gender +| sort gender; c(5+6):long |c(null):long |c(null-1):long |c(eval_null):long |c(mv):long |gender:keyword 33 |0 |0 |0 |99 |F @@ -2367,6 +2406,7 @@ m:integer |a:double |x:integer ; valuesOfConstants +required_capability: extend_aggs_on_constants_support row mv_int = [1,2,3,2], string = "abc", mv_string = ["bar","foo"], null_exp = 234 + null, to_be_casted_ip = "127.0.0.1", row_null = null | stats values(null), values(mv_int), values(string), values(mv_string), values(null_exp), values(123 + null), values(to_be_casted_ip::ip), values(row_null) ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec index ffedaedf770ce..5c032eddd74a9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec @@ -179,6 +179,7 @@ m:long | languages:i ; foldablePrecision +required_capability: extend_aggs_on_constants_support from employees | eval x = 82+8 | stats count_distinct(salary, x*100); count_distinct(salary, x*100):long @@ -208,6 +209,7 @@ from employees | eval x = 82+8 | stats cd1=count_distinct(salary, x*100), cd2=co ; foldableNullPrecision +required_capability: extend_aggs_on_constants_support from employees | eval x = 82+null | stats count_distinct(salary, x*100), count_distinct(salary, null), count_distinct(salary, null + 1); count_distinct(salary, x*100):long|count_distinct(salary, null):long|count_distinct(salary, null + 1):long @@ -215,7 +217,13 @@ count_distinct(salary, x*100):long|count_distinct(salary, null):long|count_disti ; foldableNullPrecisionWithGroupings -from employees | eval x = 82+null | stats cdNullEval=count_distinct(salary, x*100), cdNull=count_distinct(salary, null), cdFoldedNull=count_distinct(salary, null + 1) by gender,languages | sort gender,languages; +required_capability: extend_aggs_on_constants_support +from employees +| eval x = 82+null +| stats cdNullEval=count_distinct(salary, x*100), + cdNull=count_distinct(salary, null), + cdFoldedNull=count_distinct(salary, null + 1) by gender,languages +| sort gender,languages; cdNullEval:long| cdNull:long |cdFoldedNull:long|gender:keyword | languages:integer 4 |4 |4 |F |1 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec index 3977325ab3cb2..c4035a218090b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec @@ -1,5 +1,10 @@ madOfNulls -row x = null::integer, y = null, z = 123 - null, a = [1,2,3], b = 5 | stats x = MEDIAN_ABSOLUTE_DEVIATION(x), y = MEDIAN_ABSOLUTE_DEVIATION(y), z = MEDIAN_ABSOLUTE_DEVIATION(null), madnull = MEDIAN_ABSOLUTE_DEVIATION(2+null), madnullEvalNull = MEDIAN_ABSOLUTE_DEVIATION(z) by a, b; +row x = null::integer, y = null, z = 123 - null, a = [1,2,3], b = 5 +| stats x = MEDIAN_ABSOLUTE_DEVIATION(x), + y = MEDIAN_ABSOLUTE_DEVIATION(y), + z = MEDIAN_ABSOLUTE_DEVIATION(null), + madnull = MEDIAN_ABSOLUTE_DEVIATION(2+null), + madnullEvalNull = MEDIAN_ABSOLUTE_DEVIATION(z) by a, b; x:double|y:double |z:double |madnull:double |madnullEvalNull:double|a:integer |b:integer null |null |null |null |null |1 |5 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec index 607258a3a7cb0..80e2cba300ad7 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec @@ -1,19 +1,54 @@ percentileOfNull -row x = null::integer, y = null, z = 123 - null | stats percIntNull = percentile(y, 90), percNull = percentile(y, 90), percentile(null, 90), percentile(null+2, 90), percentile(z, 90); +row x = null::integer, y = null, z = 123 - null +| stats percIntNull = percentile(y, 90), + percNull = percentile(y, 90), + percentile(null, 90), + percentile(null+2, 90), + percentile(z, 90); percIntNull:double|percNull:double|percentile(null, 90):double|percentile(null+2, 90):double|percentile(z, 90):double null |null |null |null |null ; percentileWithPropagateablePercentileArgument -row value200 = 200, x = null::integer, y = null, z = 123 - null, p90 = 80+10 | eval p100 = p90 + 10, p3_null = null + 2, p50 = 40+10 | stats percentile(value200, p90), percentile(value200, p100), percentile(value200, p3_null), percentile(value200, p50), percentile(p90, p90), percentile(p90, p50), percentile(p3_null, p3_null), percentile(p100, p3_null); +required_capability: extend_aggs_on_constants_support +row value200 = 200, x = null::integer, y = null, z = 123 - null, p90 = 80+10 +| eval p100 = p90 + 10, + p3_null = null + 2, + p50 = 40+10 +| stats percentile(value200, p90), + percentile(value200, p100), + percentile(value200, p3_null), + percentile(value200, p50), + percentile(p90, p90), + percentile(p90, p50), + percentile(p3_null, p3_null), + percentile(p100, p3_null); percentile(value200, p90):double|percentile(value200, p100):double|percentile(value200, p3_null):double|percentile(value200, p50):double|percentile(p90, p90):double|percentile(p90, p50):double|percentile(p3_null, p3_null):double|percentile(p100, p3_null):double 200.0 |200.0 |null |200.0 |90.0 |90.0 |null |null ; percentileWithPropagateablePercentileArgumentOnRealIndex -from employees | eval value200 = 200, x = null::integer, y = null, z = 123 - null, p90 = 80+10 | eval p100 = p90 + 10, p3_null = null + 2, p50 = 40+10 | stats percentile(value200, p90), percentile(value200, p100), percentile(value200, p3_null), percentile(value200, p50), percentile(p90, p90), percentile(p90, p50), percentile(p3_null, p3_null), percentile(p100, p3_null) by gender | sort gender; +required_capability: extend_aggs_on_constants_support +from employees +| eval value200 = 200, + x = null::integer, + y = null, + z = 123 - null, + p90 = 80+10 +| eval p100 = p90 + 10, + p3_null = null + 2, + p50 = 40+10 +| stats percentile(value200, p90), + percentile(value200, p100), + percentile(value200, p3_null), + percentile(value200, p50), + percentile(p90, p90), + percentile(p90, p50), + percentile(p3_null, p3_null), + percentile(p100, p3_null) by gender +| sort gender; percentile(value200, p90):double|percentile(value200, p100):double|percentile(value200, p3_null):double|percentile(value200, p50):double|percentile(p90, p90):double|percentile(p90, p50):double|percentile(p3_null, p3_null):double|percentile(p100, p3_null):double|gender:keyword 200.0 |200.0 |null |200.0 |90.0 |90.0 |null |null |F @@ -22,7 +57,16 @@ percentile(value200, p90):double|percentile(value200, p100):double|percentile(va ; percentileOfNullsOnRealIndex -from employees | eval x = null::integer, y = null, z = 123 - null | stats percIntNull = percentile(x, 90), percNull = percentile(y, 90), percentile(null, 90), percentile(null+2, 90), percentile(z, 90) by languages | sort languages desc; +from employees +| eval x = null::integer, + y = null, + z = 123 - null +| stats percIntNull = percentile(x, 90), + percNull = percentile(y, 90), + percentile(null, 90), + percentile(null+2, 90), + percentile(z, 90) by languages +| sort languages desc; percIntNull:double|percNull:double|percentile(null, 90):double|percentile(null+2, 90):double|percentile(z, 90):double|languages:integer null |null |null |null |null |null @@ -262,14 +306,25 @@ percentile(salary, x):double|percentile(salary, 90):double|languages:integer ; nullPercentileValue -from employees | eval x = null + 1 | stats pNull = percentile(salary_change, null), pNullFolding = percentile(salary_change, null + 1), pNullEvalFolding = percentile(salary_change, x); +required_capability: extend_aggs_on_constants_support +from employees +| eval x = null + 1 +| stats pNull = percentile(salary_change, null), + pNullFolding = percentile(salary_change, null + 1), + pNullEvalFolding = percentile(salary_change, x); pNull:double |pNullFolding:double|pNullEvalFolding:double null |null |null ; nullPercentileValue_WithGrouping -from employees | eval x = null + 1 | stats pNull = percentile(salary_change, null), pNullFolding = percentile(salary_change, null + 1), pNullEvalFolding = percentile(salary_change, x), p = percentile(salary_change, 75) by gender | sort p; +from employees +| eval x = null + 1 +| stats pNull = percentile(salary_change, null), + pNullFolding = percentile(salary_change, null + 1), + pNullEvalFolding = percentile(salary_change, x), + p = percentile(salary_change, 75) by gender +| sort p; pNull:double |pNullFolding:double|pNullEvalFolding:double| p:double |gender:keyword null |null |null |7.34 |M diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec index c95e67da5a51f..11608ff7904c8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec @@ -42,7 +42,10 @@ date:date | double:double | integer:integer | long:long topRowWithEval required_capability: agg_top -ROW d=-9.81, i=25324, l=TO_LONG(-9) | EVAL x=d+1, y=i/2, z=l+3 | STATS double = TOP(x, 2, "asc"), integer = TOP(y, 2, "asc"), long = TOP(z, 2, "asc") | keep double, integer, long +ROW d=-9.81, i=25324, l=TO_LONG(-9) +| EVAL x=d+1, y=i/2, z=l+3 +| STATS double = TOP(x, 2, "asc"), integer = TOP(y, 2, "asc"), long = TOP(z, 2, "asc") +| keep double, integer, long ; double:double |integer:integer| long:long @@ -51,7 +54,15 @@ ROW d=-9.81, i=25324, l=TO_LONG(-9) | EVAL x=d+1, y=i/2, z=l+3 | STATS double = topRowWithNull required_capability: agg_top -ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null | EVAL x=d+1+null, y=i/2+n, z=l+3, t=null | STATS double = TOP(x, 2, "asc"), integer = TOP(y, 5, "desc"), long = TOP(z, 8, "desc"), null1 = TOP(n, 1000, "asc"), null2 = TOP(t, 444, "desc"), null3 = TOP(null, 10, "desc") | keep double, long, integer, null* +ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null +| EVAL x=d+1+null, y=i/2+n, z=l+3, t=null +| STATS double = TOP(x, 2, "asc"), + integer = TOP(y, 5, "desc"), + long = TOP(z, 8, "desc"), + null1 = TOP(n, 1000, "asc"), + null2 = TOP(t, 444, "desc"), + null3 = TOP(null, 10, "desc") +| keep double, long, integer, null* ; double:double |long:long |integer:integer| null1:null |null2:null |null3:null @@ -60,7 +71,14 @@ null |-6 |null |null |null | topRowWithNull_WithBY required_capability: agg_top -ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null, `by` = [1,2,3] | EVAL x=d+1+null, y=i/2+n, z=l+3, t=null | STATS double = TOP(x, 2, "asc"), integer = TOP(y, 5, "desc"), long = TOP(z, 8, "desc"), null1 = TOP(n, 1000, "asc"), null2 = TOP(t, 444, "desc") BY `by` | keep double, long, integer, null1, null2, `by` +ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null, `by` = [1,2,3] +| EVAL x=d+1+null, y=i/2+n, z=l+3, t=null +| STATS double = TOP(x, 2, "asc"), + integer = TOP(y, 5, "desc"), + long = TOP(z, 8, "desc"), + null1 = TOP(n, 1000, "asc"), + null2 = TOP(t, 444, "desc") BY `by` +| keep double, long, integer, null1, null2, `by` ; double:double |long:long |integer:integer| null1:null |null2:null | by:integer @@ -71,7 +89,9 @@ null |-6 |null |null |null | topRowWithMultiValues required_capability: agg_top -row x = [1,2,3,4,5], z = [1,2,3,4,5] | eval y = mv_slice(x, 1, 3) | stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), y2a=top(y, 2, "asc"), y2d=top(y, 2, "desc") +row x = [1,2,3,4,5], z = [1,2,3,4,5] +| eval y = mv_slice(x, 1, 3) +| stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), y2a=top(y, 2, "asc"), y2d=top(y, 2, "desc") ; z2a:integer|z3d:integer|y2a:integer|y2d:integer @@ -80,7 +100,9 @@ z2a:integer|z3d:integer|y2a:integer|y2d:integer topRowWithMultiValues_WithBY required_capability: agg_top -row x = [1,2,3,4,5], z = [1,2,3,4,5] | eval y = mv_slice(x, 1, 3) | stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), y2a=top(y, 2, "asc"), y2d=top(y, 2, "desc") by y +row x = [1,2,3,4,5], z = [1,2,3,4,5] +| eval y = mv_slice(x, 1, 3) +| stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), y2a=top(y, 2, "asc"), y2d=top(y, 2, "desc") by y ; z2a:integer|z3d:integer|y2a:integer|y2d:integer| y:integer @@ -90,7 +112,9 @@ z2a:integer|z3d:integer|y2a:integer|y2d:integer| y:integer ; topRowWithDuplicatesInMultiValues -row x = [1,2,3,2,4,5,5], z = [1,2,3,4,5] | stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), x5a=top(x, 5, "asc"), x5d=top(x, 5, "desc") by x | sort x desc +row x = [1,2,3,2,4,5,5], z = [1,2,3,4,5] +| stats z2a=top(z, 2, "asc"), z3d=top(z, 3, "desc"), x5a=top(x, 5, "asc"), x5d=top(x, 5, "desc") by x +| sort x desc ; z2a:integer | z3d:integer | x5a:integer | x5d:integer | x:integer diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index f0fa89dedd9ab..4e4078c9faad5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -337,7 +337,12 @@ public enum Cap { /** * Compute year differences in full calendar years. */ - DATE_DIFF_YEAR_CALENDARIAL; + DATE_DIFF_YEAR_CALENDARIAL, + + /** + * Support aggregations on constants in more scenarios. + */ + EXTEND_AGGS_ON_CONSTANTS_SUPPORT; private final boolean snapshotOnly; private final FeatureFlag featureFlag; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index d3b3c00fa2afa..ad2021ec97e61 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -170,6 +170,7 @@ protected static Batch operators() { new CombineBinaryComparisons(), new CombineDisjunctions(), new SimplifyComparisonsArithmetics(DataType::areCompatible), + // TODO see https://github.com/elastic/elasticsearch/issues/113393 // new ReplaceStatsAggExpressionWithEval(), // prune/elimination new PruneFilters(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregatesWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregatesWithNull.java index 94826ef943e52..3c53bae2f81ea 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregatesWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregatesWithNull.java @@ -45,25 +45,25 @@ public ReplaceAggregatesWithNull() { @Override protected LogicalPlan rule(Aggregate aggregate) { - var aggs = aggregate.aggregates(); - List newAggs = new ArrayList<>(aggs.size()); + var aggregates = aggregate.aggregates(); + List remainingAggregates = new ArrayList<>(aggregates.size()); boolean changed = false; - List transientEval = new ArrayList<>(); + List replacingEvalVariables = new ArrayList<>(); - for (NamedExpression agg : aggs) { + for (NamedExpression agg : aggregates) { Expression e = Alias.unwrap(agg); Object value = null; - boolean isNewAgg = false; + boolean isNullAgg = false; if (e instanceof AggregateFunction af && Expressions.isNull(af.field())) { - isNewAgg = true; + isNullAgg = true; if (af instanceof CountDistinct || af instanceof Count) { value = 0L; } } else if (Expressions.isNull(e)) {// FoldNull can replace an entire aggregate function with a null Literal - isNewAgg = true; + isNullAgg = true; } - if (isNewAgg) { + if (isNullAgg) { /* * Add an eval for every null (even if they are all "null"s, they can have different data types, * depending on the Aggregate function return type). @@ -71,18 +71,18 @@ protected LogicalPlan rule(Aggregate aggregate) { * don't have to be updated. PruneColumns makes use of the Attribute ids to decide if unused references can be removed. */ var aliased = new Alias(agg.source(), agg.name(), Literal.of(agg, value), agg.toAttribute().id(), true); - transientEval.add(aliased); + replacingEvalVariables.add(aliased); changed = true; } else { - newAggs.add(agg); + remainingAggregates.add(agg); } } LogicalPlan plan = aggregate; - if (changed) { + if (changed) {// if there were null aggregates found var source = aggregate.source(); - if (newAggs.isEmpty() == false) { - plan = new Aggregate(source, aggregate.child(), aggregate.aggregateType(), aggregate.groupings(), newAggs); + if (remainingAggregates.isEmpty() == false) {// build the new Aggregate with the rest (non-null) aggregates + plan = new Aggregate(source, aggregate.child(), aggregate.aggregateType(), aggregate.groupings(), remainingAggregates); } else { // All aggs actually have been optimized away // \_Aggregate[[],[AVG([NULL][NULL]) AS s]] @@ -95,9 +95,9 @@ protected LogicalPlan rule(Aggregate aggregate) { LocalSupplier.of(new Block[] { BlockUtils.constantBlock(PlannerUtils.NON_BREAKING_BLOCK_FACTORY, null, 1) }) ); } - if (transientEval.isEmpty() == false) { - plan = new Eval(source, plan, transientEval); - plan = new Project(source, plan, Expressions.asAttributes(aggs)); + if (replacingEvalVariables.isEmpty() == false) { + plan = new Eval(source, plan, replacingEvalVariables); + plan = new Project(source, plan, Expressions.asAttributes(aggregates)); } } From 5000836ea275e914231bf7b201d4980ab824b5bb Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Fri, 4 Oct 2024 11:07:18 +0300 Subject: [PATCH 09/16] More capabilities to csv-spec files --- .../src/main/resources/spatial.csv-spec | 1 + .../src/main/resources/stats.csv-spec | 19 +++++++++---------- .../resources/stats_count_distinct.csv-spec | 1 + .../src/main/resources/stats_mad.csv-spec | 1 + .../main/resources/stats_percentile.csv-spec | 4 ++++ .../src/main/resources/stats_top.csv-spec | 2 ++ 6 files changed, 18 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index 4bce53d3589b6..45d183451f588 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -38,6 +38,7 @@ wkt:keyword ; centroidOfNull +required_capability: extend_aggs_on_constants_support FROM airports | eval z = TO_GEOPOINT(null) | STATS centroidNull = ST_CENTROID_AGG(null), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index ada8ceb0629cb..d143570b2d217 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2029,6 +2029,7 @@ row foo="unused" ; statsOfPropagateableConstOnIndex +required_capability: extend_aggs_on_constants_support from employees | eval mv=[1,2,3] | stats avg = avg(mv), @@ -2072,6 +2073,7 @@ COUNT_DISTINCT(c):long|COUNT_DISTINCT(null):long|COUNT_DISTINCT(null - 1):long|a ; countDistinctWithGrouping +required_capability: extend_aggs_on_constants_support from employees | EVAL c = 5 + 6, d = 6 + null, e = [1,2,2] | STATS `cd(5+6)`=COUNT_DISTINCT(c), @@ -2088,6 +2090,7 @@ cd(5+6):long |cd(null):long |cd(null-1):long|cd(eval_null):long|cd(mv):long | ; countWithGrouping +required_capability: extend_aggs_on_constants_support from employees | EVAL a = 5 + 6, b = 6 + null, @@ -2408,40 +2411,36 @@ m:integer |a:double |x:integer valuesOfConstants required_capability: extend_aggs_on_constants_support row mv_int = [1,2,3,2], string = "abc", mv_string = ["bar","foo"], null_exp = 234 + null, to_be_casted_ip = "127.0.0.1", row_null = null -| stats values(null), values(mv_int), values(string), values(mv_string), values(null_exp), values(123 + null), values(to_be_casted_ip::ip), values(row_null) -; +| stats values(null), values(mv_int), values(string), values(mv_string), values(null_exp), values(123 + null), values(to_be_casted_ip::ip), values(row_null); values(null):null|values(mv_int):integer|values(string):keyword|values(mv_string):keyword|values(null_exp):integer|values(123 + null):integer|values(to_be_casted_ip::ip):ip|values(row_null):null null |[1, 2, 3] |abc |[bar, foo] |null |null |127.0.0.1 |null ; byFoldableValueFromEval -from employees | eval y = 5 + 6 | stats max(salary) by y -; +from employees | eval y = 5 + 6 | stats max(salary) by y; max(salary):integer|y:integer 74999 |11 ; byFoldableValueInline -from employees | stats max(salary) by 5+6 -; +from employees | stats max(salary) by 5+6; max(salary):integer| 5+6:integer 74999 |11 ; byFoldableValueInlineWithAlias -from employees | stats max(salary) by y=5+6 -; +from employees | stats max(salary) by y=5+6; max(salary):integer| y:integer 74999 |11 ; byAndMaxFoldableValues -from employees | eval x = [1,2,3], y = 5 + 6 | stats max(x) by y -; +required_capability: extend_aggs_on_constants_support +from employees | eval x = [1,2,3], y = 5 + 6 | stats max(x) by y; max(x):integer|y:integer 3 |11 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec index 5c032eddd74a9..14bebde99c649 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_count_distinct.csv-spec @@ -187,6 +187,7 @@ count_distinct(salary, x*100):long ; foldablePrecisionWithGroupings +required_capability: extend_aggs_on_constants_support from employees | eval x = 82+8 | stats cd1=count_distinct(salary, x*100), cd2=count_distinct(salary, 10+10) by gender, languages | sort gender, languages; cd1:long | cd2:long |gender:keyword |languages:integer diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec index c4035a218090b..4eed21fc76906 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_mad.csv-spec @@ -1,4 +1,5 @@ madOfNulls +required_capability: extend_aggs_on_constants_support row x = null::integer, y = null, z = 123 - null, a = [1,2,3], b = 5 | stats x = MEDIAN_ABSOLUTE_DEVIATION(x), y = MEDIAN_ABSOLUTE_DEVIATION(y), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec index 80e2cba300ad7..a3a41d54a33de 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec @@ -1,4 +1,5 @@ percentileOfNull +required_capability: extend_aggs_on_constants_support row x = null::integer, y = null, z = 123 - null | stats percIntNull = percentile(y, 90), percNull = percentile(y, 90), @@ -286,6 +287,7 @@ constant_single:double ; foldablePercentileValue +required_capability: extend_aggs_on_constants_support from employees | eval x = 82+8 | stats percentile(salary, x), percentile(salary, 90); percentile(salary, x):double|percentile(salary, 90):double @@ -294,6 +296,7 @@ percentile(salary, x):double|percentile(salary, 90):double foldablePercentileValue_WithGrouping +required_capability: extend_aggs_on_constants_support from employees | eval x = 82+8 | stats percentile(salary, x), percentile(salary, 90) by languages | sort languages desc; percentile(salary, x):double|percentile(salary, 90):double|languages:integer @@ -318,6 +321,7 @@ null |null |null ; nullPercentileValue_WithGrouping +required_capability: extend_aggs_on_constants_support from employees | eval x = null + 1 | stats pNull = percentile(salary_change, null), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec index 11608ff7904c8..9a012a134ea44 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec @@ -54,6 +54,7 @@ ROW d=-9.81, i=25324, l=TO_LONG(-9) topRowWithNull required_capability: agg_top +required_capability: extend_aggs_on_constants_support ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null | EVAL x=d+1+null, y=i/2+n, z=l+3, t=null | STATS double = TOP(x, 2, "asc"), @@ -71,6 +72,7 @@ null |-6 |null |null |null | topRowWithNull_WithBY required_capability: agg_top +required_capability: extend_aggs_on_constants_support ROW d=-9.81, i=25324, l=TO_LONG(-9), n=null, `by` = [1,2,3] | EVAL x=d+1+null, y=i/2+n, z=l+3, t=null | STATS double = TOP(x, 2, "asc"), From f27ccd0fbbf58a784f5037abc0036a19c871e105 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Mon, 7 Oct 2024 15:44:51 +0300 Subject: [PATCH 10/16] More bwc checks --- .../esql/qa/testFixtures/src/main/resources/keep.csv-spec | 1 + .../esql/qa/testFixtures/src/main/resources/stats.csv-spec | 2 ++ .../testFixtures/src/main/resources/stats_percentile.csv-spec | 2 ++ 3 files changed, 5 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index 6bc534a9fd918..f81fe9d72ba1c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -427,6 +427,7 @@ null | 74999 ; evalWithNullAndAvg +required_capability: extend_aggs_on_constants_support from employees | eval nullsum = salary + null | stats avg(nullsum), count(nullsum); avg(nullsum):double | count(nullsum):long diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index d143570b2d217..2ab2a4445dae5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2281,6 +2281,7 @@ null weightedAvgWeightZero required_capability: agg_weighted_avg +required_capability: extend_aggs_on_constants_support from employees | eval w = 0 | stats w_avg = weighted_avg(salary, w) @@ -2294,6 +2295,7 @@ null weightedAvgWeightZeroExp required_capability: agg_weighted_avg +required_capability: extend_aggs_on_constants_support from employees | eval w = 0 + 0 | stats w_avg = weighted_avg(salary, w) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec index a3a41d54a33de..d9f134624a4b3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec @@ -58,6 +58,7 @@ percentile(value200, p90):double|percentile(value200, p100):double|percentile(va ; percentileOfNullsOnRealIndex +required_capability: extend_aggs_on_constants_support from employees | eval x = null::integer, y = null, @@ -252,6 +253,7 @@ p80_max_salary_change:double constantsFrom required_capability: fn_mv_percentile +required_capability: extend_aggs_on_constants_support from employees | eval single = 7, mv = [1, 7, 10] | stats From 020bc9f37fef102ebeb3390cd6c55cbb27a988c1 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Tue, 15 Oct 2024 17:03:11 +0300 Subject: [PATCH 11/16] Cleanup after merging "main" --- .../xpack/esql/optimizer/LogicalPlanOptimizerTests.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 1e1fc8afd64c8..9d2eff90ce647 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -5615,10 +5615,6 @@ public void testMatchFunctionIsNotNullable() { ); } - private Literal nullOf(DataType dataType) { - return new Literal(Source.EMPTY, null, dataType); - } - public void testCheckPercentileFoldableSecondArgument() { VerificationException e = expectThrows(VerificationException.class, () -> plan(""" from test From 178c3831849330964e80f6ae56a3b605084b48e3 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Wed, 16 Oct 2024 10:32:02 +0300 Subject: [PATCH 12/16] Skip one more test --- .../esql/qa/testFixtures/src/main/resources/stats.csv-spec | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index bd7721f5fb99b..59a1cae33a9d8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2242,6 +2242,7 @@ null weightedAvgWeightConstantMvWarning required_capability: agg_weighted_avg +required_capability: extend_aggs_on_constants_support from employees | eval w = [1, 2, 3] | stats w_avg = weighted_avg(salary, w) From 29dd29f7f7ae4f60d210639717e1c126a20ccaf6 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Wed, 16 Oct 2024 15:15:55 +0300 Subject: [PATCH 13/16] Change one test according to reviews --- .../testFixtures/src/main/resources/stats_percentile.csv-spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec index d9f134624a4b3..a1efd2027b93f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_percentile.csv-spec @@ -1,7 +1,7 @@ percentileOfNull required_capability: extend_aggs_on_constants_support row x = null::integer, y = null, z = 123 - null -| stats percIntNull = percentile(y, 90), +| stats percIntNull = percentile(x, 90), percNull = percentile(y, 90), percentile(null, 90), percentile(null+2, 90), From 8ab89dcb85c9e46b968fe6bc75f4728fe3f7567a Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Wed, 16 Oct 2024 17:50:55 +0300 Subject: [PATCH 14/16] More reviews --- .../function/aggregate/AggregateFunction.java | 3 + .../optimizer/LogicalPlanOptimizerTests.java | 26 ------ .../optimizer/ValidatableVerifierTests.java | 79 +++++++++++++++++++ 3 files changed, 82 insertions(+), 26 deletions(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/ValidatableVerifierTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AggregateFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AggregateFunction.java index c40d1e1a64c49..b9dc906ea0e17 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AggregateFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/AggregateFunction.java @@ -117,6 +117,9 @@ public Expression filter() { return filter; } + /** + * Indicates if an aggregate function can be folded away when foldable arguments are given to it. + */ public boolean isConstantFoldable() { return true; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 9d2eff90ce647..21d78dcd5e64c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -5615,32 +5615,6 @@ public void testMatchFunctionIsNotNullable() { ); } - public void testCheckPercentileFoldableSecondArgument() { - VerificationException e = expectThrows(VerificationException.class, () -> plan(""" - from test - | stats x = percentile(languages, languages) by emp_no - """)); - assertTrue(e.getMessage().startsWith("Found ")); - final String header = "Found 1 problem\nline "; - assertEquals( - "2:35: second argument of [percentile(languages, languages)] must be a constant, received [languages]", - e.getMessage().substring(header.length()) - ); - } - - public void testCheckCountDistinctFoldableSecondArgument() { - VerificationException e = expectThrows(VerificationException.class, () -> plan(""" - from test - | stats x = count_distinct(languages, languages) by emp_no - """)); - assertTrue(e.getMessage().startsWith("Found ")); - final String header = "Found 1 problem\nline "; - assertEquals( - "2:39: second argument of [count_distinct(languages, languages)] must be a constant, received [languages]", - e.getMessage().substring(header.length()) - ); - } - /** * eval x = null + 1 | .... | stats avg(x) * and diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/ValidatableVerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/ValidatableVerifierTests.java new file mode 100644 index 0000000000000..c4ca216b64191 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/ValidatableVerifierTests.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer; + +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.parser.EsqlParser; +import org.junit.BeforeClass; + +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; + +public class ValidatableVerifierTests extends ESTestCase { + + private static EsqlParser parser; + private static Analyzer analyzer; + private static LogicalPlanOptimizer optimizer; + private static Map mapping; + + @BeforeClass + public static void init() { + parser = new EsqlParser(); + optimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(EsqlTestUtils.TEST_CFG)); + + mapping = loadMapping("mapping-basic.json"); + EsIndex test = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD)); + IndexResolution getIndexResult = IndexResolution.valid(test); + analyzer = new Analyzer( + new AnalyzerContext(EsqlTestUtils.TEST_CFG, new EsqlFunctionRegistry(), getIndexResult, EsqlTestUtils.emptyPolicyResolution()), + TEST_VERIFIER + ); + } + + public void testCheckPercentileFoldableSecondArgument() { + assertEquals( + "1:45: second argument of [percentile(languages, languages)] must be a constant, received [languages]", + error("from test | stats x = percentile(languages, languages) by emp_no") + ); + } + + public void testCheckCountDistinctFoldableSecondArgument() { + assertEquals( + "1:49: second argument of [count_distinct(languages, languages)] must be a constant, received [languages]", + error("from test | stats x = count_distinct(languages, languages) by emp_no") + ); + } + + private static String error(String query) { + Throwable e = expectThrows(VerificationException.class, () -> optimizer.optimize(analyzer.analyze(parser.createStatement(query)))); + + String message = e.getMessage(); + assertTrue(message.startsWith("Found ")); + + String pattern = "\nline "; + int index = message.indexOf(pattern); + return message.substring(index + pattern.length()); + } + + protected List filteredWarnings() { + return withDefaultLimitWarning(super.filteredWarnings()); + } +} From db2259ba9a339d0d153cd2195467df8aa59d2c2e Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Fri, 18 Oct 2024 11:18:05 +0300 Subject: [PATCH 15/16] Bug fix related to the source() used when creating the Validations error message --- .../xpack/esql/expression/Validations.java | 6 ++-- .../function/aggregate/CountDistinct.java | 2 +- .../function/aggregate/Percentile.java | 2 +- .../expression/function/grouping/Bucket.java | 8 ++---- .../convert/FoldablesConvertFunction.java | 2 +- .../function/scalar/multivalue/MvSort.java | 3 +- .../optimizer/ValidatableVerifierTests.java | 28 +++++++++++++++++-- 7 files changed, 36 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/Validations.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/Validations.java index ffcc26cb6f188..f3b1c647ed38e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/Validations.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/Validations.java @@ -19,8 +19,8 @@ private Validations() {} /** * Validates if the given expression is foldable - if not returns a Failure. */ - public static Failure isFoldable(Expression e, String operationName, TypeResolutions.ParamOrdinal paramOrd) { - TypeResolution resolution = TypeResolutions.isFoldable(e, operationName, paramOrd); - return resolution.unresolved() ? Failure.fail(e, resolution.message()) : null; + public static Failure isFoldable(Expression e, Expression rootExpression, TypeResolutions.ParamOrdinal paramOrd) { + TypeResolution resolution = TypeResolutions.isFoldable(e, rootExpression.sourceText(), paramOrd); + return resolution.unresolved() ? Failure.fail(rootExpression, resolution.message()) : null; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index 184b61bf9a178..b993c618b1da4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -202,7 +202,7 @@ protected TypeResolution resolveType() { public void validate(Failures failures) { if (precision != null) { - failures.add(Validations.isFoldable(precision, sourceText(), SECOND)); + failures.add(Validations.isFoldable(precision, this, SECOND)); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java index 922743fce17b8..78a7dd4bbdabe 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Percentile.java @@ -160,7 +160,7 @@ protected TypeResolution resolveType() { @Override public void validate(Failures failures) { - failures.add(Validations.isFoldable(percentile, sourceText(), SECOND)); + failures.add(Validations.isFoldable(percentile, this, SECOND)); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java index 3357b2abf0e0f..35047b3ebc545 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java @@ -398,11 +398,9 @@ private static TypeResolution isStringOrDate(Expression e, String operationName, @Override public void validate(Failures failures) { - String operation = sourceText(); - - failures.add(isFoldable(buckets, operation, SECOND)) - .add(from != null ? isFoldable(from, operation, THIRD) : null) - .add(to != null ? isFoldable(to, operation, FOURTH) : null); + failures.add(isFoldable(buckets, this, SECOND)) + .add(from != null ? isFoldable(from, this, THIRD) : null) + .add(to != null ? isFoldable(to, this, FOURTH) : null); } private long foldToLong(Expression e) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FoldablesConvertFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FoldablesConvertFunction.java index 6e2b5bb63532d..dc6dc34c6e287 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FoldablesConvertFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FoldablesConvertFunction.java @@ -70,6 +70,6 @@ public final Object fold() { @Override public final void validate(Failures failures) { - failures.add(isFoldable(field(), sourceText(), null)); + failures.add(isFoldable(field(), this, null)); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSort.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSort.java index d9e41233952de..898362d302374 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSort.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSort.java @@ -234,8 +234,7 @@ public void validate(Failures failures) { if (order == null) { return; } - String operation = sourceText(); - failures.add(isFoldable(order, operation, SECOND)); + failures.add(isFoldable(order, this, SECOND)); if (isValidOrder() == false) { failures.add( Failure.fail(order, INVALID_ORDER_ERROR, sourceText(), ASC.value(), DESC.value(), ((BytesRef) order.fold()).utf8ToString()) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/ValidatableVerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/ValidatableVerifierTests.java index c4ca216b64191..88168e3677918 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/ValidatableVerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/ValidatableVerifierTests.java @@ -27,6 +27,10 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; +/** + * Tests queries that should throw VerificationExceptions after the logical optimization took place. + * These exceptions will come from running the Validatable.validate specific method. + */ public class ValidatableVerifierTests extends ESTestCase { private static EsqlParser parser; @@ -50,16 +54,36 @@ public static void init() { public void testCheckPercentileFoldableSecondArgument() { assertEquals( - "1:45: second argument of [percentile(languages, languages)] must be a constant, received [languages]", + "1:23: second argument of [percentile(languages, languages)] must be a constant, received [languages]", error("from test | stats x = percentile(languages, languages) by emp_no") ); + + assertEquals( + "1:44: second argument of [percentile(l, l)] must be a constant, received [languages]", + error("from test | eval l = languages | stats x = percentile(l, l) by emp_no") + ); + + assertEquals( + "1:47: second argument of [percentile(l, l)] must be a constant, received [languages]", + error("from test | rename languages AS l | stats x = percentile(l, l) by emp_no") + ); } public void testCheckCountDistinctFoldableSecondArgument() { assertEquals( - "1:49: second argument of [count_distinct(languages, languages)] must be a constant, received [languages]", + "1:23: second argument of [count_distinct(languages, languages)] must be a constant, received [languages]", error("from test | stats x = count_distinct(languages, languages) by emp_no") ); + + assertEquals( + "1:44: second argument of [count_distinct(l, l)] must be a constant, received [languages]", + error("from test | eval l = languages | stats x = count_distinct(l, l) by emp_no") + ); + + assertEquals( + "1:47: second argument of [count_distinct(l, l)] must be a constant, received [languages]", + error("from test | rename languages AS l | stats x = count_distinct(l, l) by emp_no") + ); } private static String error(String query) { From 1ef3b878cdb37300671562c7472ce5f36ee4f6be Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Thu, 28 Nov 2024 12:45:27 +0200 Subject: [PATCH 16/16] Fix some things after pull from main --- .../qa/testFixtures/src/main/resources/spatial.csv-spec | 2 +- .../qa/testFixtures/src/main/resources/stats.csv-spec | 9 +++++++++ .../xpack/esql/expression/function/aggregate/StdDev.java | 5 +++++ .../optimizer/rules/logical/PropagateEvalFoldables.java | 4 +++- .../xpack/esql/optimizer/LogicalPlanOptimizerTests.java | 6 +++--- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index 5775ee8093ad2..2a74fccfb2291 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -45,7 +45,7 @@ FROM airports centroidEvalNull = ST_CENTROID_AGG(z); centroidNull:null|centroidExpNull:geo_point|centroidEvalNull:geo_point -POINT (NaN NaN) |null |POINT (NaN NaN) +POINT (NaN NaN) |null |null ; centroidFromStringNested diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index cb2b7ea0d4655..318dc1531fa55 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -3244,3 +3244,12 @@ from employees | eval x = [1,2,3], y = 5 + 6 | stats max(x) by y; max(x):integer|y:integer 3 |11 ; + +// see +statsByPropagateableFoldableValue_WithDropGroupings-Ignore +row field = 555, otherfield = [1,2] | stats min(field) by otherfield | drop otherfield; + +min(field):integer +555 +555 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/StdDev.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/StdDev.java index 189b6a81912cb..876a19fa81d98 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/StdDev.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/StdDev.java @@ -91,6 +91,11 @@ public StdDev replaceChildren(List newChildren) { return new StdDev(source(), newChildren.get(0), newChildren.get(1)); } + @Override + public boolean isConstantFoldable() { + return false; + } + public StdDev withFilter(Expression filter) { return new StdDev(source(), field(), filter); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java index 2f3fe744bb638..5b7488425811a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java @@ -59,7 +59,9 @@ public LogicalPlan apply(LogicalPlan plan) { } else if (p instanceof Aggregate agg) { List newAggs = new ArrayList<>(agg.aggregates().size()); agg.aggregates().forEach(e -> { - if (Alias.unwrap(e) instanceof AggregateFunction af && af.isConstantFoldable()) { + // don't touch the aggregate functions that have a filter. The filter can eliminate the value altogether or change + // it to something else (see ReplaceStatsFilteredAggWithEval) + if (Alias.unwrap(e) instanceof AggregateFunction af && af.isConstantFoldable() && af.hasFilter() == false) { newAggs.add((NamedExpression) e.transformUp(ReferenceAttribute.class, replaceReference)); } else { newAggs.add(e); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 8f19857bdfdd8..927fee9450795 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -4110,7 +4110,7 @@ public void testBucketFailsOnFieldArgument() { assertTrue(e.getMessage().startsWith("Found ")); final String header = "Found 1 problem\nline "; assertEquals( - "3:31: third argument of [bucket(salary, 10, emp_no, bucket_end)] must be a constant, received [emp_no]", + "3:12: third argument of [bucket(salary, 10, emp_no, bucket_end)] must be a constant, received [emp_no]", e.getMessage().substring(header.length()) ); } @@ -6318,7 +6318,7 @@ public void testToDatePeriodToTimeDurationWithField() { e = expectThrows(VerificationException.class, () -> planTypes(""" from types | EVAL x = date - to_timeduration(keyword)""")); assertEquals( - "1:47: argument of [to_timeduration(keyword)] must be a constant, received [keyword]", + "1:31: argument of [to_timeduration(keyword)] must be a constant, received [keyword]", e.getMessage().substring(header.length()) ); @@ -6329,7 +6329,7 @@ public void testToDatePeriodToTimeDurationWithField() { e = expectThrows(VerificationException.class, () -> planTypes(""" from types | EVAL x = keyword, y = date - to_timeduration(x)""")); - assertEquals("1:60: argument of [to_timeduration(x)] must be a constant, received [x]", e.getMessage().substring(header.length())); + assertEquals("1:44: argument of [to_timeduration(x)] must be a constant, received [x]", e.getMessage().substring(header.length())); } // These should pass eventually once we lift some restrictions on match function