diff --git a/docs/changelog/144942.yaml b/docs/changelog/144942.yaml new file mode 100644 index 0000000000000..337f82a825cea --- /dev/null +++ b/docs/changelog/144942.yaml @@ -0,0 +1,6 @@ +area: ES|QL +issues: + - 144914 +pr: 144942 +summary: Correctly manage NULL data type for SUM +type: bug diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec index c615d3edc793f..c0c49756a539c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec @@ -584,3 +584,233 @@ FROM employees | WHERE emp_no IN (10001, null) AND first_name IS NOT NULL | SORT emp_no:integer | first_name:keyword 10001 | Georgi ; + +sumOfNullWithEval +required_capability: fix_sum_of_null_optimization + +ROW x = 1 +| STATS s = SUM(null) +| EVAL r = s + 1 +; + +s:null | r:integer +null | null +; + +sumOfNullPlusNullWithEval +required_capability: fix_sum_of_null_optimization + +ROW x = 1 +| STATS s = SUM(null + null) +| EVAL r = s + 1 +; + +s:null | r:integer +null | null +; + +sumOfNullPlusOneWithEval + +ROW x = 1 +| STATS s = SUM(null + 1) +| EVAL r = s + 1 +; + +s:long | r:long +null | null +; + +sumOfEvalNullWithEval +required_capability: fix_sum_of_null_optimization + +ROW x = 1 +| EVAL y = null +| STATS s = SUM(y) +| EVAL r = s + 1 +; + +s:null | r:integer +null | null +; + +sumOfNullifiedUnmappedWithEval +required_capability: fix_sum_of_null_optimization +required_capability: optional_fields_nullify_tech_preview + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS s = SUM(foo) +| EVAL r = s + 1 +; + +s:null | r:integer +null | null +; + +avgOfNullWithEval + +ROW x = 1 +| STATS a = AVG(null) +| EVAL r = a + 1 +; + +a:double | r:double +null | null +; + +avgOfNullPlusNullWithEval + +ROW x = 1 +| STATS a = AVG(null + null) +| EVAL r = a + 1 +; + +a:double | r:double +null | null +; + +avgOfEvalNullWithEval + +ROW x = 1 +| EVAL y = null +| STATS a = AVG(y) +| EVAL r = a + 1 +; + +a:double | r:double +null | null +; + +avgOfNullifiedUnmappedWithEval +required_capability: optional_fields_nullify_tech_preview + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS a = AVG(foo) +| EVAL r = a + 1 +; + +a:double | r:double +null | null +; + +minOfNullWithEval + +ROW x = 1 +| STATS m = MIN(null) +| EVAL r = m + 1 +; + +m:null | r:integer +null | null +; + +minOfEvalNullWithEval +required_capability: fix_sum_of_null_optimization + +ROW x = 1 +| EVAL y = null +| STATS m = MIN(y) +| EVAL r = m + 1 +; + +m:null | r:integer +null | null +; + +minOfNullifiedUnmappedWithEval +required_capability: optional_fields_nullify_tech_preview + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS m = MIN(foo) +| EVAL r = m + 1 +; + +m:null | r:integer +null | null +; + +maxOfNullWithEval + +ROW x = 1 +| STATS m = MAX(null) +| EVAL r = m + 1 +; + +m:null | r:integer +null | null +; + +maxOfEvalNullWithEval +required_capability: fix_sum_of_null_optimization + +ROW x = 1 +| EVAL y = null +| STATS m = MAX(y) +| EVAL r = m + 1 +; + +m:null | r:integer +null | null +; + +maxOfNullifiedUnmappedWithEval +required_capability: optional_fields_nullify_tech_preview + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS m = MAX(foo) +| EVAL r = m + 1 +; + +m:null | r:integer +null | null +; + +multipleAggsOverNullExpressions +required_capability: fix_sum_of_null_optimization + +ROW x=1, y=2 +| STATS c=count(null + 1), + c_d=count_distinct(1 + null), + a=avg(null + x), + mi=min(y + null), + ma=max(y + x * null), + s=sum(null) +; + + c:long | c_d:long | a:double | mi:integer | ma:integer | s:null +0 |0 |null |null |null |null +; + +countOfNullWithGrouping + +ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a +; + +COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer +0 |0 |0 |1 +0 |0 |0 |2 +0 |0 |0 |3 +; + +countOfNullGroupedByFoldableNull + +ROW a = 1+null, c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a +; + +COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer +0 |0 |0 |null +; + +countDistinctOfNullWithGrouping + +ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT_DISTINCT(c), COUNT_DISTINCT(null), COUNT_DISTINCT(null - 1) BY a +; + +COUNT_DISTINCT(c):long|COUNT_DISTINCT(null):long|COUNT_DISTINCT(null - 1):long|a:integer +0 |0 |0 |1 +0 |0 |0 |2 +0 |0 |0 |3 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 64b038d31a8c6..148521562450a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2011,11 +2011,21 @@ s2point1:l | s_mv:l | s_null:l | s_expr:l | s_expr_null:l | rows:l | languages:i sumOfConst#[skip:-8.13.99,reason:supported in 8.14] FROM employees -| STATS s1 = sum(1), s2point1 = sum(2.1), s_mv = sum([-1, 0, 3]) * 3, s_null = sum(null), s_expr = sum(1+1), s_expr_null = sum(1+null), rows = count(*) +| STATS s1 = sum(1), s2point1 = sum(2.1), s_mv = sum([-1, 0, 3]) * 3, s_expr = sum(1+1), s_expr_null = sum(1+null), rows = count(*) ; -s1:l | s2point1:d | s_mv:l | s_null:d | s_expr:l | s_expr_null:l | rows:l -100 | 210.0 | 600 | null | 200 | null | 100 +s1:l | s2point1:d | s_mv:l | s_expr:l | s_expr_null:l | rows:l +100 | 210.0 | 600 | 200 | null | 100 +; + +sumOfNullConst +required_capability: fix_sum_of_null_optimization +FROM employees +| STATS s_null = sum(null) +; + +s_null:null +null ; sumOfConstGrouped#[skip:-8.13.99,reason:supported in 8.14] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec index 00550ca0a1eb1..12916c435a2af 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec @@ -291,13 +291,14 @@ foo:integer statsAggs required_capability: optional_fields_nullify_tech_preview +required_capability: fix_sum_of_null_optimization SET unmapped_fields="nullify"\; ROW x = 1 | STATS s = SUM(foo) ; -s:double +s:null null ; @@ -315,13 +316,14 @@ null statsAggsGrouped required_capability: optional_fields_nullify_tech_preview +required_capability: fix_sum_of_null_optimization SET unmapped_fields="nullify"\; ROW x = 1 | STATS s = SUM(foo) BY bar ; -s:double | bar:null +s:null | bar:null null | null ; @@ -583,13 +585,14 @@ null | 1985 | null | abc | fork3 inlinestatsCount required_capability: optional_fields_nullify_tech_preview +required_capability: fix_sum_of_null_optimization SET unmapped_fields="nullify"\; ROW x = 1 | INLINE STATS c = COUNT(*), s = SUM(does_not_exist) BY d = does_not_exist ; -x:integer | does_not_exist:null | c:long | s:double | d:null +x:integer | does_not_exist:null | c:long | s:null | d:null 1 | null | 1 | null | null ; @@ -818,6 +821,7 @@ null | 2024-05-10T00:00:00.000Z maxOverTimeOfDoubleNoGroupingUnmappedNullify required_capability: optional_fields_nullify_tech_preview required_capability: ts_command_v0 +required_capability: fix_sum_of_null_optimization SET unmapped_fields="nullify"\; TS k8s_unmapped @@ -825,7 +829,7 @@ TS k8s_unmapped | SORT time_bucket | LIMIT 10 ; -cost:double | time_bucket:datetime +cost:null | time_bucket:datetime null | 2024-05-10T00:00:00.000Z null | 2024-05-10T00:01:00.000Z null | 2024-05-10T00:02:00.000Z diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index a881994e445c2..a62f83eb4db4e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -2384,6 +2384,12 @@ public enum Cap { */ UNMAPPED_FIELDS_DEFAULT_SETTING_RENAME, + /** + * Fix for {@code SUM(null)} producing a type mismatch after surrogate expansion. + * See https://github.com/elastic/elasticsearch/issues/144914 + */ + FIX_SUM_OF_NULL_OPTIMIZATION, + // Last capability should still have a comma for fewer merge conflicts when adding new ones :) // This comment prevents the semicolon from being on the previous capability when Spotless formats the file. ; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java index e9533ac1b85af..2a5ee15d53fa9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Sum.java @@ -44,6 +44,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.EXPONENTIAL_HISTOGRAM; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; /** @@ -121,6 +122,9 @@ public Sum withFilter(Expression filter) { @Override public DataType dataType() { DataType dt = field().dataType(); + if (dt == DataType.NULL) { + return DataType.NULL; + } if (dt == DataType.DENSE_VECTOR) { return DataType.DENSE_VECTOR; } @@ -211,9 +215,14 @@ public Expression surrogate() { ); } - // SUM(const) is equivalent to MV_SUM(const)*COUNT(*). - return field.foldable() - ? new Mul(s, new MvSum(s, field), new Count(s, Literal.keyword(s, StringUtils.WILDCARD), filter(), window())) - : null; + if (field.foldable()) { + if (field().dataType() == NULL) { + return new Literal(s, null, NULL); + } + // SUM(const) is equivalent to MV_SUM(const)*COUNT(*). + return new Mul(s, new MvSum(s, field), new Count(s, Literal.keyword(s, StringUtils.WILDCARD), filter(), window())); + } else { + return null; + } } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index e6a309d981d98..46f1072b2e5be 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -4590,7 +4590,7 @@ public void testCountOfLiteral() { * \_Project[[s{r}#3, s_expr{r}#5, s_null{r}#7, w{r}#10]] * \_Project[[s{r}#3, s_expr{r}#5, s_null{r}#7, w{r}#10]] * \_Eval[[MVSUM([1, 2][INTEGER]) * $$COUNT$s$0{r}#25 AS s, MVSUM(314.0[DOUBLE] / 100[INTEGER]) * $$COUNT$s$0{r}#25 AS s - * _expr, MVSUM(null[NULL]) * $$COUNT$s$0{r}#25 AS s_null]] + * _expr, null[NULL] AS s_null]] * \_Aggregate[[w{r}#10],[COUNT(*[KEYWORD]) AS $$COUNT$s$0, w{r}#10]] * \_Eval[[emp_no{f}#15 % 2[INTEGER] AS w]] * \_EsRelation[test][_meta_field{f}#21, emp_no{f}#15, first_name{f}#16, ..] @@ -4631,14 +4631,12 @@ public void testSumOfLiteral() { var count_expr = as(mul_expr.right(), ReferenceAttribute.class); assertThat(count_expr.name(), equalTo("$$COUNT$s$0")); - // s_null == mv_sum(null) * count(*) + // s_null == null (literal) — SUM(null) short-circuits to a null literal of NULL type var s_null = as(exprs.get(2), Alias.class); assertThat(s_null.name(), equalTo("s_null")); - var mul_null = as(s_null.child(), Mul.class); - var mvSum_null = as(mul_null.left(), MvSum.class); - assertThat(mvSum_null.field(), equalTo(NULL)); - var count_null = as(mul_null.right(), ReferenceAttribute.class); - assertThat(count_null.name(), equalTo("$$COUNT$s$0")); + var nullLiteral = as(s_null.child(), Literal.class); + assertNull(nullLiteral.value()); + assertThat(nullLiteral.dataType(), equalTo(DataType.NULL)); var countAgg = as(Alias.unwrap(agg.aggregates().get(0)), Count.class); assertThat(countAgg.children().get(0), instanceOf(Literal.class));