-
Notifications
You must be signed in to change notification settings - Fork 25.8k
ESQL: Correctly manage NULL data type for SUM #144942
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
813b556
ff11c3d
5f9e939
e31c098
e204268
c9c8331
dccf93a
71470e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| area: ES|QL | ||
| issues: | ||
| - 144914 | ||
| pr: 144942 | ||
| summary: Correctly manage NULL data type for SUM | ||
| type: bug |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -584,3 +584,233 @@ FROM employees | WHERE emp_no IN (10001, null) AND first_name IS NOT NULL | SORT | |
| emp_no:integer | first_name:keyword | ||
| 10001 | Georgi | ||
| ; | ||
|
|
||
| sumOfNullWithEval | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| ROW x = 1 | ||
| | STATS s = SUM(null) | ||
| | EVAL r = s + 1 | ||
| ; | ||
|
|
||
| s:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| sumOfNullPlusNullWithEval | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| ROW x = 1 | ||
| | STATS s = SUM(null + null) | ||
| | EVAL r = s + 1 | ||
| ; | ||
|
|
||
| s:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| sumOfNullPlusOneWithEval | ||
|
|
||
| ROW x = 1 | ||
| | STATS s = SUM(null + 1) | ||
| | EVAL r = s + 1 | ||
| ; | ||
|
|
||
| s:long | r:long | ||
| null | null | ||
|
Comment on lines
+619
to
+620
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Subtle, but looks right! |
||
| ; | ||
|
|
||
| sumOfEvalNullWithEval | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| ROW x = 1 | ||
| | EVAL y = null | ||
| | STATS s = SUM(y) | ||
| | EVAL r = s + 1 | ||
| ; | ||
|
|
||
| s:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| sumOfNullifiedUnmappedWithEval | ||
| required_capability: fix_sum_of_null_optimization | ||
| required_capability: optional_fields_nullify_tech_preview | ||
|
|
||
| SET unmapped_fields="nullify"\; | ||
| ROW x = 1 | ||
| | STATS s = SUM(foo) | ||
| | EVAL r = s + 1 | ||
| ; | ||
|
|
||
| s:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| avgOfNullWithEval | ||
|
|
||
| ROW x = 1 | ||
| | STATS a = AVG(null) | ||
| | EVAL r = a + 1 | ||
| ; | ||
|
|
||
| a:double | r:double | ||
| null | null | ||
| ; | ||
|
|
||
| avgOfNullPlusNullWithEval | ||
|
|
||
| ROW x = 1 | ||
| | STATS a = AVG(null + null) | ||
| | EVAL r = a + 1 | ||
| ; | ||
|
|
||
| a:double | r:double | ||
| null | null | ||
| ; | ||
|
|
||
| avgOfEvalNullWithEval | ||
|
|
||
| ROW x = 1 | ||
| | EVAL y = null | ||
| | STATS a = AVG(y) | ||
| | EVAL r = a + 1 | ||
| ; | ||
|
|
||
| a:double | r:double | ||
| null | null | ||
| ; | ||
|
|
||
| avgOfNullifiedUnmappedWithEval | ||
| required_capability: optional_fields_nullify_tech_preview | ||
|
|
||
| SET unmapped_fields="nullify"\; | ||
| ROW x = 1 | ||
| | STATS a = AVG(foo) | ||
| | EVAL r = a + 1 | ||
| ; | ||
|
|
||
| a:double | r:double | ||
| null | null | ||
| ; | ||
|
|
||
| minOfNullWithEval | ||
|
|
||
| ROW x = 1 | ||
| | STATS m = MIN(null) | ||
| | EVAL r = m + 1 | ||
| ; | ||
|
|
||
| m:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| minOfEvalNullWithEval | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| ROW x = 1 | ||
| | EVAL y = null | ||
| | STATS m = MIN(y) | ||
| | EVAL r = m + 1 | ||
| ; | ||
|
|
||
| m:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| minOfNullifiedUnmappedWithEval | ||
| required_capability: optional_fields_nullify_tech_preview | ||
|
|
||
| SET unmapped_fields="nullify"\; | ||
| ROW x = 1 | ||
| | STATS m = MIN(foo) | ||
| | EVAL r = m + 1 | ||
| ; | ||
|
|
||
| m:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| maxOfNullWithEval | ||
|
|
||
| ROW x = 1 | ||
| | STATS m = MAX(null) | ||
| | EVAL r = m + 1 | ||
| ; | ||
|
|
||
| m:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| maxOfEvalNullWithEval | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| ROW x = 1 | ||
| | EVAL y = null | ||
| | STATS m = MAX(y) | ||
| | EVAL r = m + 1 | ||
| ; | ||
|
|
||
| m:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| maxOfNullifiedUnmappedWithEval | ||
| required_capability: optional_fields_nullify_tech_preview | ||
|
|
||
| SET unmapped_fields="nullify"\; | ||
| ROW x = 1 | ||
| | STATS m = MAX(foo) | ||
| | EVAL r = m + 1 | ||
| ; | ||
|
|
||
| m:null | r:integer | ||
| null | null | ||
| ; | ||
|
|
||
| multipleAggsOverNullExpressions | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Tests starting with this one were inspired by my previous unmerged PR #112392. |
||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| ROW x=1, y=2 | ||
| | STATS c=count(null + 1), | ||
| c_d=count_distinct(1 + null), | ||
| a=avg(null + x), | ||
| mi=min(y + null), | ||
| ma=max(y + x * null), | ||
| s=sum(null) | ||
| ; | ||
|
|
||
| c:long | c_d:long | a:double | mi:integer | ma:integer | s:null | ||
| 0 |0 |null |null |null |null | ||
| ; | ||
|
|
||
| countOfNullWithGrouping | ||
|
|
||
| ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a | ||
| ; | ||
|
|
||
| COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer | ||
| 0 |0 |0 |1 | ||
| 0 |0 |0 |2 | ||
| 0 |0 |0 |3 | ||
| ; | ||
|
|
||
| countOfNullGroupedByFoldableNull | ||
|
|
||
| ROW a = 1+null, c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a | ||
| ; | ||
|
|
||
| COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer | ||
| 0 |0 |0 |null | ||
| ; | ||
|
|
||
| countDistinctOfNullWithGrouping | ||
|
|
||
| ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT_DISTINCT(c), COUNT_DISTINCT(null), COUNT_DISTINCT(null - 1) BY a | ||
| ; | ||
|
|
||
| COUNT_DISTINCT(c):long|COUNT_DISTINCT(null):long|COUNT_DISTINCT(null - 1):long|a:integer | ||
| 0 |0 |0 |1 | ||
| 0 |0 |0 |2 | ||
| 0 |0 |0 |3 | ||
| ; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -291,13 +291,14 @@ foo:integer | |
|
|
||
| statsAggs | ||
| required_capability: optional_fields_nullify_tech_preview | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| SET unmapped_fields="nullify"\; | ||
| ROW x = 1 | ||
| | STATS s = SUM(foo) | ||
| ; | ||
|
|
||
| s:double | ||
| s:null | ||
| null | ||
| ; | ||
|
|
||
|
|
@@ -315,13 +316,14 @@ null | |
|
|
||
| statsAggsGrouped | ||
| required_capability: optional_fields_nullify_tech_preview | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| SET unmapped_fields="nullify"\; | ||
| ROW x = 1 | ||
| | STATS s = SUM(foo) BY bar | ||
| ; | ||
|
|
||
| s:double | bar:null | ||
| s:null | bar:null | ||
| null | null | ||
| ; | ||
|
|
||
|
|
@@ -583,13 +585,14 @@ null | 1985 | null | abc | fork3 | |
|
|
||
| inlinestatsCount | ||
| required_capability: optional_fields_nullify_tech_preview | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| SET unmapped_fields="nullify"\; | ||
| ROW x = 1 | ||
| | INLINE STATS c = COUNT(*), s = SUM(does_not_exist) BY d = does_not_exist | ||
| ; | ||
|
|
||
| x:integer | does_not_exist:null | c:long | s:double | d:null | ||
| x:integer | does_not_exist:null | c:long | s:null | d:null | ||
| 1 | null | 1 | null | null | ||
| ; | ||
|
|
||
|
|
@@ -818,14 +821,15 @@ null | 2024-05-10T00:00:00.000Z | |
| maxOverTimeOfDoubleNoGroupingUnmappedNullify | ||
| required_capability: optional_fields_nullify_tech_preview | ||
| required_capability: ts_command_v0 | ||
| required_capability: fix_sum_of_null_optimization | ||
|
|
||
| SET unmapped_fields="nullify"\; | ||
| TS k8s_unmapped | ||
| | STATS cost=sum(max_over_time(network.cost)) BY time_bucket = bucket(@timestamp,1minute) | ||
| | SORT time_bucket | LIMIT 10 | ||
| ; | ||
|
|
||
| cost:double | time_bucket:datetime | ||
| cost:null | time_bucket:datetime | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ++! |
||
| null | 2024-05-10T00:00:00.000Z | ||
| null | 2024-05-10T00:01:00.000Z | ||
| null | 2024-05-10T00:02:00.000Z | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fix looks right to me. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,6 +44,7 @@ | |
| import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; | ||
| import static org.elasticsearch.xpack.esql.core.type.DataType.EXPONENTIAL_HISTOGRAM; | ||
| import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; | ||
| import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; | ||
| import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; | ||
|
|
||
| /** | ||
|
|
@@ -121,6 +122,9 @@ public Sum withFilter(Expression filter) { | |
| @Override | ||
| public DataType dataType() { | ||
| DataType dt = field().dataType(); | ||
| if (dt == DataType.NULL) { | ||
| return DataType.NULL; | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IMHO, a
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought about this in #142657. Generally, I think a For
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The practical implication is that, for |
||
| } | ||
| if (dt == DataType.DENSE_VECTOR) { | ||
| return DataType.DENSE_VECTOR; | ||
| } | ||
|
|
@@ -211,9 +215,14 @@ public Expression surrogate() { | |
| ); | ||
| } | ||
|
|
||
| // SUM(const) is equivalent to MV_SUM(const)*COUNT(*). | ||
| return field.foldable() | ||
| ? new Mul(s, new MvSum(s, field), new Count(s, Literal.keyword(s, StringUtils.WILDCARD), filter(), window())) | ||
| : null; | ||
| if (field.foldable()) { | ||
| if (field().dataType() == NULL) { | ||
| return new Literal(s, null, NULL); | ||
| } | ||
| // SUM(const) is equivalent to MV_SUM(const)*COUNT(*). | ||
| return new Mul(s, new MvSum(s, field), new Count(s, Literal.keyword(s, StringUtils.WILDCARD), filter(), window())); | ||
| } else { | ||
| return null; | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
++, thank you!