Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
5159109
Add separate rule for dealing with nulls in aggregations
astefan Jul 8, 2024
74129a0
One more test from one of the reported bugs
astefan Aug 30, 2024
ef3960e
Addressing reviews
astefan Sep 9, 2024
c2db11c
Make count_distinct deal with Validateable interface
astefan Sep 18, 2024
d66004b
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Sep 18, 2024
aedad55
More count_distinct fixes, more tests for percentile's foldable second
astefan Sep 19, 2024
47db145
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Oct 1, 2024
4c03ce2
Add more tests
astefan Oct 3, 2024
0e8d484
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Oct 3, 2024
33a8587
Update docs/changelog/112392.yaml
astefan Oct 3, 2024
636a6e0
Address feedback
astefan Oct 3, 2024
0f83b2b
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Oct 3, 2024
786daa6
Merge branch 'aggregations_over_constants' of https://github.com/aste…
astefan Oct 3, 2024
5000836
More capabilities to csv-spec files
astefan Oct 4, 2024
7034814
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Oct 4, 2024
f27ccd0
More bwc checks
astefan Oct 7, 2024
c444a61
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Oct 7, 2024
d6ee3be
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Oct 15, 2024
020bc9f
Cleanup after merging "main"
astefan Oct 15, 2024
178c383
Skip one more test
astefan Oct 16, 2024
29dd29f
Change one test according to reviews
astefan Oct 16, 2024
8ab89dc
More reviews
astefan Oct 16, 2024
db2259b
Bug fix related to the source() used when creating the Validations
astefan Oct 18, 2024
db006d6
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Nov 27, 2024
1ef3b87
Fix some things after pull from main
astefan Nov 28, 2024
968dcfb
Merge branch 'main' of https://github.com/elastic/elasticsearch into …
astefan Nov 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/changelog/112392.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pr: 112392
summary: "ES|QL: Improve aggregation over constants handling"
area: ES|QL
type: bug
issues:
- 110257
- 100170
- 104430
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ null | 74999
;

evalWithNullAndAvg
required_capability: extend_aggs_on_constants_support
from employees | eval nullsum = salary + null | stats avg(nullsum), count(nullsum);

avg(nullsum):double | count(nullsum):long
Expand Down
58 changes: 46 additions & 12 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ a:integer | b:integer | c:integer
;

evalRowWithNull
row a = 1, b = 2, c = null | eval z = c+b+a;
row a = 1, b = 2, c = null | eval z = c+b+a, t = a + null, u = c + null;

a:integer | b:integer | c:null | z:integer
1 | 2 | null | null
a:integer | b:integer | c:null | z:integer | t:integer | u:null
1 | 2 | null | null | null | null
;

evalRowWithNull2
Expand All @@ -96,10 +96,10 @@ a:integer | b:integer | c:null | null:null | z:integer
;

evalRowWithNull3
row a = 1, b = 2, x = round(null) | eval z = a+b+x;
row a = 1, b = 2, x = round(null) | eval z = a+b+x, t = a + round(null);

a:integer | b:integer | x:null | z:integer
1 | 2 | null | null
a:integer | b:integer | x:null | z:integer | t:integer
1 | 2 | null | null | null
;

evalRowWithRound
Expand Down Expand Up @@ -251,27 +251,61 @@ avg:double | min(x):integer | max(x):integer | count(x):long | avg(x):double | a
8.0 | 8 | 8 | 1 | 8.0 | 5.0 | 4.0
;

rowWithMultipleStats2
row a = 1+3 | eval a = 1 + a | stats avg = avg(1+3), min(5*2), max(3*3), count(123-123), avg(1-123), avg(a+123), count_distinct(5+6);

avg:double | min(5*2):integer | max(3*3):integer |count(123-123):long |avg(1-123):double |avg(a+123):double |count_distinct(5+6):long
4.0 |10 |9 |1 |-122.0 |128.0 |1
;

rowWithMultipleStatsOverNull
row x=1, y=2 | eval tot = null + y + x | stats c=count(tot), a=avg(tot), mi=min(tot), ma=max(tot), s=sum(tot);

c:long | a:double | mi:integer | ma:integer | s:long
0 | null | null | null | null
;

rowWithMultipleStatsOverNull2
row x=1, y=2
| stats c=count(null + 1),
c_d=count_distinct(1 + null),
a=avg(null + x),
mi=min(y + null),
ma=max(y + x * null),
s=sum(null);

c:long | c_d:long | a:double | mi:integer | ma:integer | s:double
0 |0 |null |null |null |null
;

rowWithMultipleStatsOverNull3
required_capability: extend_aggs_on_constants_support
ROW a = 1, b = "two", c = null
| STATS `a_count` = COUNT(`a`),
`a_cardinality` = COUNT_DISTINCT(`a`),
`b_count` = COUNT(`b`),
`b_cardinality` = COUNT_DISTINCT(`b`),
`c_count` = COUNT(`c`),
`c_cardinality` = COUNT_DISTINCT(`c`)
;

a_count:long |a_cardinality:long |b_count:long |b_cardinality:long |c_count:long | c_cardinality:long
1 |1 |1 |1 |0 |0
;

min
row l=1, d=1.0, ln=1 + null, dn=1.0 + null | stats min(l), min(d), min(ln), min(dn);
row l=1, d=1.0, ln=1 + null, dn=1.0 + null, n=null | stats min(l), min(d), min(ln), min(dn), n1=min(null), n2=min(null+123), n3=min(n);

min(l):integer | min(d):double | min(ln):integer | min(dn):double
1 | 1.0 | null | null
min(l):integer | min(d):double | min(ln):integer | min(dn):double | n1:null | n2:integer | n3:null
1 | 1.0 | null | null | null | null | null
;


sum
row l=1, d=1.0, ln=1 + null, dn=1.0 + null | stats sum(l), sum(d), sum(ln), sum(dn);
row l=1, d=1.0, ln=1 + null, dn=1.0 + null, n=null | stats sum(l), sum(d), sum(ln), sum(dn), s1=sum(null), s2=sum(123-null), s3=sum(n);

sum(l):long | sum(d):double | sum(ln):long | sum(dn):double
1 | 1.0 | null | null
sum(l):long | sum(d):double | sum(ln):long | sum(dn):double | s1:double | s2:long | s3:double
1 | 1.0 | null | null | null | null | null
;

boolean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ wkt:keyword
["POINT(42.97109630194 14.7552534413725)", "POINT(75.8092915005895 22.727749187571)"] |[POINT(42.97109630194 14.7552534413725), POINT(75.8092915005895 22.727749187571)]
;

centroidOfNull
required_capability: extend_aggs_on_constants_support
FROM airports
| eval z = TO_GEOPOINT(null)
| STATS centroidNull = ST_CENTROID_AGG(null),
centroidExpNull = ST_CENTROID_AGG(TO_GEOPOINT(null::string)),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is converted to null, which I suppose is not expected from a user perspective. Is this something to be fixed later? Is it related with #108215?
Maybe it should have a comment or something here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

++ The whole result here looks inconsistent.

I expected null since this is consistent with e.g. SUM and AVG over 0 rows/rows with only null values.

Currently, running the same aggregation on an empty index returns POINT (NaN NaN), which itself is inconsistent with the fact that we shouldn't have NaN values in our results - but maybe this consistent with geospatial standards?

In any case, the 3 results should be the same. But it's fine to fix this in a follow-up issue. Let's figure out what the result should be and throw this inconsistency into an issue (new or existing).

@craigtaverner , is POINT(NaN NaN) really the result we need to return?

Copy link
Contributor

@alex-spies alex-spies Oct 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per #106025, returning null is correct! Thanks @craigtaverner for pointing me to this issue!

centroidEvalNull = ST_CENTROID_AGG(z);

centroidNull:null|centroidExpNull:geo_point|centroidEvalNull:geo_point
POINT (NaN NaN) |null |null
;

centroidFromStringNested
required_capability: st_centroid_agg

Expand Down
167 changes: 167 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(More for a follow-up, but): Should we maybe move the aggs on const tests to their own file, like stats_over_consts.csv-spec? We may be approaching a number of test cases where it's hard to find out if we have a given test case or not.

Original file line number Diff line number Diff line change
Expand Up @@ -2006,6 +2006,123 @@ s2point1:l | s_mv:l | s_param:l | s_expr:l | s_expr_null:l | languages:i
1 | 4 | 4 | 1 | 0 | null
;

emptyStatsBy1
from employees | eval x = [1,2,3] | stats by x | eval z = case(null is null, null, 5);

x:integer |z:integer
1 |null
2 |null
3 |null
;

emptyStatsBy2
required_capability: extend_aggs_on_constants_support
from employees | eval x = [1,2,3], y = null | stats max(y) by x;

max(y):null |x:integer
null |1
null |2
null |3
;

statsOfPropagateableConst
row foo="unused"
| eval mv=[1,2,3]
| stats avg = avg(mv),
avg([5,6]),
min(mv),
min([5,6]),
max(mv),
max([5,6]),
count(mv),
count([5,6]),
count_distinct(mv),
count_distinct([5,5,6]);

avg:double |avg([5,6]):double|min(mv):integer|min([5,6]):integer|max(mv):integer|max([5,6]):integer|count(mv):long |count([5,6]):long|count_distinct(mv):long|count_distinct([5,5,6]):long
2.0 |5.5 |1 |5 |3 |6 |3 |2 |3 |2
;

statsOfPropagateableConstOnIndex
required_capability: extend_aggs_on_constants_support
from employees
| eval mv=[1,2,3]
| stats avg = avg(mv),
avg([5,6]),
min(mv),
min([5,6]),
max(mv),
max([5,6]),
count(mv),
count([5,6]),
count_distinct(mv),
count_distinct([5,5,6]);

avg:double |avg([5,6]):double|min(mv):integer|min([5,6]):integer|max(mv):integer|max([5,6]):integer|count(mv):long |count([5,6]):long|count_distinct(mv):long|count_distinct([5,5,6]):long
2.0 |5.5 |1 |5 |3 |6 |300 |200 |3 |2
;

statsOfPropagateableConstWithGrouping_Count
ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a;

COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer
0 |0 |0 |1
0 |0 |0 |2
0 |0 |0 |3
;

statsOfPropagateableConstWithGroupingByFoldableNull
ROW a = 1+null, c = 5 | EVAL c = null + c | STATS COUNT(c), COUNT(null), COUNT(null - 1) BY a;

COUNT(c):long |COUNT(null):long|COUNT(null - 1):long|a:integer
0 |0 |0 |null
;

statsOfPropagateableConstWithGrouping_DistinctCount
ROW a = [1,2,3], c = 5 | EVAL c = null + c | STATS COUNT_DISTINCT(c), COUNT_DISTINCT(null), COUNT_DISTINCT(null - 1) BY a;

COUNT_DISTINCT(c):long|COUNT_DISTINCT(null):long|COUNT_DISTINCT(null - 1):long|a:integer
0 |0 |0 |1
0 |0 |0 |2
0 |0 |0 |3
;

countDistinctWithGrouping
required_capability: extend_aggs_on_constants_support
from employees
| EVAL c = 5 + 6, d = 6 + null, e = [1,2,2]
| STATS `cd(5+6)`=COUNT_DISTINCT(c),
`cd(null)`=COUNT_DISTINCT(null),
`cd(null-1)`=COUNT_DISTINCT(null - 1),
`cd(eval_null)`=COUNT_DISTINCT(d),
`cd(mv)`=COUNT_DISTINCT(e) BY gender
| sort gender;

cd(5+6):long |cd(null):long |cd(null-1):long|cd(eval_null):long|cd(mv):long |gender:keyword
1 |0 |0 |0 |2 |F
1 |0 |0 |0 |2 |M
1 |0 |0 |0 |2 |null
;

countWithGrouping
required_capability: extend_aggs_on_constants_support
from employees
| EVAL a = 5 + 6,
b = 6 + null,
e = [1,2,2]
| STATS `c(5+6)`=COUNT(a),
`c(null)`=COUNT(null),
`c(null-1)`=COUNT(null - 1),
`c(eval_null)`=COUNT(b),
`c(mv)`=COUNT(e) BY gender
| sort gender;

c(5+6):long |c(null):long |c(null-1):long |c(eval_null):long |c(mv):long |gender:keyword
33 |0 |0 |0 |99 |F
57 |0 |0 |0 |171 |M
10 |0 |0 |0 |30 |null
;

evalOverridingKey#[skip:-8.13.1,reason:fixed in 8.13.2]
FROM employees
| EVAL k = languages
Expand Down Expand Up @@ -2144,6 +2261,7 @@ null

weightedAvgWeightConstantMvWarning
required_capability: agg_weighted_avg
required_capability: extend_aggs_on_constants_support
from employees
| eval w = [1, 2, 3]
| stats w_avg = weighted_avg(salary, w)
Expand Down Expand Up @@ -2183,6 +2301,7 @@ null

weightedAvgWeightZero
required_capability: agg_weighted_avg
required_capability: extend_aggs_on_constants_support
from employees
| eval w = 0
| stats w_avg = weighted_avg(salary, w)
Expand All @@ -2196,6 +2315,7 @@ null

weightedAvgWeightZeroExp
required_capability: agg_weighted_avg
required_capability: extend_aggs_on_constants_support
from employees
| eval w = 0 + 0
| stats w_avg = weighted_avg(salary, w)
Expand Down Expand Up @@ -3086,3 +3206,50 @@ ROW a = [1,2,3], b = 5
STD_DEV(a):double | STD_DEV(b):double
0.816496580927726 | 0.0
;

valuesOfConstants
required_capability: extend_aggs_on_constants_support
row mv_int = [1,2,3,2], string = "abc", mv_string = ["bar","foo"], null_exp = 234 + null, to_be_casted_ip = "127.0.0.1", row_null = null
| stats values(null), values(mv_int), values(string), values(mv_string), values(null_exp), values(123 + null), values(to_be_casted_ip::ip), values(row_null);

values(null):null|values(mv_int):integer|values(string):keyword|values(mv_string):keyword|values(null_exp):integer|values(123 + null):integer|values(to_be_casted_ip::ip):ip|values(row_null):null
null |[1, 2, 3] |abc |[bar, foo] |null |null |127.0.0.1 |null
;

byFoldableValueFromEval
from employees | eval y = 5 + 6 | stats max(salary) by y;

max(salary):integer|y:integer
74999 |11
;

byFoldableValueInline
from employees | stats max(salary) by 5+6;

max(salary):integer| 5+6:integer
74999 |11
;

byFoldableValueInlineWithAlias
from employees | stats max(salary) by y=5+6;

max(salary):integer| y:integer
74999 |11
;

byAndMaxFoldableValues
required_capability: extend_aggs_on_constants_support
from employees | eval x = [1,2,3], y = 5 + 6 | stats max(x) by y;

max(x):integer|y:integer
3 |11
;

// see
statsByPropagateableFoldableValue_WithDropGroupings-Ignore
row field = 555, otherfield = [1,2] | stats min(field) by otherfield | drop otherfield;

min(field):integer
555
555
;
Loading