Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
e88e4c6
ES|QL: Fix aggregation on null value
dimitris-athanasiou Dec 16, 2025
0be6364
Update docs/changelog/139797.yaml
dimitris-athanasiou Dec 19, 2025
6019c96
Merge branch 'main' into fix-agg-on-null-esql
dimitris-athanasiou Dec 19, 2025
5927bb2
Address review feedback
dimitris-athanasiou Dec 19, 2025
cb02d37
Merge branch 'main' into fix-agg-on-null-esql
dimitris-athanasiou Dec 23, 2025
3dd883c
Merge branch 'main' into fix-agg-on-null-esql
dimitris-athanasiou Dec 23, 2025
967e647
Merge branch 'main' into fix-agg-on-null-esql
dimitris-athanasiou Jan 9, 2026
a8c7667
Do not supply nulls to aggregation tests
dimitris-athanasiou Jan 9, 2026
f6442b4
Update docs/changelog/139797.yaml
dimitris-athanasiou Jan 9, 2026
52fa801
Merge branch 'main' into fix-agg-on-null-esql
ivancea Jan 9, 2026
64c5ef5
Added special aggs tests
ivancea Jan 9, 2026
e601256
Added tests for FUSE
ivancea Jan 9, 2026
e5551b4
[CI] Auto commit changes from spotless
Jan 9, 2026
ee06d53
Fixed tests by skipping NULL checks on the aggregation field()
ivancea Jan 9, 2026
0f90a07
Fix PercentilesErrorTests
ivancea Jan 9, 2026
078946d
Merge branch 'main' into fix-agg-on-null-esql
ivancea Jan 12, 2026
6bd8106
Fix fork tests with cases using _fork
ivancea Jan 12, 2026
54969c9
Rename _fork to not be ignored on generative tests
ivancea Jan 12, 2026
a504737
Added test case with counters
ivancea Jan 12, 2026
2844972
ROUND expected rate result
ivancea Jan 12, 2026
e086e01
Fixed rate test result
ivancea Jan 12, 2026
1a249a5
Merge branch 'main' into fix-agg-on-null-esql
ivancea Jan 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changelog/139797.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
pr: 139797
summary: Fix aggregation on null value
area: ES|QL
type: bug
issues:
- 110257
- 137544
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
import java.io.IOException;
import java.util.List;

import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.*;
import static org.elasticsearch.xpack.esql.CsvTestUtils.loadCsvSpecValues;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.FORK_V9;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METRICS_GROUP_BY_ALL;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND;
import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.UNMAPPED_FIELDS;
import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.hasCapabilities;

/**
Expand Down Expand Up @@ -79,5 +83,10 @@ protected void shouldSkipTest(String testName) throws IOException {
);

assumeTrue("Cluster needs to support FORK", hasCapabilities(adminClient(), List.of(FORK_V9.capabilityName())));

assumeFalse(
"Tests expecting a _fork column can't be tested as _fork will be dropped",
loadCsvSpecValues(testCase.expectedResults).columnNames().contains("_fork")
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ public enum Type {
IP_RANGE(InetAddresses::parseCidr, BytesRef.class),
DATE_RANGE(EsqlDataTypeConverter::parseDateRange, LongRangeBlockBuilder.LongRange.class),
VERSION(v -> new org.elasticsearch.xpack.versionfield.Version(v).toBytesRef(), BytesRef.class),
NULL(s -> null, Void.class),
NULL(s -> s, Void.class),
DATETIME(
x -> x == null ? null : DateFormatters.from(UTC_DATE_TIME_FORMATTER.parse(x)).toInstant().toEpochMilli(),
(l, r) -> l instanceof Long maybeIP ? maybeIP.compareTo((Long) r) : l.toString().compareTo(r.toString()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,25 @@ ROW a = 3
x:boolean
true
;

fixAbsentOnLiteralNull
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null
| STATS ABSENT(x)
;

ABSENT(x):boolean
true
;

fixAbsentOnChildLiteralNull
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = 0
| STATS ABSENT(null)
;

ABSENT(null):boolean
true
;
Original file line number Diff line number Diff line change
Expand Up @@ -819,3 +819,47 @@ a | 0.0
b | 0.0
c | 0.0
;

fuseWithRowWithNullValues
required_capability: fuse_v6
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null, _id = "", _index = "", _score = 1.0, my_fork = ""
| LIMIT 1
| FUSE LINEAR GROUP BY my_fork
;

_score:double | x:null | _id:keyword | _index:keyword | my_fork:keyword
1 | null | "" | "" | ""
;

fuseWithForkAndRowWithNullValues
required_capability: fork_v9
required_capability: fuse_v6
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null, _id = "", _index = "", _score = 1.0
| FORK (EVAl y = 2) (EVAl y = 3)
| FUSE LINEAR
;

_score:double | x:null | _id:keyword | _index:keyword | y:integer | _fork:keyword
2 | null | "" | "" | [2, 3] | [fork1,fork2]
;

fuseWithForkAndFromWithNullValues
required_capability: fork_v9
required_capability: fuse_v6
required_capability: fix_agg_on_null_by_replacing_with_eval

FROM employees METADATA _id, _index, _score
| EVAL x = null
| FORK ( WHERE emp_no:10001 )
( WHERE emp_no:10002 )
| FUSE
| KEEP x
;

x:null
null
;
Original file line number Diff line number Diff line change
Expand Up @@ -5113,3 +5113,80 @@ FROM employees
29175 |2.0 |281.0 |48248.55
30404 |3.0 |281.0 |48248.55
;

fixInlineStatsValuesOnReferenceToNullUncasted
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null
| INLINE STATS VALUES(x)
;

x:null | VALUES(x):null
null | null
;

fixInlineStatsValuesOnReferenceToNullCasted
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null::long
| INLINE STATS VALUES(x)
;

x:long | VALUES(x):long
null | null
;

fixInlineStatsValuesOnLiteralNull
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = 1
| INLINE STATS y = VALUES(null)
;

x:integer | y:null
1 | null
;

fixInlineStatsAggOnNullGivenMultipleAggs
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW w = 1, x = null, y = null, z = 42
| INLINE STATS a = MIN(w), b = MIN(x), c = MAX(y), d = MEDIAN(z)
;

w:integer | x:null | y:null | z:integer | a:integer | b:null | c:null | d:double
1 | null | null | 42 | 1 | null | null | 42
;

fixInlineStatsAggOnNullGivenMultipleAggsWithExpressionsAndBy
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW w = 1 * 10, x = null, y = null + 3, z = 42
| INLINE STATS a = MIN(w) + 1, b = MIN(x), c = MAX(y) by z
;

w:integer | x:null | y:integer | a:integer | b:null | c:integer | z:integer
10 | null | null | 11 | null | null | 42
;

fixInlineStatsAggOnNullWithSpecialReturnValueAggs
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null
| INLINE STATS a = COUNT(x), b = COUNT_DISTINCT(x), c = PRESENT(x), d = ABSENT(x)
;

x:null | a:long | b:long | c:boolean | d:boolean
null | 0 | 0 | false | true
;

fixInlineStatsAggOnNullAsChildWithSpecialReturnValueAggs
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = 0
| INLINE STATS a = COUNT(null), b = COUNT_DISTINCT(null), c = PRESENT(null), d = ABSENT(null)
;

x:integer | a:long | b:long | c:boolean | d:boolean
0 | 0 | 0 | false | true
;
Original file line number Diff line number Diff line change
Expand Up @@ -507,3 +507,24 @@ max_rate:double
13.173725
;


rateOnNulls
required_capability: ts_command_v0
required_capability: metrics_group_by_all
required_capability: metrics_group_by_all_with_ts_dimensions
required_capability: fix_agg_on_null_by_replacing_with_eval

TS k8s
| EVAL null_col = null
| STATS
rate = rate(network.total_bytes_in),
null_literal = rate(null),
null_propagated = rate(null_col)
| EVAL rate=ROUND(rate, 4)
| SORT rate DESC
| LIMIT 2;

null_literal:double | null_propagated:double | rate:double
null | null | 13.1737
null | null | 10.6491
;
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,26 @@ ROW a = 3
x:boolean
false
;

fixPresentOnLiteralNull
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null
| STATS PRESENT(x)
;

PRESENT(x):boolean
false
;

fixPresentOnChildLiteralNull
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = 0
| STATS PRESENT(null)
;

PRESENT(null):boolean
false
;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be lovely if we could add a couple more tests, as we're adding something pretty new here:

  • tests with different agg functions, esp. the special ones from ReplaceStatsFilteredAggWithEval#mapNullToValue
  • tests with more than 1 agg function where 1 or more get null literals, and some where another agg function does not get a null value.
  • tests with BY
  • tests with INLINE STATS
  • tests with per-agg WHERE clauses

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tests with different agg functions, esp. the special ones from ReplaceStatsFilteredAggWithEval#mapNullToValue

There are already tests for count/count_distinct:

  • stats.countNull
  • stats.countDistinctNull

Those were both working because those two functions are not nullable and they were escaping null-folding because of that.

I'll definitely add tests for all other points you bring up!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, since this closes #137544, let's add the repro queries from the issue to the spec tests?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added tests for FUSE, they work correctly in this PR

Original file line number Diff line number Diff line change
Expand Up @@ -3600,6 +3600,96 @@ a:double | b:double | c:long
6.0 | 6.0 | 8
;

fixStatsValuesOnReferenceToNullUncasted
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null
| STATS VALUES(x)
;

VALUES(x):null
null
;

fixStatsValuesOnReferenceToNullCasted
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null::long
| STATS VALUES(x)
;

VALUES(x):long
null
;

fixStatsValuesOnLiteralNull
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = 1
| STATS y = VALUES(null)
;

y:null
null
;

fixStatsAggOnNullGivenMultipleAggs
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW w = 1, x = null, y = null, z = 42
| STATS a = MIN(w), b = MIN(x), c = MAX(y), d = MEDIAN(z)
;

a:integer | b:null | c:null | d:double
1 | null | null | 42
;

fixStatsAggOnNullGivenMultipleAggsWithExpressionsAndBy
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW w = 1 * 10, x = null, y = null + 3, z = 42
| STATS a = MIN(w), b = MIN(x), c = MAX(y), d = MEDIAN(z) + 1 by x
;

a:integer | b:null | c:integer | d:double | x:null
10 | null | null | 43 | null
;

fixStatsAggOnNullGivenMultipleAggsWithWhereClauses
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null, y = null, z = 1
| STATS a = MIN(x) WHERE z + 1 == 2,
b = MAX(y) WHERE z + 1 == 3,
c = VALUES(z) WHERE z + 1 == 3
;

a:null | b:null | c:integer
null | null | null
;


fixStatsAggOnNullWithSpecialReturnValueAggs
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = null
| STATS a = COUNT(x), b = COUNT_DISTINCT(x), c = PRESENT(x), d = ABSENT(x)
;

a:long | b:long | c:boolean | d:boolean
0 | 0 | false | true
;

fixStatsAggOnNullAsChildWithSpecialReturnValueAggs
required_capability: fix_agg_on_null_by_replacing_with_eval

ROW x = 0
| STATS a = COUNT(null), b = COUNT_DISTINCT(null), c = PRESENT(null), d = ABSENT(null)
;

a:long | b:long | c:boolean | d:boolean
0 | 0 | false | true
;

statsMvConstantGroupByWhere
required_capability: fix_stats_mv_constant_fold
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperator;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.rest.action.admin.cluster.RestNodesCapabilitiesAction;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStatsFilteredOrNullAggWithEval;
import org.elasticsearch.xpack.esql.plugin.EsqlFeatures;

import java.util.ArrayList;
Expand Down Expand Up @@ -1828,7 +1829,7 @@ public enum Cap {
FIX_INLINE_STATS_INCORRECT_PRUNNING(INLINE_STATS.enabled),

/**
* {@link org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceStatsFilteredAggWithEval} replaced a stats
* {@link ReplaceStatsFilteredOrNullAggWithEval} replaced a stats
* with false filter with null with {@link org.elasticsearch.xpack.esql.expression.function.aggregate.Present} or
* {@link org.elasticsearch.xpack.esql.expression.function.aggregate.Absent}
*/
Expand All @@ -1849,6 +1850,14 @@ public enum Cap {
*/
ENABLE_REDUCE_NODE_LATE_MATERIALIZATION(Build.current().isSnapshot()),

/**
* {@link ReplaceStatsFilteredOrNullAggWithEval} now replaces an
* {@link org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction} with null value with an
* {@link org.elasticsearch.xpack.esql.plan.logical.Eval}.
* https://github.com/elastic/elasticsearch/issues/137544
*/
FIX_AGG_ON_NULL_BY_REPLACING_WITH_EVAL,

/**
* Support for requesting the "_tier" metadata field.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ public static double doubleValueOf(Expression field, String sourceText, String f
return n.doubleValue();
}
throw new EsqlIllegalArgumentException(
Strings.format(null, "[{}] value must be a constant number in [{}], found [{}]", fieldName, sourceText, field)
Strings.format("[%s] value must be a constant number in [%s], found [%s]", fieldName, sourceText, field)
);
}
}
Loading