From 18c687c90bd21dfeba3201d424d4194b037697b9 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 27 Aug 2025 10:34:13 +0200 Subject: [PATCH 1/7] Clarify docs and add csv test for WHERE in STATS --- .../_snippets/commands/layout/stats-by.md | 4 +- .../src/main/resources/stats.csv-spec | 37 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md index 4f694b9db59d0..3d8207ff69fb0 100644 --- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md +++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md @@ -31,7 +31,9 @@ STATS [column1 =] expression1 [WHERE boolean_expression1][, If its name coincides with one of the computed columns, that column will be ignored. `boolean_expressionX` -: The condition that must be met for a row to be included in the evaluation of `expressionX`. +: The condition that must be met for a row to be included in the evaluation of + `expressionX`. Does not have any effect on the values used in + `grouping_expressionX` or the other `expressionX`. ::::{note} Individual `null` values are skipped when computing aggregations. 
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 8d8e0d8f74427..ac3a62aadfa71 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2420,6 +2420,43 @@ under_40K:long |inbetween:long |over_60K:long |total:long // end::aggFilteringNoGroup-result[] ; +docsStatsWithFilteringOnGroup +required_capability: per_agg_filtering +// tag::aggFilteringOnGroup[] +FROM employees +| STATS `count` = COUNT(*) WHERE languages < 3 BY languages +// end::aggFilteringNoGroup[] +| SORT languages +; + +// tag::aggFilteringOnGroup-result[] +count:long | languages:integer +15 | 1 +19 | 2 +0 | 3 +0 | 4 +0 | 5 +0 | null +// end::aggFilteringOnGroup-result[] +; + +docsStatsWithFilteringBefore +required_capability: per_agg_filtering +// tag::aggFilteringOnGroup[] +FROM employees +| WHERE languages < 3 +| STATS `count` = COUNT(*) BY languages +// end::aggFilteringBefore[] +| SORT languages +; + +// tag::aggFilteringBefore-result[] +count:long | languages:integer +15 | 1 +19 | 2 +// end::aggFilteringBefore-result[] +; + statsWithFiltering required_capability: per_agg_filtering from employees From ba01b5079ddd29c80408e7418b840711fa0481c4 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 29 Aug 2025 08:57:56 +0200 Subject: [PATCH 2/7] Apply suggestions from code review Co-authored-by: Bogdan Pintea --- .../esql/_snippets/commands/layout/stats-by.md | 4 ++-- .../esql/qa/testFixtures/src/main/resources/stats.csv-spec | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md index 3d8207ff69fb0..0bb433b7cb2ed 100644 --- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md +++ 
b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md @@ -32,8 +32,8 @@ STATS [column1 =] expression1 [WHERE boolean_expression1][, `boolean_expressionX` : The condition that must be met for a row to be included in the evaluation of - `expressionX`. Does not have any effect on the values used in - `grouping_expressionX` or the other `expressionX`. + `expressionX`. Does not have any effect on the values produced by + `grouping_expressionX` or the other `expressionX`. Consequently, `... | STATS ... WHERE ...` is not equivalent to `... | WHERE | STATS ...`. ::::{note} Individual `null` values are skipped when computing aggregations. diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index ac3a62aadfa71..583cb9e94401d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2425,7 +2425,7 @@ required_capability: per_agg_filtering // tag::aggFilteringOnGroup[] FROM employees | STATS `count` = COUNT(*) WHERE languages < 3 BY languages -// end::aggFilteringNoGroup[] +// end::aggFilteringOnGroup[] | SORT languages ; @@ -2442,7 +2442,7 @@ count:long | languages:integer docsStatsWithFilteringBefore required_capability: per_agg_filtering -// tag::aggFilteringOnGroup[] +// tag::aggFilteringBefore[] FROM employees | WHERE languages < 3 | STATS `count` = COUNT(*) BY languages From b421d9e620c016ff56308b771d0a6d5c95ffec7c Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 20 Feb 2026 18:57:50 +0100 Subject: [PATCH 3/7] Include new doc tests into example descriptions --- .../examples/stats.csv-spec/aggFilteringBefore.md | 12 ++++++++++++ .../stats.csv-spec/aggFilteringOnGroup.md | 15 +++++++++++++++ .../esql/_snippets/commands/layout/stats-by.md | 14 ++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 
docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringBefore.md create mode 100644 docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringOnGroup.md diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringBefore.md b/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringBefore.md new file mode 100644 index 0000000000000..c5d854148d705 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringBefore.md @@ -0,0 +1,12 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +```esql +FROM employees +| WHERE languages < 3 +| STATS `count` = COUNT(*) BY languages +``` + +| count:long | languages:integer | +| --- | --- | +| 15 | 1 | +| 19 | 2 | diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringOnGroup.md b/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringOnGroup.md new file mode 100644 index 0000000000000..757d284aecaec --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringOnGroup.md @@ -0,0 +1,15 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +```esql +FROM employees +| STATS `count` = COUNT(*) WHERE languages < 3 BY languages +``` + +| count:long | languages:integer | +| --- | --- | +| 15 | 1 | +| 19 | 2 | +| 0 | 3 | +| 0 | 4 | +| 0 | 5 | +| 0 | null | diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md index cdd58a7fc87d0..46095a4babe84 100644 --- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md +++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md @@ -106,6 +106,20 @@ optional as well: :::{include} ../examples/stats.csv-spec/aggFilteringNoGroup.md ::: +The `WHERE` clause can also filter on the grouping key. Note that this is +different from filtering with `WHERE` before `STATS`, because the group +itself will still appear in the output, but with a default value for +the aggregation: + +:::{include} ../examples/stats.csv-spec/aggFilteringOnGroup.md +::: + +Compare this to filtering with `WHERE` before `STATS`, which excludes +non-matching groups entirely: + +:::{include} ../examples/stats.csv-spec/aggFilteringBefore.md +::: + It’s also possible to group by multiple values: :::{include} ../examples/stats.csv-spec/statsGroupByMultipleValues.md From a243b2fa7e708cce7bc4c5bbddc3d2778598d676 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Mon, 23 Feb 2026 15:41:09 +0100 Subject: [PATCH 4/7] copyedit and restructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `**syntax**`, `**parameters**`, `**description**` → `## h2` headings - `### examples` → `## examples` - `####` subsections promoted to `###` - `#### naming` renamed to `### omitting column names` - each example gets a concise `###` heading with a rewritten lead-in that adds context beyond the heading - `boolean_expressionx` description: tightened prose, equivalence contrast moved to two fenced `esql` blocks --- 
.../_snippets/commands/layout/stats-by.md | 71 ++++++++++++------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md index 46095a4babe84..9f7d3a6aee7ab 100644 --- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md +++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md @@ -6,7 +6,7 @@ stack: ga The `STATS` processing command groups rows according to a common value and calculates one or more aggregated values over the grouped rows. -**Syntax** +## Syntax ```esql STATS [column1 =] expression1 [WHERE boolean_expression1][, @@ -15,7 +15,7 @@ STATS [column1 =] expression1 [WHERE boolean_expression1][, [BY grouping_expression1[, ..., grouping_expressionN]] ``` -**Parameters** +## Parameters `columnX` : The name by which the aggregated value is returned. If omitted, the name is @@ -32,15 +32,23 @@ STATS [column1 =] expression1 [WHERE boolean_expression1][, `boolean_expressionX` : The condition that must be met for a row to be included in the evaluation of - `expressionX`. Does not have any effect on the values produced by - `grouping_expressionX` or the other `expressionX`. Consequently, `... | STATS ... WHERE ...` is not equivalent to `... | WHERE | STATS ...`. + `expressionX`. Has no effect on `grouping_expressionX` or other aggregation + expressions. Consequently, the following are _not_ equivalent: + + ```esql + ... | STATS ... WHERE ... + ``` + + ```esql + ... | WHERE ... | STATS ... + ``` ::::{note} Individual `null` values are skipped when computing aggregations. :::: -**Description** +## Description The `STATS` processing command groups rows according to a common value and calculates one or more aggregated values over the grouped rows. For the @@ -77,35 +85,48 @@ and then grouping - that is not going to be faster. 
:::: -### Examples +## Examples + +### Group by column -Calculating a statistic and grouping by the values of another column: +Combine an aggregation with `BY` to compute a value for each group: :::{include} ../examples/stats.csv-spec/stats.md ::: +### Aggregate without grouping + Omitting `BY` returns one row with the aggregations applied over the entire dataset: :::{include} ../examples/stats.csv-spec/statsWithoutBy.md ::: -It’s possible to calculate multiple values: +### Calculate multiple values + +Separate multiple aggregations with commas to compute them in a single pass: :::{include} ../examples/stats.csv-spec/statsCalcMultipleValues.md ::: -To filter the rows that go into an aggregation, use the `WHERE` clause: +### Filter aggregations with WHERE + +Use per-aggregation `WHERE` to compute conditional metrics from the same +dataset in a single pass: :::{include} ../examples/stats.csv-spec/aggFiltering.md ::: -The aggregations can be mixed, with and without a filter and grouping is -optional as well: +### Mix filtered and unfiltered aggregations + +Filtered and unfiltered aggregations can be freely mixed. Grouping is also +optional: :::{include} ../examples/stats.csv-spec/aggFilteringNoGroup.md ::: +### Filter on the grouping key + The `WHERE` clause can also filter on the grouping key. 
Note that this is different from filtering with `WHERE` before `STATS`, because the group itself will still appear in the output, but with a default value for @@ -114,19 +135,23 @@ the aggregation: :::{include} ../examples/stats.csv-spec/aggFilteringOnGroup.md ::: -Compare this to filtering with `WHERE` before `STATS`, which excludes -non-matching groups entirely: +### WHERE before STATS excludes non-matching groups + +When `WHERE` appears before `STATS`, rows are excluded before grouping, so +non-matching groups don't appear in the output at all: :::{include} ../examples/stats.csv-spec/aggFilteringBefore.md ::: -It’s also possible to group by multiple values: +### Group by multiple values + +Separate multiple grouping expressions with a comma: :::{include} ../examples/stats.csv-spec/statsGroupByMultipleValues.md ::: $$$esql-stats-mv-group$$$ -#### Multivalued inputs +### Multivalued inputs If the grouping key is multivalued then the input row is in all groups: @@ -153,24 +178,22 @@ key. If you want to send the group key to the function then `MV_EXPAND` first: Refer to [elasticsearch/issues/134792](https://github.com/elastic/elasticsearch/issues/134792#issuecomment-3361168090) for an even more in depth explanation. -#### Multivalue functions +### Multivalue functions -Both the aggregating functions and the grouping expressions accept other -functions. This is useful for using `STATS` on multivalue columns. -For example, to calculate the average salary change, you can use `MV_AVG` to -first average the multiple values per employee, and use the result with the -`AVG` function: +Aggregation and grouping expressions accept nested functions, which is useful +for operating on multivalue columns. 
Use `MV_AVG` nested inside `AVG` to first +average each employee's multiple salary values, then aggregate across employees: :::{include} ../examples/stats.csv-spec/docsStatsAvgNestedExpression.md ::: -An example of grouping by an expression is grouping employees on the first -letter of their last name: +Grouping expressions aren't limited to column references — any expression +works. For example, group by a derived value using `LEFT`: :::{include} ../examples/stats.csv-spec/docsStatsByExpression.md ::: -#### Naming +### Omitting column names Specifying the output column name is optional. If not specified, the new column name is equal to the expression. The following query returns a column named From 27360e0fa13c10fa0e9c3d1172561e57f7856455 Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Mon, 23 Feb 2026 15:46:10 +0100 Subject: [PATCH 5/7] fix wording --- .../esql/_snippets/commands/layout/stats-by.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md index 9f7d3a6aee7ab..8e0e3aec05968 100644 --- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md +++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md @@ -180,9 +180,10 @@ for an even more in depth explanation. ### Multivalue functions -Aggregation and grouping expressions accept nested functions, which is useful -for operating on multivalue columns. Use `MV_AVG` nested inside `AVG` to first -average each employee's multiple salary values, then aggregate across employees: +Both aggregation and grouping expressions accept other functions, which is +useful for using `STATS` on multivalue columns. 
For example, to calculate the +average salary change, use `MV_AVG` to first average the multiple values per +employee, then pass the result to `AVG`: :::{include} ../examples/stats.csv-spec/docsStatsAvgNestedExpression.md ::: From b24213543e8856bb595a5249a8002676be4e0b3d Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Mon, 23 Feb 2026 15:49:31 +0100 Subject: [PATCH 6/7] add lead-in prose, fix example heading --- .../esql/_snippets/commands/layout/stats-by.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md index 8e0e3aec05968..7ef38410bb5e0 100644 --- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md +++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md @@ -87,6 +87,8 @@ and then grouping - that is not going to be faster. ## Examples +The following examples demonstrate common `STATS` patterns. + ### Group by column Combine an aggregation with `BY` to compute a value for each group: @@ -194,7 +196,7 @@ works. For example, group by a derived value using `LEFT`: :::{include} ../examples/stats.csv-spec/docsStatsByExpression.md ::: -### Omitting column names +### Output column naming Specifying the output column name is optional. If not specified, the new column name is equal to the expression. 
The following query returns a column named From e53f298b4c2cf0149918ac6791ed9623f12a1b7f Mon Sep 17 00:00:00 2001 From: Liam Thompson Date: Mon, 23 Feb 2026 17:48:33 +0100 Subject: [PATCH 7/7] fix filteron group section --- .../esql/_snippets/commands/layout/stats-by.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md index 7ef38410bb5e0..f4a8835828dc3 100644 --- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md +++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md @@ -129,18 +129,14 @@ optional: ### Filter on the grouping key -The `WHERE` clause can also filter on the grouping key. Note that this is -different from filtering with `WHERE` before `STATS`, because the group -itself will still appear in the output, but with a default value for -the aggregation: +The `WHERE` clause can also filter on the grouping key. The group itself will +still appear in the output, but with a default value for the aggregation: :::{include} ../examples/stats.csv-spec/aggFilteringOnGroup.md ::: -### WHERE before STATS excludes non-matching groups - -When `WHERE` appears before `STATS`, rows are excluded before grouping, so -non-matching groups don't appear in the output at all: +Compare this to filtering with `WHERE` before `STATS`, where rows are excluded +before grouping, so non-matching groups don't appear in the output at all: :::{include} ../examples/stats.csv-spec/aggFilteringBefore.md :::