From 18c687c90bd21dfeba3201d424d4194b037697b9 Mon Sep 17 00:00:00 2001
From: Alexander Spies <alexander.spies@elastic.co>
Date: Wed, 27 Aug 2025 10:34:13 +0200
Subject: [PATCH 1/7] Clarify docs and add csv test for WHERE in STATS

---
 .../_snippets/commands/layout/stats-by.md     |  4 +-
 .../src/main/resources/stats.csv-spec         | 37 +++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
index 4f694b9db59d0..3d8207ff69fb0 100644
--- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
+++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
@@ -31,7 +31,9 @@ STATS [column1 =] expression1 [WHERE boolean_expression1][,
     If its name coincides with one of the computed columns, that column will be ignored.
 
 `boolean_expressionX`
-:   The condition that must be met for a row to be included in the evaluation of `expressionX`.
+:   The condition that must be met for a row to be included in the evaluation of
+    `expressionX`. Does not have any effect on the values used in
+    `grouping_expressionX` or the other `expressionX`.
 
 ::::{note}
 Individual `null` values are skipped when computing aggregations.
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
index 8d8e0d8f74427..ac3a62aadfa71 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
@@ -2420,6 +2420,43 @@ under_40K:long |inbetween:long |over_60K:long  |total:long
 // end::aggFilteringNoGroup-result[]
 ;
 
+docsStatsWithFilteringOnGroup
+required_capability: per_agg_filtering
+// tag::aggFilteringOnGroup[]
+FROM employees
+| STATS `count` = COUNT(*) WHERE languages < 3 BY languages
+// end::aggFilteringNoGroup[]
+| SORT languages
+;
+
+// tag::aggFilteringOnGroup-result[]
+count:long | languages:integer
+15         | 1
+19         | 2
+0          | 3
+0          | 4
+0          | 5
+0          | null
+// end::aggFilteringOnGroup-result[]
+;
+
+docsStatsWithFilteringBefore
+required_capability: per_agg_filtering
+// tag::aggFilteringOnGroup[]
+FROM employees
+| WHERE languages < 3
+| STATS `count` = COUNT(*) BY languages
+// end::aggFilteringBefore[]
+| SORT languages
+;
+
+// tag::aggFilteringBefore-result[]
+count:long | languages:integer
+15         | 1
+19         | 2
+// end::aggFilteringBefore-result[]
+;
+
 statsWithFiltering
 required_capability: per_agg_filtering
 from employees

From ba01b5079ddd29c80408e7418b840711fa0481c4 Mon Sep 17 00:00:00 2001
From: Alexander Spies <alexander.spies@elastic.co>
Date: Fri, 29 Aug 2025 08:57:56 +0200
Subject: [PATCH 2/7] Apply suggestions from code review

Co-authored-by: Bogdan Pintea <sig11@mailbox.org>
---
 .../esql/_snippets/commands/layout/stats-by.md                | 4 ++--
 .../esql/qa/testFixtures/src/main/resources/stats.csv-spec    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
index 3d8207ff69fb0..0bb433b7cb2ed 100644
--- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
+++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
@@ -32,8 +32,8 @@ STATS [column1 =] expression1 [WHERE boolean_expression1][,
 
 `boolean_expressionX`
 :   The condition that must be met for a row to be included in the evaluation of
-    `expressionX`. Does not have any effect on the values used in
-    `grouping_expressionX` or the other `expressionX`.
+    `expressionX`. Does not have any effect on the values produced by
+    `grouping_expressionX` or the other `expressionX`. Consequently, `... | STATS ... WHERE <condition> ...` is not equivalent to `... | WHERE <condition> | STATS ...`.
 
 ::::{note}
 Individual `null` values are skipped when computing aggregations.
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
index ac3a62aadfa71..583cb9e94401d 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
@@ -2425,7 +2425,7 @@ required_capability: per_agg_filtering
 // tag::aggFilteringOnGroup[]
 FROM employees
 | STATS `count` = COUNT(*) WHERE languages < 3 BY languages
-// end::aggFilteringNoGroup[]
+// end::aggFilteringOnGroup[]
 | SORT languages
 ;
 
@@ -2442,7 +2442,7 @@ count:long | languages:integer
 
 docsStatsWithFilteringBefore
 required_capability: per_agg_filtering
-// tag::aggFilteringOnGroup[]
+// tag::aggFilteringBefore[]
 FROM employees
 | WHERE languages < 3
 | STATS `count` = COUNT(*) BY languages

From b421d9e620c016ff56308b771d0a6d5c95ffec7c Mon Sep 17 00:00:00 2001
From: Alexander Spies <alexander.spies@elastic.co>
Date: Fri, 20 Feb 2026 18:57:50 +0100
Subject: [PATCH 3/7] Include new doc tests into example descriptions

---
 .../examples/stats.csv-spec/aggFilteringBefore.md | 12 ++++++++++++
 .../stats.csv-spec/aggFilteringOnGroup.md         | 15 +++++++++++++++
 .../esql/_snippets/commands/layout/stats-by.md    | 14 ++++++++++++++
 3 files changed, 41 insertions(+)
 create mode 100644 docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringBefore.md
 create mode 100644 docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringOnGroup.md

diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringBefore.md b/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringBefore.md
new file mode 100644
index 0000000000000..c5d854148d705
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringBefore.md
@@ -0,0 +1,12 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+FROM employees
+| WHERE languages < 3
+| STATS `count` = COUNT(*) BY languages
+```
+
+| count:long | languages:integer |
+| --- | --- |
+| 15 | 1 |
+| 19 | 2 |
diff --git a/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringOnGroup.md b/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringOnGroup.md
new file mode 100644
index 0000000000000..757d284aecaec
--- /dev/null
+++ b/docs/reference/query-languages/esql/_snippets/commands/examples/stats.csv-spec/aggFilteringOnGroup.md
@@ -0,0 +1,15 @@
+% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+```esql
+FROM employees
+| STATS `count` = COUNT(*) WHERE languages < 3 BY languages
+```
+
+| count:long | languages:integer |
+| --- | --- |
+| 15 | 1 |
+| 19 | 2 |
+| 0 | 3 |
+| 0 | 4 |
+| 0 | 5 |
+| 0 | null |
diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
index cdd58a7fc87d0..46095a4babe84 100644
--- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
+++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
@@ -106,6 +106,20 @@ optional as well:
 :::{include} ../examples/stats.csv-spec/aggFilteringNoGroup.md
 :::
 
+The `WHERE` clause can also filter on the grouping key. Note that this is
+different from filtering with `WHERE` before `STATS`, because the group
+itself will still appear in the output, but with a default value for
+the aggregation:
+
+:::{include} ../examples/stats.csv-spec/aggFilteringOnGroup.md
+:::
+
+Compare this to filtering with `WHERE` before `STATS`, which excludes
+non-matching groups entirely:
+
+:::{include} ../examples/stats.csv-spec/aggFilteringBefore.md
+:::
+
 It’s also possible to group by multiple values:
 
 :::{include} ../examples/stats.csv-spec/statsGroupByMultipleValues.md

From a243b2fa7e708cce7bc4c5bbddc3d2778598d676 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Mon, 23 Feb 2026 15:41:09 +0100
Subject: [PATCH 4/7] copyedit and restructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- `**Syntax**`, `**Parameters**`, `**Description**` → `## h2` headings
- `### Examples` → `## Examples`
- `####` subsections promoted to `###`
- `#### Naming` renamed to `### Omitting column names`
- each example gets a concise `###` heading with a rewritten lead-in that adds context beyond the heading
- `boolean_expressionX` description: tightened prose, equivalence contrast moved to two fenced `esql` blocks
---
 .../_snippets/commands/layout/stats-by.md     | 71 ++++++++++++-------
 1 file changed, 47 insertions(+), 24 deletions(-)

diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
index 46095a4babe84..9f7d3a6aee7ab 100644
--- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
+++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
@@ -6,7 +6,7 @@ stack: ga
 The `STATS` processing command groups rows according to a common value
 and calculates one or more aggregated values over the grouped rows.
 
-**Syntax**
+## Syntax
 
 ```esql
 STATS [column1 =] expression1 [WHERE boolean_expression1][,
@@ -15,7 +15,7 @@ STATS [column1 =] expression1 [WHERE boolean_expression1][,
       [BY grouping_expression1[, ..., grouping_expressionN]]
 ```
 
-**Parameters**
+## Parameters
 
 `columnX`
 :   The name by which the aggregated value is returned. If omitted, the name is
@@ -32,15 +32,23 @@ STATS [column1 =] expression1 [WHERE boolean_expression1][,
 
 `boolean_expressionX`
 :   The condition that must be met for a row to be included in the evaluation of
-    `expressionX`. Does not have any effect on the values produced by
-    `grouping_expressionX` or the other `expressionX`. Consequently, `... | STATS ... WHERE <condition> ...` is not equivalent to `... | WHERE <condition> | STATS ...`.
+    `expressionX`. Has no effect on `grouping_expressionX` or other aggregation
+    expressions. Consequently, the following are _not_ equivalent:
+
+    ```esql
+    ... | STATS ... WHERE <condition> ...
+    ```
+
+    ```esql
+    ... | WHERE <condition> | STATS ...
+    ```
 
 ::::{note}
 Individual `null` values are skipped when computing aggregations.
 ::::
 
 
-**Description**
+## Description
 
 The `STATS` processing command groups rows according to a common value
 and calculates one or more aggregated values over the grouped rows. For the
@@ -77,35 +85,48 @@ and then grouping - that is not going to be faster.
 ::::
 
 
-### Examples
+## Examples
+
+### Group by column
 
-Calculating a statistic and grouping by the values of another column:
+Combine an aggregation with `BY` to compute a value for each group:
 
 :::{include} ../examples/stats.csv-spec/stats.md
 :::
 
+### Aggregate without grouping
+
 Omitting `BY` returns one row with the aggregations applied over the entire
 dataset:
 
 :::{include} ../examples/stats.csv-spec/statsWithoutBy.md
 :::
 
-It’s possible to calculate multiple values:
+### Calculate multiple values
+
+Separate multiple aggregations with commas to compute them in a single pass:
 
 :::{include} ../examples/stats.csv-spec/statsCalcMultipleValues.md
 :::
 
-To filter the rows that go into an aggregation, use the `WHERE` clause:
+### Filter aggregations with WHERE
+
+Use per-aggregation `WHERE` to compute conditional metrics from the same
+dataset in a single pass:
 
 :::{include} ../examples/stats.csv-spec/aggFiltering.md
 :::
 
-The aggregations can be mixed, with and without a filter and grouping is
-optional as well:
+### Mix filtered and unfiltered aggregations
+
+Filtered and unfiltered aggregations can be freely mixed. Grouping is also
+optional:
 
 :::{include} ../examples/stats.csv-spec/aggFilteringNoGroup.md
 :::
 
+### Filter on the grouping key
+
 The `WHERE` clause can also filter on the grouping key. Note that this is
 different from filtering with `WHERE` before `STATS`, because the group
 itself will still appear in the output, but with a default value for
@@ -114,19 +135,23 @@ the aggregation:
 :::{include} ../examples/stats.csv-spec/aggFilteringOnGroup.md
 :::
 
-Compare this to filtering with `WHERE` before `STATS`, which excludes
-non-matching groups entirely:
+### WHERE before STATS excludes non-matching groups
+
+When `WHERE` appears before `STATS`, rows are excluded before grouping, so
+non-matching groups don't appear in the output at all:
 
 :::{include} ../examples/stats.csv-spec/aggFilteringBefore.md
 :::
 
-It’s also possible to group by multiple values:
+### Group by multiple values
+
+Separate multiple grouping expressions with a comma:
 
 :::{include} ../examples/stats.csv-spec/statsGroupByMultipleValues.md
 :::
 
 $$$esql-stats-mv-group$$$
-#### Multivalued inputs
+### Multivalued inputs
 
 If the grouping key is multivalued then the input row is in all groups:
 
@@ -153,24 +178,22 @@ key. If you want to send the group key to the function then `MV_EXPAND` first:
 Refer to [elasticsearch/issues/134792](https://github.com/elastic/elasticsearch/issues/134792#issuecomment-3361168090)
 for an even more in depth explanation.
 
-#### Multivalue functions
+### Multivalue functions
 
-Both the aggregating functions and the grouping expressions accept other
-functions. This is useful for using `STATS` on multivalue columns.
-For example, to calculate the average salary change, you can use `MV_AVG` to
-first average the multiple values per employee, and use the result with the
-`AVG` function:
+Aggregation and grouping expressions accept nested functions, which is useful
+for operating on multivalue columns. Use `MV_AVG` nested inside `AVG` to first
+average each employee's multiple salary values, then aggregate across employees:
 
 :::{include} ../examples/stats.csv-spec/docsStatsAvgNestedExpression.md
 :::
 
-An example of grouping by an expression is grouping employees on the first
-letter of their last name:
+Grouping expressions aren't limited to column references — any expression
+works. For example, group by a derived value using `LEFT`:
 
 :::{include} ../examples/stats.csv-spec/docsStatsByExpression.md
 :::
 
-#### Naming
+### Omitting column names
 
 Specifying the output column name is optional. If not specified, the new column
 name is equal to the expression. The following query returns a column named

From 27360e0fa13c10fa0e9c3d1172561e57f7856455 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Mon, 23 Feb 2026 15:46:10 +0100
Subject: [PATCH 5/7] fix wording

---
 .../esql/_snippets/commands/layout/stats-by.md             | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
index 9f7d3a6aee7ab..8e0e3aec05968 100644
--- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
+++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
@@ -180,9 +180,10 @@ for an even more in depth explanation.
 
 ### Multivalue functions
 
-Aggregation and grouping expressions accept nested functions, which is useful
-for operating on multivalue columns. Use `MV_AVG` nested inside `AVG` to first
-average each employee's multiple salary values, then aggregate across employees:
+Both aggregation and grouping expressions accept other functions, which is
+useful for using `STATS` on multivalue columns. For example, to calculate the
+average salary change, use `MV_AVG` to first average the multiple values per
+employee, then pass the result to `AVG`:
 
 :::{include} ../examples/stats.csv-spec/docsStatsAvgNestedExpression.md
 :::

From b24213543e8856bb595a5249a8002676be4e0b3d Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Mon, 23 Feb 2026 15:49:31 +0100
Subject: [PATCH 6/7] add lead-in prose, fix example heading

---
 .../esql/_snippets/commands/layout/stats-by.md                | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
index 8e0e3aec05968..7ef38410bb5e0 100644
--- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
+++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
@@ -87,6 +87,8 @@ and then grouping - that is not going to be faster.
 
 ## Examples
 
+The following examples demonstrate common `STATS` patterns.
+
 ### Group by column
 
 Combine an aggregation with `BY` to compute a value for each group:
@@ -194,7 +196,7 @@ works. For example, group by a derived value using `LEFT`:
 :::{include} ../examples/stats.csv-spec/docsStatsByExpression.md
 :::
 
-### Omitting column names
+### Output column naming
 
 Specifying the output column name is optional. If not specified, the new column
 name is equal to the expression. The following query returns a column named

From e53f298b4c2cf0149918ac6791ed9623f12a1b7f Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Mon, 23 Feb 2026 17:48:33 +0100
Subject: [PATCH 7/7] fix filter on group section

---
 .../esql/_snippets/commands/layout/stats-by.md       | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
index 7ef38410bb5e0..f4a8835828dc3 100644
--- a/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
+++ b/docs/reference/query-languages/esql/_snippets/commands/layout/stats-by.md
@@ -129,18 +129,14 @@ optional:
 
 ### Filter on the grouping key
 
-The `WHERE` clause can also filter on the grouping key. Note that this is
-different from filtering with `WHERE` before `STATS`, because the group
-itself will still appear in the output, but with a default value for
-the aggregation:
+The `WHERE` clause can also filter on the grouping key. Non-matching groups
+still appear in the output, with the aggregation's default value (here, `0`):
 
 :::{include} ../examples/stats.csv-spec/aggFilteringOnGroup.md
 :::
 
-### WHERE before STATS excludes non-matching groups
-
-When `WHERE` appears before `STATS`, rows are excluded before grouping, so
-non-matching groups don't appear in the output at all:
+Compare this to filtering with `WHERE` before `STATS`, where rows are excluded
+before grouping, so non-matching groups don't appear in the output at all:
 
 :::{include} ../examples/stats.csv-spec/aggFilteringBefore.md
 :::