elastic · nik9000 · Jul 24, 2024 · Jun 11, 2024 · Jun 11, 2024 · Jun 11, 2024
@@ -37,6 +37,7 @@ image::images/esql/processing-command.svg[A processing command changing an input
 * <<esql-enrich>>
 * <<esql-eval>>
 * <<esql-grok>>
+* experimental:[] <<esql-inlinestats-by>>
 * <<esql-keep>>
 * <<esql-limit>>
 ifeval::["{release-state}"=="unreleased"]
@@ -59,6 +60,7 @@ include::processing-commands/drop.asciidoc[]
 include::processing-commands/enrich.asciidoc[]
 include::processing-commands/eval.asciidoc[]
 include::processing-commands/grok.asciidoc[]
+include::processing-commands/inlinestats.asciidoc[]
 include::processing-commands/keep.asciidoc[]
 include::processing-commands/limit.asciidoc[]
 ifeval::["{release-state}"=="unreleased"]

@@ -0,0 +1,75 @@
+[discrete]
+[[esql-inlinestats-by]]
+=== `INLINESTATS ... BY`
+
+**Syntax**
+
+[source,esql]
+----
+INLINESTATS [column1 =] expression1[, ..., [columnN =] expressionN]
+[BY grouping_expression1[, ..., grouping_expressionN]]
+----
+
+*Parameters*
+
+`columnX`::
+The name by which the aggregated value is returned. If omitted, the name is
+equal to the corresponding expression (`expressionX`).
+
+`expressionX`::
+An expression that computes an aggregated value.
+
+`grouping_expressionX`::
+An expression that outputs the values to group by.
+
+NOTE: Individual `null` values are skipped when computing aggregations.
+
+*Description*
+
+The `INLINESTATS` command calculates an aggregate result and merges that result
+back into the stream of input data. Without the optional `BY` clause this will
+produce a single result which is merged into the input. With a `BY` clause this
+will produce one result per grouping merge the result into the stream based on
+matching group keys.
+
+All of the <<esql-agg-functions,aggregation functions>> are supported.
+
+*Examples*
+
+Find the employees that speak the most languages (it's a tie!):
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/inlinestats.csv-spec[tag=max-languages]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/inlinestats.csv-spec[tag=max-languages-result]
+|===
+
+Find the longest tenured employee who's last name starts with each letter of the alphabet:
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/inlinestats.csv-spec[tag=longest-tenured-by-first]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/inlinestats.csv-spec[tag=longest-tenured-by-first-result]
+|===
+
+Find the northern and southern most airports:
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/inlinestats.csv-spec[tag=extreme-airports]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/inlinestats.csv-spec[tag=extreme-airports-result]
+|===
+
+NOTE: Our test data doesn't have many "small" airports.
+
+If a `BY` field is multivalued then `INLINESTATS` will put the row in *each*
+bucket like <<esql-stats-by>>.
diff --git a/docs/reference/esql/processing-commands/stats.asciidoc b/docs/reference/esql/processing-commands/stats.asciidoc
@@ -6,7 +6,7 @@
 
 [source,esql]
 ----
-STATS [column1 =] expression1[, ..., [columnN =] expressionN] 
+STATS [column1 =] expression1[, ..., [columnN =] expressionN]
 [BY grouping_expression1[, ..., grouping_expressionN]]
 ----
 
@@ -39,8 +39,8 @@ NOTE: `STATS` without any groups is much much faster than adding a group.
 
 NOTE: Grouping on a single expression is currently much more optimized than grouping
       on many expressions. In some tests we have seen grouping on a single `keyword`
-      column to be five times faster than grouping on two `keyword` columns. Do 
-      not try to work around this by combining the two columns together with 
+      column to be five times faster than grouping on two `keyword` columns. Do
+      not try to work around this by combining the two columns together with
       something like <<esql-concat>> and then grouping - that is not going to be
       faster.
 
@@ -80,8 +80,7 @@ include::{esql-specs}/stats.csv-spec[tag=statsCalcMultipleValues]
 include::{esql-specs}/stats.csv-spec[tag=statsCalcMultipleValues-result]
 |===
 
-It's also possible to group by multiple values (only supported for long and
-keyword family fields):
+It's also possible to group by multiple values:
 
 [source,esql]
 ----
@@ -139,3 +138,4 @@ include::{esql-specs}/stats.csv-spec[tag=statsUnnamedColumnEval]
 |===
 include::{esql-specs}/stats.csv-spec[tag=statsUnnamedColumnEval-result]
 |===
+
diff --git a/...l/compute/src/main/java/org/elasticsearch/compute/aggregation/table/RowInTableLookup.java b/...l/compute/src/main/java/org/elasticsearch/compute/aggregation/table/RowInTableLookup.java
@@ -37,6 +37,9 @@ public abstract sealed class RowInTableLookup implements Releasable permits Empt
     public abstract String toString();
 
     public static RowInTableLookup build(BlockFactory blockFactory, Block[] keys) {
+        if (keys.length < 1) {
+            throw new IllegalArgumentException("expected [keys] to be non-empty");
+        }
         int positions = keys[0].getPositionCount();
         for (int k = 0; k < keys.length; k++) {
             if (positions != keys[k].getPositionCount()) {

diff --git a/...ql/compute/src/main/java/org/elasticsearch/compute/operator/RowInTableLookupOperator.java b/...ql/compute/src/main/java/org/elasticsearch/compute/operator/RowInTableLookupOperator.java
@@ -40,6 +40,12 @@ public String toString() {
      * are never closed, so we need to build them from a non-tracking factory.
      */
     public record Factory(Key[] keys, int[] blockMapping) implements Operator.OperatorFactory {
+        public Factory {
+            if (keys.length < 1) {
+                throw new IllegalArgumentException("expected [keys] to be non-empty");
+            }
+        }
+
         @Override
         public Operator get(DriverContext driverContext) {
             return new RowInTableLookupOperator(driverContext.blockFactory(), keys, blockMapping);
@@ -56,6 +62,9 @@ public String describe() {
     private final int[] blockMapping;
 
     public RowInTableLookupOperator(BlockFactory blockFactory, Key[] keys, int[] blockMapping) {
+        if (keys.length < 1) {
+            throw new IllegalArgumentException("expected [keys] to be non-empty");
+        }
         this.blockMapping = blockMapping;
         this.keys = new ArrayList<>(keys.length);
         Block[] blocks = new Block[keys.length];

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec
@@ -0,0 +1,221 @@
+maxOfInt
+required_capability: inlinestats
+
+// tag::max-languages[]
+FROM employees
+| KEEP emp_no, languages
+| INLINESTATS max_lang = MAX(languages) 
+| WHERE max_lang == languages
+| SORT emp_no ASC
+| LIMIT 5
+// end::max-languages[]
+;
+
+// tag::max-languages-result[]
+emp_no:integer | languages:integer | max_lang:integer
+         10002 |                 5 | 5
+         10004 |                 5 | 5
+         10011 |                 5 | 5
+         10012 |                 5 | 5
+         10014 |                 5 | 5
+// end::max-languages-result[]
+;
+
+maxOfIntByKeyword
+required_capability: inlinestats
+
+FROM employees
+| KEEP emp_no, languages, gender
+| INLINESTATS max_lang = MAX(languages) BY gender 
+| WHERE max_lang == languages
+| SORT emp_no ASC
+| LIMIT 5;
+
+emp_no:integer | languages:integer | gender:keyword | max_lang:integer
+         10002 |                 5 | F              | 5
+         10004 |                 5 | M              | 5
+         10011 |                 5 | null           | 5
+         10012 |                 5 | null           | 5
+         10014 |                 5 | null           | 5
+;
+
+maxOfLongByKeyword
+required_capability: inlinestats
+
+FROM employees
+| KEEP emp_no, avg_worked_seconds, gender
+| INLINESTATS max_avg_worked_seconds = MAX(avg_worked_seconds) BY gender 
+| WHERE max_avg_worked_seconds == avg_worked_seconds
+| SORT emp_no ASC;
+
+emp_no:integer | avg_worked_seconds:long | gender:keyword | max_avg_worked_seconds:long
+         10007 |               393084805 | F              | 393084805
+         10015 |               390266432 | null           | 390266432
+         10030 |               394597613 | M              | 394597613
+;
+
+maxOfLong
+required_capability: inlinestats
+
+FROM employees
+| KEEP emp_no, avg_worked_seconds, gender
+| INLINESTATS max_avg_worked_seconds = MAX(avg_worked_seconds) 
+| WHERE max_avg_worked_seconds == avg_worked_seconds
+| SORT emp_no ASC;
+
+emp_no:integer | avg_worked_seconds:long | gender:keyword | max_avg_worked_seconds:long
+         10030 |               394597613 | M              | 394597613
+;
+
+// TODO allow inline calculation like BY l = SUBSTRING(
+maxOfLongByCalculatedKeyword
+required_capability: inlinestats
+
+// tag::longest-tenured-by-first[]
+FROM employees
+| EVAL l = SUBSTRING(last_name, 0, 1)
+| KEEP emp_no, avg_worked_seconds, l
+| INLINESTATS max_avg_worked_seconds = MAX(avg_worked_seconds) BY l 
+| WHERE max_avg_worked_seconds == avg_worked_seconds
+| SORT l ASC
+| LIMIT 5
+// end::longest-tenured-by-first[]
+;
+
+// tag::longest-tenured-by-first-result[]
+emp_no:integer | avg_worked_seconds:long | l:keyword | max_avg_worked_seconds:long
+         10065 |               372660279 | A         | 372660279
+         10074 |               382397583 | B         | 382397583
+         10044 |               387408356 | C         | 387408356
+         10030 |               394597613 | D         | 394597613
+         10087 |               305782871 | E         | 305782871
+// end::longest-tenured-by-first-result[]
+;
+
+maxOfLongByInt
+required_capability: inlinestats
+
+FROM employees
+| KEEP emp_no, avg_worked_seconds, languages
+| INLINESTATS max_avg_worked_seconds = MAX(avg_worked_seconds) BY languages
+| WHERE max_avg_worked_seconds == avg_worked_seconds
+| SORT languages ASC;
+
+emp_no:integer | avg_worked_seconds:long | languages:integer | max_avg_worked_seconds:long
+         10044 |               387408356 |                 1 | 387408356
+         10099 |               377713748 |                 2 | 377713748
+         10030 |               394597613 |                 3 | 394597613
+         10007 |               393084805 |                 4 | 393084805
+         10015 |               390266432 |                 5 | 390266432
+         10027 |               374037782 |              null | 374037782
+;
+
+maxOfLongByIntDouble
+required_capability: inlinestats
+
+FROM employees
+| KEEP emp_no, avg_worked_seconds, languages, height
+| EVAL height=ROUND(height, 1)
+| INLINESTATS max_avg_worked_seconds = MAX(avg_worked_seconds) BY languages, height
+| WHERE max_avg_worked_seconds == avg_worked_seconds
+| SORT languages, height ASC
+| LIMIT 4;
+
+emp_no:integer | avg_worked_seconds:long | languages:integer | height:double | max_avg_worked_seconds:long
+         10083 |               331236443 |                 1 |           1.4 | 331236443
+         10084 |               359067056 |                 1 |           1.5 | 359067056
+         10033 |               208374744 |                 1 |           1.6 | 208374744
+         10086 |               328580163 |                 1 |           1.7 | 328580163
+;
+
+
+two
+required_capability: inlinestats
+
+FROM employees
+| KEEP emp_no, languages, avg_worked_seconds, gender
+| INLINESTATS avg_avg_worked_seconds = AVG(avg_worked_seconds) BY languages
+| WHERE avg_worked_seconds > avg_avg_worked_seconds
+| INLINESTATS max_languages = MAX(languages) BY gender
+| SORT emp_no ASC
+| LIMIT 3;
+
+emp_no:integer | languages:integer | avg_worked_seconds:long | gender:keyword | avg_avg_worked_seconds:double | max_languages:integer
+         10002 |                 5 |               328922887 | F              | 3.133013149047619E8           | 5
+         10006 |                 3 |               372957040 | F              | 2.978159518235294E8           | 5
+         10007 |                 4 |               393084805 | F              | 2.863684210555556E8           | 5
+;
+
+byMultivaluedSimple
+required_capability: inlinestats
+
+// tag:mv-simple
+FROM airports
+| INLINESTATS min_scalerank=MIN(scalerank) BY type
+| EVAL type=MV_SORT(type), min_scalerank=MV_SORT(min_scalerank)
+| KEEP abbrev, type, scalerank, min_scalerank
+| WHERE abbrev == "GWL"
+// tag:mv-simple
+;
+
+// tag:mv-simple-result
+abbrev:keyword |  type:keyword   | scalerank:integer | min_scalerank:integer
+           GWL | [mid, military] | 9                 | [2, 4]
+// end:mv-simple-result
+;
+
+byMultivaluedMvExpand
+required_capability: inlinestats
+
+FROM airports
+| KEEP abbrev, type, scalerank
+| MV_EXPAND type
+| INLINESTATS min_scalerank=MIN(scalerank) BY type
+| SORT min_scalerank ASC
+| WHERE abbrev == "GWL";
+
+abbrev:keyword | type:keyword | scalerank:integer | min_scalerank:integer
+           GWL |          mid | 9                 | 2
+           GWL |     military | 9                 | 4
+;
+
+byMvExpand
+required_capability: inlinestats
+
+// tag::extreme-airports[]
+FROM airports
+| MV_EXPAND type
+| EVAL lat = ST_Y(location)
+| INLINESTATS most_northern=MAX(lat), most_southern=MIN(lat) BY type
+| WHERE lat == most_northern OR lat == most_southern
+| SORT lat DESC
+| KEEP type, name, location
+// end::extreme-airports[]
+;
+
+// tag::extreme-airports-result[]
+ type:keyword |           name:text           | location:geo_point
+          mid |             Svalbard Longyear | POINT (15.495229 78.246717)
+        major |                Tromsø Langnes | POINT (18.9072624292132 69.6796790473478)
+     military | Severomorsk-3 (Murmansk N.E.) | POINT (33.2903527616285 69.0168711826804)
+    spaceport |           Baikonur Cosmodrome | POINT (63.307354423875 45.9635739403124)
+        small |                       Dhamial | POINT (73.0320498392002 33.5614146278861)
+        small |                      Sahnewal | POINT (75.9570722403652 30.8503598561702)
+    spaceport |       Centre Spatial Guyanais | POINT (-52.7684296893452 5.23941001258035)
+     military |         Santos Air Force Base | POINT (-46.3052704931003 -23.9237590410637)
+        major |            Christchurch Int'l | POINT (172.538675565223 -43.4885486784104)
+          mid |          Hermes Quijada Int'l | POINT (-67.7530268462675 -53.7814746058316)
+// end::extreme-airports-result[]
+;
+
+brokenwhy-Ignore
+required_capability: inlinestats
+
+FROM airports
+| INLINESTATS min_scalerank=MIN(scalerank) BY type
+| MV_EXPAND type
+| WHERE scalerank == MV_MIN(scalerank);
+
+abbrev:keyword |  type:keyword   | scalerank:integer | min_scalerank:integer
+           GWL | [mid, military] | 9                 | [2, 4]
+;
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
@@ -1857,3 +1857,17 @@ warning:Line 3:17: java.lang.ArithmeticException: / by zero
 w_avg:double
 null
 ;
+
+docsStatsMultiple
+// tag::mv-group[]
+FROM airports
+| STATS min_scalerank=MIN(scalerank) BY type
+| SORT min_scalerank ASC
+// end::mv-group[]
+;
+
+// tag::mv-group-result[]
+min_scalerank:integer | type:keyword
+                    3 | 
+// end::mv-group-result[]
+;