Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
5753f59
[ESQL] Adds parsing and planner wiring for LIMIT BY
ncordon Mar 12, 2026
9052bcb
Removes duplicated LIMIT in data node
ncordon Mar 12, 2026
41d3029
Addresses copilot review
ncordon Mar 12, 2026
724436f
Fixes last tests
ncordon Mar 12, 2026
87a6f9c
Merge remote-tracking branch 'upstream/main' into esql-limit-by-plann…
ncordon Mar 13, 2026
19b968d
Fixes merge
ncordon Mar 13, 2026
70641f6
Merge remote-tracking branch 'upstream/main' into esql-limit-by-plann…
ncordon Mar 13, 2026
4dd8922
Merge branch 'main' into esql-limit-by-planner-alt
ncordon Mar 13, 2026
5acbd44
Removes coordinator thingy from LimitBy
ncordon Mar 13, 2026
98d0f9a
Addresses some pr review feedback
ncordon Mar 16, 2026
0300b23
[CI] Auto commit changes from spotless
Mar 16, 2026
a68f6c8
Merge branch 'main' into esql-limit-by-planner-alt
ncordon Mar 16, 2026
7c26ac1
Reverts some changes
ncordon Mar 16, 2026
b91d97c
Adds qualified names
ncordon Mar 17, 2026
6e2f9df
Cleans parser tests
ncordon Mar 17, 2026
b34081c
Add LOOKUP JOIN tests
ncordon Mar 17, 2026
25ae635
Corrects comment
ncordon Mar 17, 2026
31ffe90
Merge remote-tracking branch 'upstream/main' into esql-limit-by-plann…
ncordon Mar 17, 2026
8d9a7eb
Solves compilation errors
ncordon Mar 17, 2026
91f8649
Removes non relevant test
ncordon Mar 17, 2026
4e95450
Fixes golden tests
ncordon Mar 18, 2026
8a855f7
Merge remote-tracking branch 'upstream/main' into esql-limit-by-plann…
ncordon Mar 18, 2026
e6b0a8c
Merge branch 'main' into esql-limit-by-planner-alt
ncordon Mar 18, 2026
8c0c750
Merge branch 'main' into esql-limit-by-planner-alt
ncordon Mar 18, 2026
adaf691
Gates all LIMIT_BY tests
ncordon Mar 18, 2026
3c69444
Merge remote-tracking branch 'upstream/main' into esql-limit-by-plann…
ncordon Mar 18, 2026
aa83c35
Renames limit -> limitPerGroup
ncordon Mar 18, 2026
6a03c4e
Adds missing capability
ncordon Mar 18, 2026
61c7f98
Merge remote-tracking branch 'upstream/main' into esql-limit-by-plann…
ncordon Mar 18, 2026
3893bf6
Last nits
ncordon Mar 18, 2026
6cf1897
Merge branch 'main' into esql-limit-by-planner-alt
ncordon Mar 18, 2026
8107ab6
Merge branch 'main' into esql-limit-by-planner-alt
ncordon Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@
import org.elasticsearch.xpack.esql.plan.QuerySettings;
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Explain;
import org.elasticsearch.xpack.esql.plan.logical.Limit;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
Expand Down Expand Up @@ -230,6 +231,7 @@
import static org.elasticsearch.xpack.esql.parser.ParserUtils.ParamClassification.VALUE;
import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.junit.Assert.assertNotNull;
Expand Down Expand Up @@ -818,6 +820,20 @@ public static Limit asLimit(Object node, Integer limitLiteral, Boolean duplicate
return limit;
}

/**
* Assert that an {@link Eval}'s fields are literal-valued aliases with the given names and values (in order).
*/
public static Eval assertEvalFields(Eval eval, String[] names, Object[] values) {
var fields = eval.fields();
Assert.assertEquals(names.length, fields.size());
Assert.assertEquals(names.length, values.length);
for (int i = 0; i < names.length; i++) {
assertThat(fields.get(i).name(), equalTo(names[i]));
assertThat(as(fields.get(i).child(), Literal.class).value(), equalTo(values[i]));
}
return eval;
}

public static Map<String, EsField> loadMapping(String name) {
return LoadMapping.loadMapping(name);
}
Expand Down
271 changes: 271 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/limit.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,274 @@ emp_no:integer
10004
10005
;

//
// LIMIT BY
//

limitBy
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10002, 10003, 10005, 10006)
| SORT emp_no
| LIMIT 1000
Copy link
Copy Markdown
Contributor

@idegtiarenko idegtiarenko Mar 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this limit here and in other cases? I believe we have much fewer records in employees, especially above condition.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it to avoid sorts over the entire result set?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically no, as the default "limit 1000" automatically added. We can remove it

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it is to avoid warning about default limit?
I do not think it is suppressed by new limit by, is it?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not, so yes, it could be for the warning 👀

Copy link
Copy Markdown
Member Author

@ncordon ncordon Mar 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want stable results so we have to use SORT, but we cannot use SORT + LIMIT BY yet because we need a different logical and physical plan for that, so it's disallowed. The intermediate LIMIT is just temporary. I've added removing it as a task to do in the future so we don't forget: https://github.com/elastic/esql-planning/issues/262

| LIMIT 5 BY languages
| KEEP emp_no, first_name, languages
;

emp_no:integer | first_name:keyword | languages:integer
10001 | Georgi | 2
10002 | Bezalel | 5
10003 | Parto | 4
10005 | Kyoichi | 1
10006 | Anneke | 3
;

limitByLimit0
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10002, 10003)
| SORT emp_no
| LIMIT 1000
| LIMIT 0 BY languages
| KEEP emp_no, languages
;

emp_no:integer | languages:integer
;

limitByMultipleColumns
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10003, 10004, 10007)
| SORT emp_no
| LIMIT 1000
| LIMIT 5 BY languages, gender
| KEEP emp_no, first_name, languages, gender
;

emp_no:integer | first_name:keyword | languages:integer | gender:keyword
10001 | Georgi | 2 | M
10003 | Parto | 4 | M
10004 | Chirstian | 5 | M
10007 | Tzvetan | 4 | F
;

limitByWithExpression
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10002, 10003, 10004, 10005)
| SORT emp_no
| LIMIT 1000
| LIMIT 5 BY languages * 2
| KEEP emp_no, first_name, languages
;

emp_no:integer | first_name:keyword | languages:integer
10001 | Georgi | 2
10002 | Bezalel | 5
10003 | Parto | 4
10004 | Chirstian | 5
10005 | Kyoichi | 1
;

limitByMultivalueGroupKey
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10008)
| SORT emp_no
| LIMIT 1000
| LIMIT 5 BY job_positions
| KEEP emp_no, first_name, job_positions
;

emp_no:integer | first_name:keyword | job_positions:keyword
10001 | Georgi | [Accountant, Senior Python Developer]
10008 | Saniya | [Internship, Junior Developer, Purchase Manager, Senior Python Developer]
;

limitByNullGroup
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10020)
| SORT emp_no
| LIMIT 1000
| LIMIT 5 BY languages
| KEEP emp_no, first_name, languages
;

emp_no:integer | first_name:keyword | languages:integer
10001 | Georgi | 2
10020 | Mayuko | null
;

limitByConstant
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10002, 10003, 10005, 10006)
| SORT emp_no
| LIMIT 1000
| LIMIT 2 BY 5*42
| KEEP emp_no, first_name, languages
;

emp_no:integer | first_name:keyword | languages:integer
10001 | Georgi | 2
10002 | Bezalel | 5
;

limitByWithExpressionAndConstant
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10002, 10003, 10004, 10005)
| SORT emp_no
| LIMIT 1000
| LIMIT 5 BY languages * 2, 20 - 5 * 2
| KEEP emp_no, first_name, languages
;

emp_no:integer | first_name:keyword | languages:integer
10001 | Georgi | 2
10002 | Bezalel | 5
10003 | Parto | 4
10004 | Chirstian | 5
10005 | Kyoichi | 1
;

limitByWithAlias
required_capability: esql_limit_by

FROM employees
| WHERE emp_no IN (10001, 10002, 10003, 10004, 10005)
| SORT emp_no
| LIMIT 1000
| EVAL g = languages * 2
| LIMIT 5 BY g
| KEEP emp_no, first_name, languages
;

emp_no:integer | first_name:keyword | languages:integer
10001 | Georgi | 2
10002 | Bezalel | 5
10003 | Parto | 4
10004 | Chirstian | 5
10005 | Kyoichi | 1
;

limitByThenStats
required_capability: esql_limit_by

FROM employees
| LIMIT 2 BY languages
| STATS c = COUNT(*) BY languages
| SORT languages ASC NULLS LAST
;

c:long | languages:integer
2 | 1
2 | 2
2 | 3
2 | 4
2 | 5
2 | null
;

limitByPrecededByStats
required_capability: esql_limit_by

FROM employees
| STATS cnt=COUNT(*) BY job_positions, languages
| SORT job_positions, cnt DESC, languages
| LIMIT 1000
| LIMIT 1 BY job_positions
| LIMIT 5
;

cnt:long | job_positions:keyword | languages:integer
5 | Accountant | 2
5 | Architect | 4
4 | Business Analyst | 2
3 | Data Scientist | 1
5 | Head Human Resources | 5
;

limitByWithLookupJoin
required_capability: esql_limit_by
required_capability: join_lookup_v12

FROM employees
| WHERE emp_no IN (10001, 10002, 10003, 10004, 10005)
| EVAL language_code = languages
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| SORT emp_no, language_code, language_name
| LIMIT 1000
| LIMIT 2 BY emp_no
| KEEP emp_no, language_code, language_name
;

emp_no:integer | language_code:integer | language_name:keyword
10001 | 2 | German
10001 | 2 | German
10002 | 5 | null
10003 | 4 | Quenya
10004 | 5 | null
10005 | 1 | English
10005 | 1 | English
;

limitByShadowedNonJoinField
required_capability: esql_limit_by
required_capability: join_lookup_v12

FROM employees
| WHERE emp_no IN (10001, 10002, 10003, 10004, 10005)
| EVAL language_code = languages
| EVAL language_name = 2*salary
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| SORT emp_no, language_code, language_name
| LIMIT 1000
| LIMIT 2 BY language_name
| KEEP emp_no, language_code, language_name
;

emp_no:integer | language_code:integer | language_name:keyword
10001 | 2 | German
10001 | 2 | German
10002 | 5 | null
10003 | 4 | Quenya
10004 | 5 | null
10005 | 1 | English
10005 | 1 | English
;

limitByShadowedJoinField
required_capability: esql_limit_by
required_capability: join_lookup_v12

FROM employees
| WHERE emp_no IN (10001, 10002, 10003, 10004, 10005)
| EVAL language_code = languages
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
| SORT emp_no, language_code, language_name
| LIMIT 1000
| LIMIT 2 BY language_code
| KEEP emp_no, language_code, language_name
;

emp_no:integer | language_code:integer | language_name:keyword
10001 | 2 | German
10001 | 2 | German
10002 | 5 | null
10003 | 4 | Quenya
10004 | 5 | null
10005 | 1 | English
10005 | 1 | English
;
2 changes: 1 addition & 1 deletion x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ limitCommand
;

limitByGroupKey:
{this.isDevVersion()}? BY grouping=fields
{this.isDevVersion()}? BY booleanExpression (COMMA booleanExpression)*
;

sortCommand
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2321,8 +2321,10 @@ public enum Cap {

/**
* LIMIT n BY expr1, expr2 support for retaining at most n docs per group.
* Enables the feature without a preceding SORT.
*
*/
LIMIT_BY(Build.current().isSnapshot()),
ESQL_LIMIT_BY(Build.current().isSnapshot()),

/**
* Fix window validation in time-series aggregations when TBUCKET uses a numeric target bucket count.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@
import org.elasticsearch.xpack.esql.plan.logical.LimitBy;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.plan.logical.Lookup;
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
import org.elasticsearch.xpack.esql.plan.logical.Project;
import org.elasticsearch.xpack.esql.plan.logical.Subquery;
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
import org.elasticsearch.xpack.esql.plan.logical.UnionAll;
import org.elasticsearch.xpack.esql.plan.logical.promql.PromqlCommand;
import org.elasticsearch.xpack.esql.telemetry.FeatureMetric;
Expand Down Expand Up @@ -357,9 +359,20 @@ private static void checkLimitBeforeInlineStats(LogicalPlan plan, Failures failu
}
}

// TODO: remove this check when SORT + LIMIT BY (TopN) support is added
private static void checkLimitBy(LogicalPlan plan, Failures failures) {
if (plan instanceof LimitBy) {
failures.add(fail(plan, "LIMIT BY is not yet supported"));
if (plan instanceof LimitBy limitBy) {
LogicalPlan child = limitBy.child();
while (child instanceof UnaryPlan unary) {
if (child instanceof OrderBy) {
failures.add(fail(limitBy, "SORT cannot be used before LIMIT BY"));
break;
}
if (child instanceof Limit) {
break;
}
child = unary.child();
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,23 @@ public static boolean equalsAsAttribute(Expression left, Expression right) {
return true;
}

public static boolean listSemanticEquals(List<Expression> leftList, List<Expression> rightList) {
if (leftList.size() != rightList.size()) {
return false;
}
for (int i = 0; i < leftList.size(); i++) {
Expression left = leftList.get(i);
Expression right = rightList.get(i);
if (left == null || right == null) {
throw new IllegalArgumentException("Unexpected null expression in list at index [" + i + "]");
}
if (left.semanticEquals(right) == false) {
return false;
}
}
return true;
}

public static List<Tuple<Attribute, Expression>> aliases(List<? extends NamedExpression> named) {
// an alias of same name and data type can be reused (by mistake): need to use a list to collect all refs (and later report them)
List<Tuple<Attribute, Expression>> aliases = new ArrayList<>();
Expand Down
Loading
Loading