diff --git a/docs/changelog/142700.yaml b/docs/changelog/142700.yaml new file mode 100644 index 0000000000000..e77f072eb751b --- /dev/null +++ b/docs/changelog/142700.yaml @@ -0,0 +1,7 @@ +area: ES|QL +issues: + - 140134 + - 141083 +pr: 142700 +summary: Do not push sort on many keyword fields to lucene +type: bug diff --git a/muted-tests.yml b/muted-tests.yml index 4a3014ce3a886..da1b15eaf5c66 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -252,9 +252,6 @@ tests: - class: org.elasticsearch.xpack.esql.heap_attack.HeapAttackSubqueryIT method: testGiantTextFieldInSubqueryIntermediateResultsWithSort issue: https://github.com/elastic/elasticsearch/issues/141034 -- class: org.elasticsearch.xpack.esql.heap_attack.HeapAttackSubqueryIT - method: testManyRandomKeywordFieldsInSubqueryIntermediateResultsWithSortManyFields - issue: https://github.com/elastic/elasticsearch/issues/141083 - class: org.elasticsearch.xpack.transform.checkpoint.TransformCheckpointServiceNodeTests method: testGetCheckpointStats issue: https://github.com/elastic/elasticsearch/issues/141112 diff --git a/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java b/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java index 91da46da6c842..1a80f58327914 100644 --- a/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java +++ b/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java @@ -730,6 +730,7 @@ void initManyBigFieldsIndex(int docs, String type, boolean random) throws IOExce int docsPerBulk = 5; int fields = 1000; int fieldSize = Math.toIntExact(ByteSizeValue.ofKb(1).getBytes()); + boolean numeric = type.equalsIgnoreCase("integer") || type.equalsIgnoreCase("long") || type.equalsIgnoreCase("double"); Request request = new Request("PUT", 
"/manybigfields"); XContentBuilder config = JsonXContent.contentBuilder().startObject(); @@ -755,10 +756,14 @@ void initManyBigFieldsIndex(int docs, String type, boolean random) throws IOExce } else { bulk.append(", "); } - bulk.append('"').append("f").append(String.format(Locale.ROOT, "%03d", f)).append("\": \""); - // if requested, generate random string to hit the CBE faster - bulk.append(random ? randomAlphaOfLength(1024) : Integer.toString(f % 10).repeat(fieldSize)); - bulk.append('"'); + bulk.append('"').append("f").append(String.format(Locale.ROOT, "%03d", f)).append("\": "); + if (numeric) { + bulk.append(randomNumericValue(type)); + } else { + bulk.append('"'); + bulk.append(random ? randomAlphaOfLength(1024) : Integer.toString(f % 10).repeat(fieldSize)); + bulk.append('"'); + } } bulk.append("}\n"); if (d % docsPerBulk == docsPerBulk - 1 && d != docs - 1) { @@ -769,6 +774,15 @@ void initManyBigFieldsIndex(int docs, String type, boolean random) throws IOExce initIndex("manybigfields", bulk.toString()); } + private static String randomNumericValue(String type) { + return switch (type.toLowerCase(Locale.ROOT)) { + case "integer" -> Integer.toString(randomInt()); + case "long" -> Long.toString(randomLong()); + case "double" -> Double.toString(randomDouble()); + default -> throw new IllegalArgumentException("unsupported numeric type: " + type); + }; + } + void initGiantTextField(int docs, boolean includeId, long fieldSizeInMb) throws IOException { int docsPerBulk = isServerless() ? 
3 : 10; logger.info("loading many documents with one big text field - docs per bulk {}", docsPerBulk); diff --git a/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackSubqueryIT.java b/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackSubqueryIT.java index e8b099f233428..28e57c1c47b55 100644 --- a/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackSubqueryIT.java +++ b/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackSubqueryIT.java @@ -11,6 +11,7 @@ import org.apache.lucene.tests.util.TimeUnits; import org.elasticsearch.Build; +import org.elasticsearch.client.ResponseException; import org.elasticsearch.test.ListMatcher; import org.junit.Before; @@ -86,9 +87,6 @@ public void testManyRandomKeywordFieldsInSubqueryIntermediateResults() throws IO * This is mainly to test TopNOperator, addInput triggers CBE. */ public void testManyRandomKeywordFieldsInSubqueryIntermediateResultsWithSortOneField() throws IOException { - if (isServerless()) { // 500 docs OOM in serverless - return; - } int docs = 500; // 500MB random/unique keyword values heapAttackIT.initManyBigFieldsIndex(docs, "keyword", true); for (int subquery : List.of(DEFAULT_SUBQUERIES, MAX_SUBQUERIES)) { @@ -102,27 +100,46 @@ public void testManyRandomKeywordFieldsInSubqueryIntermediateResultsWithSortOneF * This is mainly to test TopNOperator. */ public void testManyRandomKeywordFieldsInSubqueryIntermediateResultsWithSortManyFields() throws IOException { - if (isServerless()) { // both 100 and 500 docs OOM in serverless - return; - } - int docs = 500; // // 500MB random/unique keyword values + int docs = 500; // 500MB random/unique keyword values heapAttackIT.initManyBigFieldsIndex(docs, "keyword", true); - // Some data points: - // 1. 
Sort on 999 fields, with 500 * 999 random values, without subquery fail/OOM in lucene, LeafFieldComparator - // 2. Sort on 20 fields(500*20 random values), 2 subqueries trigger CBE, 8 subqueries trigger OOM, haven't found a walkaround yet. StringBuilder sortKeys = new StringBuilder(); sortKeys.append("f000"); for (int f = 1; f < 100; f++) { sortKeys.append(", f").append(String.format(Locale.ROOT, "%03d", f)); } - // TODO skip 8 subqueries with sort 100 fields, as it OOMs, seems like the constrain is in reading data from lucene, - // LuceneTopNSourceOperator.NonScoringPerShardCollector is the main memory consumer, - // MultiLeafFieldComparator seems big but it is only about 15% of the size of NonScoringPerShardCollector, - for (int subquery : List.of(DEFAULT_SUBQUERIES)) { + for (int subquery : List.of(DEFAULT_SUBQUERIES, MAX_SUBQUERIES)) { assertCircuitBreaks(attempt -> buildSubqueriesWithSort(subquery, "manybigfields", sortKeys.toString())); } } + public void testManyRandomNumericFieldsInSubqueryIntermediateResultsWithSortManyFields() throws IOException { + int docs = 1000; + String type = randomFrom("integer", "long", "double"); + heapAttackIT.initManyBigFieldsIndex(docs, type, true); + StringBuilder sortKeys = new StringBuilder(); + sortKeys.append("f000"); + for (int f = 1; f < 100; f++) { + sortKeys.append(", f").append(String.format(Locale.ROOT, "%03d", f)); + } + ListMatcher columns = matchesList(); + for (int f = 0; f < 1000; f++) { + columns = columns.item(matchesMap().entry("name", "f" + String.format(Locale.ROOT, "%03d", f)).entry("type", type)); + } + for (int subquery : List.of(MAX_SUBQUERIES)) { + // results are returned from non-serverless environment, but CBE is expected in serverless + try { + Map response = buildSubqueriesWithSort(subquery, "manybigfields", sortKeys.toString()); + assertMap(response, matchesMap().entry("columns", columns)); + } catch (ResponseException e) { + Map map = responseAsMap(e.getResponse()); + assertMap( + map, + 
matchesMap().entry("status", 429).entry("error", matchesMap().extraOk().entry("type", "circuit_breaking_exception")) + ); + } + } + } + /* * The index's size is 1MB * 500, each field has 500 unique/random text values, and these queries don't have aggregation or sort. * CBE is not triggered here. @@ -363,6 +380,6 @@ private Map buildSubqueriesWithSort(int subqueries, String index query.append(", ").append(subquery); } query.append(" \"}"); - return responseAsMap(query(query.toString(), "columns,values")); + return responseAsMap(query(query.toString(), "columns")); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java index dc6eab059d8ba..078928a83e80e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java @@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.Foldables; import org.elasticsearch.xpack.esql.expression.Order; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.BinarySpatialFunction; @@ -69,7 +70,8 @@ protected PhysicalPlan rule(TopNExec topNExec, LocalPhysicalOptimizerContext ctx ctx.plannerSettings(), ctx.foldCtx(), topNExec, - LucenePushdownPredicates.from(ctx.searchStats(), ctx.flags()) + LucenePushdownPredicates.from(ctx.searchStats(), ctx.flags()), + resolveMaxKeywordSortFields(ctx) ); return pushable.rewrite(topNExec); } @@ -135,13 +137,15 @@ private static Pushable 
evaluatePushable( PlannerSettings plannerSettings, FoldContext ctx, TopNExec topNExec, - LucenePushdownPredicates lucenePushdownPredicates + LucenePushdownPredicates lucenePushdownPredicates, + int maxKeywordSortFields ) { PhysicalPlan child = topNExec.child(); if (child instanceof EsQueryExec queryExec && queryExec.canPushSorts() && canPushDownOrders(topNExec.order(), lucenePushdownPredicates) - && canPushLimit(topNExec, plannerSettings)) { + && canPushLimit(topNExec, plannerSettings) + && tooManyKeywordSortFields(topNExec.order(), maxKeywordSortFields) == false) { // With the simplest case of `FROM index | SORT ...` we only allow pushing down if the sort is on a field return new PushableQueryExec(queryExec); } @@ -205,7 +209,7 @@ && canPushLimit(topNExec, plannerSettings)) { break; } } - if (pushableSorts.isEmpty() == false) { + if (pushableSorts.isEmpty() == false && tooManyKeywordFieldSorts(pushableSorts, maxKeywordSortFields) == false) { return new PushableCompoundExec(evalExec, queryExec, pushableSorts); } } @@ -237,4 +241,48 @@ private static List buildFieldSorts(List orders) { } return sorts; } + + /** + * Resolves the effective maximum number of keyword sort fields for Lucene pushdown. + * The query-level pragma takes precedence when set to a non-negative value; + * otherwise the cluster-level planner setting is used. + */ + private static int resolveMaxKeywordSortFields(LocalPhysicalOptimizerContext ctx) { + int pragmaValue = ctx.configuration().pragmas().maxKeywordSortFields(); + return pragmaValue >= 0 ? pragmaValue : ctx.plannerSettings().maxKeywordSortFields(); + } + + /** + * Returns {@code true} if the number of keyword {@link FieldAttribute} sort fields in the given orders + * exceeds {@code maxKeywordSortFields}. Used on the simple pushdown path where orders reference + * field attributes directly. 
+ */ + private static boolean tooManyKeywordSortFields(List orders, int maxKeywordSortFields) { + int count = 0; + for (Order order : orders) { + if (order.child() instanceof FieldAttribute fa && fa.dataType() == DataType.KEYWORD) { + if (++count > maxKeywordSortFields) { + return true; + } + } + } + return false; + } + + /** + * Returns {@code true} if the number of keyword {@link EsQueryExec.FieldSort} entries in the given sorts + * exceeds {@code maxKeywordSortFields}. Used on the compound pushdown path. + */ + private static boolean tooManyKeywordFieldSorts(List sorts, int maxKeywordSortFields) { + int count = 0; + for (EsQueryExec.Sort sort : sorts) { + if (sort instanceof EsQueryExec.FieldSort fs && fs.resulType() == DataType.KEYWORD) { + if (++count > maxKeywordSortFields) { + return true; + } + } + } + return false; + } + } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerSettings.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerSettings.java index 92caf523bcc0b..b4329eb0912cf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerSettings.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerSettings.java @@ -141,6 +141,19 @@ public class PlannerSettings { Setting.Property.Dynamic ); + /** + * Maximum number of keyword sort fields allowed when pushing TopN to Lucene. + * Sorting on many keyword fields in Lucene can be expensive. When exceeded, + * the sort falls back to the compute engine. 
+ */ + public static final Setting<Integer> MAX_KEYWORD_SORT_FIELDS = Setting.intSetting( + "esql.max_keyword_sort_fields", + 10, + 0, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + public static List<Setting<?>> settings() { return List.of( DEFAULT_DATA_PARTITIONING, @@ -152,7 +165,8 @@ public static List<Setting<?>> settings() { PARTIAL_AGGREGATION_EMIT_UNIQUENESS_THRESHOLD, REUSE_COLUMN_LOADERS_THRESHOLD, BLOCK_LOADER_SIZE_ORDINALS, - BLOCK_LOADER_SIZE_SCRIPT + BLOCK_LOADER_SIZE_SCRIPT, + MAX_KEYWORD_SORT_FIELDS ); } @@ -182,6 +196,7 @@ public Holder(ClusterService clusterService) { ); clusterSettings.initializeAndWatch(BLOCK_LOADER_SIZE_ORDINALS, v -> settings.updateAndGet(s -> s.blockLoaderSizeOrdinals(v))); - clusterSettings.initializeAndWatch(BLOCK_LOADER_SIZE_SCRIPT, v -> settings.updateAndGet(s -> s.blockLoaderSizeOrdinals(v))); + clusterSettings.initializeAndWatch(BLOCK_LOADER_SIZE_SCRIPT, v -> settings.updateAndGet(s -> s.blockLoaderSizeScript(v))); + clusterSettings.initializeAndWatch(MAX_KEYWORD_SORT_FIELDS, v -> settings.updateAndGet(s -> s.maxKeywordSortFields(v))); } public PlannerSettings get() { @@ -198,6 +213,7 @@ public PlannerSettings get() { private final int reuseColumnLoadersThreshold; private final ByteSizeValue blockLoaderSizeOrdinals; private final ByteSizeValue blockLoaderSizeScript; + private final int maxKeywordSortFields; /** * Defaults. 
@@ -211,7 +227,8 @@ public PlannerSettings get() { PARTIAL_AGGREGATION_EMIT_UNIQUENESS_THRESHOLD.getDefault(Settings.EMPTY), REUSE_COLUMN_LOADERS_THRESHOLD.getDefault(Settings.EMPTY), BLOCK_LOADER_SIZE_ORDINALS.getDefault(Settings.EMPTY), - BLOCK_LOADER_SIZE_SCRIPT.getDefault(Settings.EMPTY) + BLOCK_LOADER_SIZE_SCRIPT.getDefault(Settings.EMPTY), + MAX_KEYWORD_SORT_FIELDS.getDefault(Settings.EMPTY) ); /** @@ -226,7 +243,8 @@ public PlannerSettings( double partialEmitUniquenessThreshold, int reuseColumnLoadersThreshold, ByteSizeValue blockLoaderSizeOrdinals, - ByteSizeValue blockLoaderSizeScript + ByteSizeValue blockLoaderSizeScript, + int maxKeywordSortFields ) { this.defaultDataPartitioning = defaultDataPartitioning; this.valuesLoadingJumboSize = valuesLoadingJumboSize; @@ -237,6 +255,7 @@ public PlannerSettings( this.reuseColumnLoadersThreshold = reuseColumnLoadersThreshold; this.blockLoaderSizeOrdinals = blockLoaderSizeOrdinals; this.blockLoaderSizeScript = blockLoaderSizeScript; + this.maxKeywordSortFields = maxKeywordSortFields; } public PlannerSettings defaultDataPartitioning(DataPartitioning defaultDataPartitioning) { @@ -249,7 +268,8 @@ public PlannerSettings defaultDataPartitioning(DataPartitioning defaultDataParti partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -267,7 +287,8 @@ public PlannerSettings valuesLoadingJumboSize(ByteSizeValue valuesLoadingJumboSi partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -285,7 +306,8 @@ public PlannerSettings luceneTopNLimit(int luceneTopNLimit) { partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -317,7 +339,8 @@ public PlannerSettings 
intermediateLocalRelationMaxSize(ByteSizeValue intermedia partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -335,7 +358,8 @@ public PlannerSettings partialEmitKeysThreshold(int partialEmitKeysThreshold) { partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -353,7 +377,8 @@ public PlannerSettings partialEmitUniquenessThreshold(double partialEmitUniquene partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -371,7 +396,8 @@ public PlannerSettings reuseColumnLoadersThreshold(int reuseColumnLoadersThresho partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -396,7 +422,8 @@ public PlannerSettings blockLoaderSizeOrdinals(ByteSizeValue blockLoaderSizeOrdi partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -417,7 +444,8 @@ public PlannerSettings blockLoaderSizeScript(ByteSizeValue blockLoaderSizeScript partialEmitUniquenessThreshold, reuseColumnLoadersThreshold, blockLoaderSizeOrdinals, - blockLoaderSizeScript + blockLoaderSizeScript, + maxKeywordSortFields ); } @@ -427,4 +455,23 @@ public PlannerSettings blockLoaderSizeScript(ByteSizeValue blockLoaderSizeScript public ByteSizeValue blockLoaderSizeScript() { return blockLoaderSizeScript; } + + public PlannerSettings maxKeywordSortFields(int maxKeywordSortFields) { + return new PlannerSettings( + defaultDataPartitioning, + valuesLoadingJumboSize, + luceneTopNLimit, + intermediateLocalRelationMaxSize, + partialEmitKeysThreshold, + partialEmitUniquenessThreshold, + 
reuseColumnLoadersThreshold, + blockLoaderSizeOrdinals, + blockLoaderSizeScript, + maxKeywordSortFields + ); + } + + public int maxKeywordSortFields() { + return maxKeywordSortFields; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/QueryPragmas.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/QueryPragmas.java index 83043a23415c2..bd08acb62eaa6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/QueryPragmas.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/QueryPragmas.java @@ -94,6 +94,14 @@ public final class QueryPragmas implements Writeable { */ public static final Setting ROUNDTO_PUSHDOWN_THRESHOLD = Setting.intSetting("roundto_pushdown_threshold", -1, -1); + /** + * Query-level override for the maximum number of keyword sort fields allowed when pushing TopN to Lucene. + * Defaults to {@code -1}, meaning the cluster-level setting {@link PlannerSettings#MAX_KEYWORD_SORT_FIELDS} is used. + * When set to a value {@code >= 0}, it overrides the cluster-level threshold for this query only. + * The resolution logic lives in {@code PushTopNToSource}. 
+ */ + public static final Setting MAX_KEYWORD_SORT_FIELDS = Setting.intSetting("max_keyword_sort_fields", -1, -1); + public static final Setting FORK_IMPLICIT_LIMIT = Setting.boolSetting("fork_implicit_limit", true); public static final QueryPragmas EMPTY = new QueryPragmas(Settings.EMPTY); @@ -224,6 +232,10 @@ public boolean forkImplicitLimit() { return FORK_IMPLICIT_LIMIT.get(settings); } + public int maxKeywordSortFields() { + return MAX_KEYWORD_SORT_FIELDS.get(settings); + } + public int partialAggregationEmitKeysThreshold(int defaultThreshold) { if (settings.hasValue(PlannerSettings.PARTIAL_AGGREGATION_EMIT_KEYS_THRESHOLD.getKey())) { return PlannerSettings.PARTIAL_AGGREGATION_EMIT_KEYS_THRESHOLD.get(settings); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java index 8871331fb4d04..80619b44a086a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.optimizer.rules.physical.local; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.utils.GeometryValidator; import org.elasticsearch.geometry.utils.WellKnownBinary; @@ -37,6 +38,7 @@ import org.elasticsearch.xpack.esql.plan.physical.TopNExec; import org.elasticsearch.xpack.esql.planner.PlannerSettings; import org.elasticsearch.xpack.esql.plugin.EsqlFlags; +import org.elasticsearch.xpack.esql.plugin.QueryPragmas; import org.elasticsearch.xpack.esql.stats.SearchStats; import java.io.IOException; @@ -56,6 +58,7 @@ import static 
org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushTopNToSourceTests.TestPhysicalPlanBuilder.from; import static org.elasticsearch.xpack.esql.plan.physical.AbstractPhysicalPlanSerializationTests.randomEstimatedRowSize; +import static org.elasticsearch.xpack.esql.plugin.QueryPragmas.MAX_KEYWORD_SORT_FIELDS; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; @@ -382,6 +385,114 @@ public void testSortGeoDistanceFunctionAndFieldsAndManyAliasesAndScore() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testKeywordSortFieldsAboveLimit() { + // FROM index | SORT kw0, kw1, ..., kw10 | LIMIT 10 (11 keyword sort fields, one more than the default limit of 10) + var builder = from("index"); + for (int i = 0; i < 11; i++) { + builder.sort("kw" + i); + } + builder.limit(10); + assertNoPushdownSort(builder, "when more than 10 keyword sort fields"); + } + + public void testKeywordSortFieldsBelowLimit() { + // FROM index | SORT kw0, ..., kwN | LIMIT 10, with a random number (1 to 9) of keyword sort fields drawn once + var builder = from("index"); + for (int i = 0, sortFields = randomIntBetween(1, 9); i < sortFields; i++) { + builder.sort("kw" + i); + } + builder.limit(10); + assertPushdownSort(builder); + } + + public void testMixedTypeSortFieldsKeywordBelowLimit() { + // FROM index | SORT kw0, ..., kw9, integer | LIMIT 10 (10 keyword fields is exactly the default limit; integer is not counted) + var builder = from("index"); + for (int i = 0; i < 10; i++) { + builder.sort("kw" + i); + } + builder.sort("integer"); + builder.limit(10); + assertPushdownSort(builder); + } + + public void testKeywordSortFieldsAboveLimitWithEval() { + // FROM index | EVAL x = keyword | SORT kw0, kw1, ..., kw9, x | LIMIT 10 (10 keyword fields plus the keyword alias x = 11) + var builder = from("index"); + builder.eval("x", e -> e.field("keyword")); + for (int i = 0; i < 10; i++) { + builder.sort("kw" + i); + } + builder.sort("x"); + builder.limit(10); + assertNoPushdownSort(builder, "when more than 10 keyword sort fields with eval"); + } + + public void testKeywordSortFieldsBelowLimitWithEval() { + // FROM 
index | EVAL x = keyword | SORT kw0, kw1, ..., kw8, x | LIMIT 10 + var builder = from("index"); + builder.eval("x", e -> e.field("keyword")); + for (int i = 0; i < 9; i++) { + builder.sort("kw" + i); + } + builder.sort("x"); + builder.limit(10); + assertPushdownSort(builder, Map.of("x", "keyword"), List.of(EvalExec.class, EsQueryExec.class)); + } + + public void testPragmaIncreasesMaxKeywordSortFields() { + // 11 keyword sorts would normally be blocked, but pragma raises the limit + var builder = from("index"); + for (int i = 0; i < 11; i++) { + builder.sort("kw" + i); + } + builder.limit(10); + var pragmas = new QueryPragmas(Settings.builder().put(MAX_KEYWORD_SORT_FIELDS.getKey(), randomIntBetween(11, 20)).build()); + var topNExec = builder.build(); + var result = pushTopNToSource(topNExec, pragmas); + assertPushdownSort(result, builder.orders, null, List.of(EsQueryExec.class)); + } + + public void testPragmaDecreasesMaxKeywordSortFields() { + // 10 keyword sorts would normally be allowed, but pragma lowers the limit + var builder = from("index"); + for (int i = 0; i < 10; i++) { + builder.sort("kw" + i); + } + builder.limit(10); + var pragmas = new QueryPragmas(Settings.builder().put(MAX_KEYWORD_SORT_FIELDS.getKey(), randomIntBetween(1, 9)).build()); + + var topNExec = builder.build(); + var result = pushTopNToSource(topNExec, pragmas); + assertNoPushdownSort(result, "query pragma limits keyword sorts to less than 10"); + } + + public void testPlannerSettingOverridesMaxKeywordSortFields() { + // 11 keyword sorts would normally be blocked, but planner setting raises the limit + var builder = from("index"); + for (int i = 0; i < 11; i++) { + builder.sort("kw" + i); + } + builder.limit(10); + var plannerSettings = PlannerSettings.DEFAULTS.maxKeywordSortFields(randomIntBetween(11, 20)); + var topNExec = builder.build(); + var result = pushTopNToSource(topNExec, QueryPragmas.EMPTY, plannerSettings); + assertPushdownSort(result, builder.orders, null, 
List.of(EsQueryExec.class)); + } + + public void testPlannerSettingLowersMaxKeywordSortFields() { + // 10 keyword sorts would normally be allowed, but planner setting lowers the limit + var builder = from("index"); + for (int i = 0; i < 10; i++) { + builder.sort("kw" + i); + } + builder.limit(10); + var plannerSettings = PlannerSettings.DEFAULTS.maxKeywordSortFields(randomIntBetween(1, 9)); + var topNExec = builder.build(); + var result = pushTopNToSource(topNExec, QueryPragmas.EMPTY, plannerSettings); + assertNoPushdownSort(result, "planner setting limits keyword sorts to less than 10"); + } + private static void assertPushdownSort(TestPhysicalPlanBuilder builder) { assertPushdownSort(builder, null, List.of(EsQueryExec.class)); } @@ -418,9 +529,17 @@ private static void assertNoPushdownSort(TestPhysicalPlanBuilder builder, String } private static PhysicalPlan pushTopNToSource(TopNExec topNExec) { - var configuration = EsqlTestUtils.configuration("from test"); + return pushTopNToSource(topNExec, QueryPragmas.EMPTY); + } + + private static PhysicalPlan pushTopNToSource(TopNExec topNExec, QueryPragmas pragmas) { + return pushTopNToSource(topNExec, pragmas, PlannerSettings.DEFAULTS); + } + + private static PhysicalPlan pushTopNToSource(TopNExec topNExec, QueryPragmas pragmas, PlannerSettings plannerSettings) { + var configuration = EsqlTestUtils.configuration(pragmas, "from test"); var ctx = new LocalPhysicalOptimizerContext( - PlannerSettings.DEFAULTS, + plannerSettings, new EsqlFlags(true), configuration, FoldContext.small(), @@ -510,6 +629,9 @@ private static void addSortableFieldAttributes(Map field addFieldAttribute(fields, "double", DOUBLE); addFieldAttribute(fields, "keyword", KEYWORD); addFieldAttribute(fields, "location", GEO_POINT); + for (int i = 0; i < 20; i++) { + addFieldAttribute(fields, "kw" + i, KEYWORD); + } } private static void addFieldAttribute(Map fields, String name, DataType type) { diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java index 6b0d9d3e22494..a73bad5c44a3c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java @@ -286,7 +286,8 @@ public void testTimeSeries() throws IOException { randomDoubleBetween(0.1, 1.0, true), between(0, 1000), MappedFieldType.BlockLoaderContext.DEFAULT_ORDINALS_BYTE_SIZE, - MappedFieldType.BlockLoaderContext.DEFAULT_SCRIPT_BYTE_SIZE + MappedFieldType.BlockLoaderContext.DEFAULT_SCRIPT_BYTE_SIZE, + 10 ); LocalExecutionPlanner.LocalExecutionPlan plan = planner().plan( "test",