diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java index f674fd201ed7e..3843b530462af 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/_nightly/esql/QueryPlanningBenchmark.java @@ -58,6 +58,7 @@ import static java.util.Collections.emptyMap; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; @Fork(1) @Warmup(iterations = 5) @@ -119,7 +120,8 @@ public void setup() { Map.of(), new EnrichResolution(), InferenceResolution.EMPTY, - minimumVersion + minimumVersion, + UNMAPPED_FIELDS.defaultValue() ), new Verifier(new Metrics(functionRegistry), new XPackLicenseState(() -> 0L)) ); diff --git a/docs/changelog/140463.yaml b/docs/changelog/140463.yaml new file mode 100644 index 0000000000000..45bed54e47841 --- /dev/null +++ b/docs/changelog/140463.yaml @@ -0,0 +1,5 @@ +pr: 140463 +summary: "Introduce support for mapping-unavailable fields (Fork from #139417)" +area: ES|QL +type: feature +issues: [] diff --git a/docs/reference/query-languages/esql/kibana/definition/settings/unmapped_fields.json b/docs/reference/query-languages/esql/kibana/definition/settings/unmapped_fields.json new file mode 100644 index 0000000000000..3ab982610e2ab --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/settings/unmapped_fields.json @@ -0,0 +1,11 @@ +{ + "comment" : "This is generated by ESQL’s DocsV3Support. Do not edit it. See ../README.md for how to regenerate it.", + "name" : "unmapped_fields", + "type" : [ + "keyword" + ], + "serverlessOnly" : false, + "preview" : true, + "snapshotOnly" : true, + "description" : "Defines how unmapped fields are treated. 
Possible values are: \"FAIL\" (default) - fails the query if unmapped fields are present; \"NULLIFY\" - treats unmapped fields as null values; \"LOAD\" - attempts to load the fields from the source." +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 75321d19546f2..a59f632694415 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -83,6 +83,7 @@ import org.elasticsearch.xpack.esql.analysis.AnalyzerSettings; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; import org.elasticsearch.xpack.esql.analysis.MutableAnalyzerContext; +import org.elasticsearch.xpack.esql.analysis.UnmappedResolution; import org.elasticsearch.xpack.esql.analysis.Verifier; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; @@ -220,6 +221,7 @@ import static org.elasticsearch.xpack.esql.parser.ParserUtils.ParamClassification.IDENTIFIER; import static org.elasticsearch.xpack.esql.parser.ParserUtils.ParamClassification.PATTERN; import static org.elasticsearch.xpack.esql.parser.ParserUtils.ParamClassification.VALUE; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; @@ -546,6 +548,26 @@ public static MutableAnalyzerContext testAnalyzerContext( Map lookupResolution, EnrichResolution enrichResolution, InferenceResolution inferenceResolution + ) { + return testAnalyzerContext( + configuration, + functionRegistry, + indexResolutions, + lookupResolution, + enrichResolution, + inferenceResolution, + 
UNMAPPED_FIELDS.defaultValue() + ); + } + + public static MutableAnalyzerContext testAnalyzerContext( + Configuration configuration, + EsqlFunctionRegistry functionRegistry, + Map indexResolutions, + Map lookupResolution, + EnrichResolution enrichResolution, + InferenceResolution inferenceResolution, + UnmappedResolution unmappedResolution ) { return new MutableAnalyzerContext( configuration, @@ -554,7 +576,8 @@ public static MutableAnalyzerContext testAnalyzerContext( lookupResolution, enrichResolution, inferenceResolution, - randomMinimumVersion() + randomMinimumVersion(), + unmappedResolution ); } @@ -1212,6 +1235,23 @@ static BytesReference randomTsId() { return routingPathFields.buildHash(); } + // lifted from org.elasticsearch.http.HttpClientStatsTrackerTests + public static String randomizeCase(String s) { + final char[] chars = s.toCharArray(); + for (int i = 0; i < chars.length; i++) { + chars[i] = randomizeCase(chars[i]); + } + return new String(chars); + } + + private static char randomizeCase(char c) { + return switch (between(1, 3)) { + case 1 -> Character.toUpperCase(c); + case 2 -> Character.toLowerCase(c); + default -> c; + }; + } + public static WildcardLike wildcardLike(Expression left, String exp) { return new WildcardLike(EMPTY, left, new WildcardPattern(exp), false); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/analysis/MutableAnalyzerContext.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/analysis/MutableAnalyzerContext.java index 1df598c483e5e..e7dd796cd77ce 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/analysis/MutableAnalyzerContext.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/analysis/MutableAnalyzerContext.java @@ -31,9 +31,19 @@ public MutableAnalyzerContext( Map lookupResolution, EnrichResolution enrichResolution, InferenceResolution inferenceResolution, - 
TransportVersion minimumVersion + TransportVersion minimumVersion, + UnmappedResolution unmappedResolution ) { - super(configuration, functionRegistry, indexResolution, lookupResolution, enrichResolution, inferenceResolution, minimumVersion); + super( + configuration, + functionRegistry, + indexResolution, + lookupResolution, + enrichResolution, + inferenceResolution, + minimumVersion, + unmappedResolution + ); this.currentVersion = minimumVersion; } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-load.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-load.csv-spec new file mode 100644 index 0000000000000..b2fff4ffb70b2 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-load.csv-spec @@ -0,0 +1,731 @@ +#################### +# No loading tests # +#################### +// These ones verify we don't do anything extra when unmapped_fields is not set to "load" +doesNotLoadUnmappedFieldsSort +required_capability: unmapped_fields +FROM partial_mapping_sample_data +| SORT @timestamp DESC +; + +@timestamp:datetime | client_ip:ip | event_duration:long | message:keyword +2024-10-23T13:55:01.543Z | 173.21.3.15 | 1756466 | Connected to 10.1.0.1! +2024-10-23T13:53:55.832Z | 173.21.3.15 | 5033754 | Connection error? +2024-10-23T13:52:55.015Z | 173.21.3.15 | 8268152 | Connection error? +2024-10-23T13:51:54.732Z | 173.21.3.15 | 725447 | Connection error? +2024-10-23T13:33:34.937Z | 173.21.0.5 | 1232381 | 42 +2024-10-23T12:27:28.948Z | 173.21.2.113 | 2764888 | Connected to 10.1.0.2! +2024-10-23T12:15:03.360Z | 173.21.2.162 | 3450232 | Connected to 10.1.0.3! +; + +doesNotLoadUnmappedFieldsStats +required_capability: unmapped_fields +FROM partial_mapping_sample_data +| STATS count(*) BY message +| SORT message +; + +count(*):long | message:keyword +1 | 42 +1 | Connected to 10.1.0.1! +1 | Connected to 10.1.0.2! +1 | Connected to 10.1.0.3! +3 | Connection error? 
+; + +doesNotLoadUnmappedFieldsInlineStats +required_capability: unmapped_fields +required_capability: inline_stats +FROM partial_mapping_sample_data +| INLINE STATS count = COUNT(*) BY message +| SORT @timestamp DESC +| LIMIT 3 +; + +@timestamp:datetime | client_ip:ip | event_duration:long | count:long | message:keyword +2024-10-23T13:55:01.543Z | 173.21.3.15 | 1756466 | 1 | Connected to 10.1.0.1! +2024-10-23T13:53:55.832Z | 173.21.3.15 | 5033754 | 3 | Connection error? +2024-10-23T13:52:55.015Z | 173.21.3.15 | 8268152 | 3 | Connection error? +; + +doesNotLoadUnmappedFieldsInlineStatsNoGrouping +required_capability: unmapped_fields +required_capability: inline_stats +FROM partial_mapping_sample_data +| INLINE STATS max_duration = MAX(event_duration) +| SORT @timestamp DESC +| LIMIT 3 +; + +@timestamp:datetime | client_ip:ip | event_duration:long | message:keyword | max_duration:long +2024-10-23T13:55:01.543Z | 173.21.3.15 | 1756466 | Connected to 10.1.0.1! | 8268152 +2024-10-23T13:53:55.832Z | 173.21.3.15 | 5033754 | Connection error? | 8268152 +2024-10-23T13:52:55.015Z | 173.21.3.15 | 8268152 | Connection error? | 8268152 +; + +doesNotLoadUnmappedFieldsEnrich +required_capability: unmapped_fields +required_capability: enrich_load +FROM partial_mapping_sample_data +| EVAL language_code = 1 +| ENRICH languages_policy ON language_code +| SORT @timestamp DESC +| LIMIT 3 +; + +@timestamp:datetime | client_ip:ip | event_duration:long | message:keyword | language_code:integer | language_name:keyword +2024-10-23T13:55:01.543Z | 173.21.3.15 | 1756466 | Connected to 10.1.0.1! | 1 | English +2024-10-23T13:53:55.832Z | 173.21.3.15 | 5033754 | Connection error? | 1 | English +2024-10-23T13:52:55.015Z | 173.21.3.15 | 8268152 | Connection error? 
| 1 | English +; + +doesNotLoadUnmappedFieldsLookupJoin +required_capability: unmapped_fields +required_capability: join_lookup_v12 +FROM partial_mapping_sample_data +| EVAL language_code = 1 +| LOOKUP JOIN languages_lookup ON language_code +| SORT @timestamp DESC +| LIMIT 3 +; + +@timestamp:datetime | client_ip:ip | event_duration:long | message:keyword | language_code:integer | language_name:keyword +2024-10-23T13:55:01.543Z | 173.21.3.15 | 1756466 | Connected to 10.1.0.1! | 1 | English +2024-10-23T13:53:55.832Z | 173.21.3.15 | 5033754 | Connection error? | 1 | English +2024-10-23T13:52:55.015Z | 173.21.3.15 | 8268152 | Connection error? | 1 | English +; + +###################### +# Single index tests # +###################### + +fieldIsMappedToNonKeywordSingleIndex +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP @timestamp, client_ip +| SORT @timestamp DESC +; + +@timestamp:date | client_ip:ip +2024-10-23T13:55:01.543Z | 173.21.3.15 +2024-10-23T13:53:55.832Z | 173.21.3.15 +2024-10-23T13:52:55.015Z | 173.21.3.15 +2024-10-23T13:51:54.732Z | 173.21.3.15 +2024-10-23T13:33:34.937Z | 173.21.0.5 +2024-10-23T12:27:28.948Z | 173.21.2.113 +2024-10-23T12:15:03.360Z | 173.21.2.162 +; + +fieldIsMappedToKeywordSingleIndex +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP @timestamp, message +| SORT @timestamp DESC +; + +@timestamp:datetime | message:keyword +2024-10-23T13:55:01.543Z | Connected to 10.1.0.1! +2024-10-23T13:53:55.832Z | Connection error? +2024-10-23T13:52:55.015Z | Connection error? +2024-10-23T13:51:54.732Z | Connection error? +2024-10-23T13:33:34.937Z | 42 +2024-10-23T12:27:28.948Z | Connected to 10.1.0.2! +2024-10-23T12:15:03.360Z | Connected to 10.1.0.3! 
+; + +unmappedFieldDoesNotAppearLast +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| SORT @timestamp DESC +| LIMIT 1 +; + +@timestamp:date |client_ip:ip |event_duration:long |message:keyword +2024-10-23T13:55:01.543Z|173.21.3.15 |1756466 |Connected to 10.1.0.1! +; + +fieldDoesNotExistSingleIndex +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP @timestamp, foo +| SORT @timestamp DESC +; + +@timestamp:date | foo:keyword +2024-10-23T13:55:01.543Z | null +2024-10-23T13:53:55.832Z | null +2024-10-23T13:52:55.015Z | null +2024-10-23T13:51:54.732Z | null +2024-10-23T13:33:34.937Z | null +2024-10-23T12:27:28.948Z | null +2024-10-23T12:15:03.360Z | null +; + +fieldIsUnmappedSingleIndex +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP @timestamp, message, unmapped_message +| SORT @timestamp DESC +; + +@timestamp:date | message:keyword | unmapped_message:keyword +2024-10-23T13:55:01.543Z | Connected to 10.1.0.1! | Disconnected from 10.1.0.1 +2024-10-23T13:53:55.832Z | Connection error? | Disconnection error +2024-10-23T13:52:55.015Z | Connection error? | Disconnection error +2024-10-23T13:51:54.732Z | Connection error? | Disconnection error +2024-10-23T13:33:34.937Z | 42 | 43 +2024-10-23T12:27:28.948Z | Connected to 10.1.0.2! | Disconnected from 10.1.0.2 +2024-10-23T12:15:03.360Z | Connected to 10.1.0.3! | Disconnected from 10.1.0.3 +; + +# Kept this test disabled to show the difference from IGNORE_🐔: if everywhere unmapped and not mentioned, a field (message) won't show +# up at all. 
+fieldIsUnmappedButSourceIsDisabledSingleIndex-Ignore +required_capability: source_field_mapping +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_no_source_sample_data +; + +@timestamp:date | message:keyword +2024-10-23T13:55:01.543Z | null +2024-10-23T13:53:55.832Z | null +2024-10-23T13:52:55.015Z | null +2024-10-23T13:51:54.732Z | null +2024-10-23T13:33:34.937Z | null +2024-10-23T12:27:28.948Z | null +2024-10-23T12:15:03.360Z | null +; + +# same comment as above (fieldIsUnmappedButSourceIsDisabledSingleIndex) +fieldIsUnmappedButExcludedFromSourceSingleIndex-Ignore +required_capability: source_field_mapping +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_excluded_source_sample_data +| SORT @timestamp DESC +; + +@timestamp:date | message:keyword +2024-10-23T13:55:01.543Z | null +2024-10-23T13:53:55.832Z | null +2024-10-23T13:52:55.015Z | null +2024-10-23T13:51:54.732Z | null +2024-10-23T13:33:34.937Z | null +2024-10-23T12:27:28.948Z | null +2024-10-23T12:15:03.360Z | null +; + +fieldIsNestedAndMapped +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM addresses +| KEEP city.name +| SORT city.name DESC +; + +city.name:keyword +Tokyo +San Francisco +Amsterdam +; + +fieldIsNestedAndMappedNoKeep +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM addresses +| SORT city.name DESC +; + +city.country.continent.name:keyword|city.country.continent.planet.galaxy:keyword|city.country.continent.planet.name:keyword|city.country.name:keyword|city.name:keyword|number:keyword|street:keyword|zip_code:keyword +Asia |Milky Way |Earth |Japan |Tokyo |2-7-2 |Marunouchi |100-7014 +North America |Milky Way |Earth |United States of America|San Francisco |88 |Kearny St |CA 94108 +Europe |Milky Way |Earth 
|Netherlands |Amsterdam |281 |Keizersgracht |1016 ED +; + +fieldIsNestedAndUnmapped +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP @timestamp, unmapped.nested +| SORT @timestamp +; + +@timestamp:date | unmapped.nested:keyword +2024-10-23T12:15:03.360Z | g +2024-10-23T12:27:28.948Z | f +2024-10-23T13:33:34.937Z | e +2024-10-23T13:51:54.732Z | d +2024-10-23T13:52:55.015Z | c +2024-10-23T13:53:55.832Z | b +2024-10-23T13:55:01.543Z | a +; + +fieldIsNestedAndNonExistent +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP @timestamp, unmapped.nested.nonexistent +| SORT @timestamp +; + +@timestamp:date | unmapped.nested.nonexistent:keyword +2024-10-23T12:15:03.360Z | null +2024-10-23T12:27:28.948Z | null +2024-10-23T13:33:34.937Z | null +2024-10-23T13:51:54.732Z | null +2024-10-23T13:52:55.015Z | null +2024-10-23T13:53:55.832Z | null +2024-10-23T13:55:01.543Z | null +; + +######################### +# Multi-parameter tests # +######################### + +noFieldExistsMultiParametersSingleIndex +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP @timestamp, foo, bar, bazz +| SORT @timestamp DESC +; + +@timestamp:date | foo:keyword | bar:keyword | bazz:keyword +2024-10-23T13:55:01.543Z | null | null | null +2024-10-23T13:53:55.832Z | null | null | null +2024-10-23T13:52:55.015Z | null | null | null +2024-10-23T13:51:54.732Z | null | null | null +2024-10-23T13:33:34.937Z | null | null | null +2024-10-23T12:27:28.948Z | null | null | null +2024-10-23T12:15:03.360Z | null | null | null +; + +mixedFieldsMultiParametersSingleIndex +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP 
@timestamp, foo, message, unmapped_message +| SORT @timestamp DESC +; + +@timestamp:date | foo:keyword | message:keyword | unmapped_message:keyword +2024-10-23T13:55:01.543Z | null | Connected to 10.1.0.1! | Disconnected from 10.1.0.1 +2024-10-23T13:53:55.832Z | null | Connection error? | Disconnection error +2024-10-23T13:52:55.015Z | null | Connection error? | Disconnection error +2024-10-23T13:51:54.732Z | null | Connection error? | Disconnection error +2024-10-23T13:33:34.937Z | null | 42 | 43 +2024-10-23T12:27:28.948Z | null | Connected to 10.1.0.2! | Disconnected from 10.1.0.2 +2024-10-23T12:15:03.360Z | null | Connected to 10.1.0.3! | Disconnected from 10.1.0.3 +; + +repeatedInsistFieldsUseTheLastEntry +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data +| KEEP @timestamp, foo, message, unmapped_message +| SORT @timestamp DESC +; + +@timestamp:date | foo:keyword | message:keyword | unmapped_message:keyword +2024-10-23T13:55:01.543Z | null | Connected to 10.1.0.1! | Disconnected from 10.1.0.1 +2024-10-23T13:53:55.832Z | null | Connection error? | Disconnection error +2024-10-23T13:52:55.015Z | null | Connection error? | Disconnection error +2024-10-23T13:51:54.732Z | null | Connection error? | Disconnection error +2024-10-23T13:33:34.937Z | null | 42 | 43 +2024-10-23T12:27:28.948Z | null | Connected to 10.1.0.2! | Disconnected from 10.1.0.2 +2024-10-23T12:15:03.360Z | null | Connected to 10.1.0.3! 
| Disconnected from 10.1.0.3 +; + +##################### +# Multi index tests # +##################### + +mixedFieldsMultiParametersMultiIndex +required_capability: unmapped_fields +required_capability: index_metadata_field +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data, sample_data METADATA _index +| KEEP _index, @timestamp, foo, message, unmapped_message +| SORT @timestamp DESC +; + +_index:keyword | @timestamp:datetime | foo:keyword | message:keyword | unmapped_message:keyword +partial_mapping_sample_data | 2024-10-23T13:55:01.543Z | null | Connected to 10.1.0.1! | Disconnected from 10.1.0.1 +partial_mapping_sample_data | 2024-10-23T13:53:55.832Z | null | Connection error? | Disconnection error +partial_mapping_sample_data | 2024-10-23T13:52:55.015Z | null | Connection error? | Disconnection error +partial_mapping_sample_data | 2024-10-23T13:51:54.732Z | null | Connection error? | Disconnection error +partial_mapping_sample_data | 2024-10-23T13:33:34.937Z | null | 42 | 43 +partial_mapping_sample_data | 2024-10-23T12:27:28.948Z | null | Connected to 10.1.0.2! | Disconnected from 10.1.0.2 +partial_mapping_sample_data | 2024-10-23T12:15:03.360Z | null | Connected to 10.1.0.3! 
| Disconnected from 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | null | Connected to 10.1.0.1 | null +sample_data | 2023-10-23T13:53:55.832Z | null | Connection error | null +sample_data | 2023-10-23T13:52:55.015Z | null | Connection error | null +sample_data | 2023-10-23T13:51:54.732Z | null | Connection error | null +sample_data | 2023-10-23T13:33:34.937Z | null | Disconnected | null +sample_data | 2023-10-23T12:27:28.948Z | null | Connected to 10.1.0.2 | null +sample_data | 2023-10-23T12:15:03.360Z | null | Connected to 10.1.0.3 | null +; + +insistOnTopOfInsistMultiIndex +required_capability: unmapped_fields +required_capability: index_metadata_field +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data, sample_data METADATA _index +| KEEP _index, @timestamp, foo, message, unmapped_message +| SORT @timestamp DESC +; + +_index:keyword | @timestamp:datetime | foo:keyword | message:keyword | unmapped_message:keyword +partial_mapping_sample_data | 2024-10-23T13:55:01.543Z | null | Connected to 10.1.0.1! | Disconnected from 10.1.0.1 +partial_mapping_sample_data | 2024-10-23T13:53:55.832Z | null | Connection error? | Disconnection error +partial_mapping_sample_data | 2024-10-23T13:52:55.015Z | null | Connection error? | Disconnection error +partial_mapping_sample_data | 2024-10-23T13:51:54.732Z | null | Connection error? | Disconnection error +partial_mapping_sample_data | 2024-10-23T13:33:34.937Z | null | 42 | 43 +partial_mapping_sample_data | 2024-10-23T12:27:28.948Z | null | Connected to 10.1.0.2! | Disconnected from 10.1.0.2 +partial_mapping_sample_data | 2024-10-23T12:15:03.360Z | null | Connected to 10.1.0.3! 
| Disconnected from 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | null | Connected to 10.1.0.1 | null +sample_data | 2023-10-23T13:53:55.832Z | null | Connection error | null +sample_data | 2023-10-23T13:52:55.015Z | null | Connection error | null +sample_data | 2023-10-23T13:51:54.732Z | null | Connection error | null +sample_data | 2023-10-23T13:33:34.937Z | null | Disconnected | null +sample_data | 2023-10-23T12:27:28.948Z | null | Connected to 10.1.0.2 | null +sample_data | 2023-10-23T12:15:03.360Z | null | Connected to 10.1.0.3 | null +; + +fieldDoesNotExistMultiIndex +required_capability: index_metadata_field +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data, sample_data METADATA _index +| KEEP _index, @timestamp, foo +| SORT @timestamp DESC +; + +_index:keyword | @timestamp:date | foo:keyword +partial_mapping_sample_data | 2024-10-23T13:55:01.543Z | null +partial_mapping_sample_data | 2024-10-23T13:53:55.832Z | null +partial_mapping_sample_data | 2024-10-23T13:52:55.015Z | null +partial_mapping_sample_data | 2024-10-23T13:51:54.732Z | null +partial_mapping_sample_data | 2024-10-23T13:33:34.937Z | null +partial_mapping_sample_data | 2024-10-23T12:27:28.948Z | null +partial_mapping_sample_data | 2024-10-23T12:15:03.360Z | null +sample_data | 2023-10-23T13:55:01.543Z | null +sample_data | 2023-10-23T13:53:55.832Z | null +sample_data | 2023-10-23T13:52:55.015Z | null +sample_data | 2023-10-23T13:51:54.732Z | null +sample_data | 2023-10-23T13:33:34.937Z | null +sample_data | 2023-10-23T12:27:28.948Z | null +sample_data | 2023-10-23T12:15:03.360Z | null +; + +fieldIsUnmappedMultiIndex +required_capability: index_metadata_field +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data, sample_data METADATA _index +| KEEP @timestamp, message, unmapped_message, _index +| SORT @timestamp 
DESC +; + +@timestamp:date | message:keyword | unmapped_message:keyword | _index:keyword +2024-10-23T13:55:01.543Z | Connected to 10.1.0.1! | Disconnected from 10.1.0.1 | partial_mapping_sample_data +2024-10-23T13:53:55.832Z | Connection error? | Disconnection error | partial_mapping_sample_data +2024-10-23T13:52:55.015Z | Connection error? | Disconnection error | partial_mapping_sample_data +2024-10-23T13:51:54.732Z | Connection error? | Disconnection error | partial_mapping_sample_data +2024-10-23T13:33:34.937Z | 42 | 43 | partial_mapping_sample_data +2024-10-23T12:27:28.948Z | Connected to 10.1.0.2! | Disconnected from 10.1.0.2 | partial_mapping_sample_data +2024-10-23T12:15:03.360Z | Connected to 10.1.0.3! | Disconnected from 10.1.0.3 | partial_mapping_sample_data +2023-10-23T13:55:01.543Z | Connected to 10.1.0.1 | null | sample_data +2023-10-23T13:53:55.832Z | Connection error | null | sample_data +2023-10-23T13:52:55.015Z | Connection error | null | sample_data +2023-10-23T13:51:54.732Z | Connection error | null | sample_data +2023-10-23T13:33:34.937Z | Disconnected | null | sample_data +2023-10-23T12:27:28.948Z | Connected to 10.1.0.2 | null | sample_data +2023-10-23T12:15:03.360Z | Connected to 10.1.0.3 | null | sample_data +; + + +fieldIsMappedToDifferentTypesMultiIndex +required_capability: index_metadata_field +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM sample_data_ts_long, sample_data METADATA _index +| KEEP _index, @timestamp +| SORT _index +; + +_index:keyword | @timestamp:unsupported +sample_data | null +sample_data | null +sample_data | null +sample_data | null +sample_data | null +sample_data | null +sample_data | null +sample_data_ts_long | null +sample_data_ts_long | null +sample_data_ts_long | null +sample_data_ts_long | null +sample_data_ts_long | null +sample_data_ts_long | null +sample_data_ts_long | null +; + +fieldIsMappedToDifferentTypesButDropped +required_capability: 
index_metadata_field +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM sample_data_ts_long, sample_data METADATA _index +| KEEP _index, @timestamp +| DROP @timestamp +| EVAL @timestamp = 42 +| SORT _index +; + +_index:keyword | @timestamp:integer +sample_data | 42 +sample_data | 42 +sample_data | 42 +sample_data | 42 +sample_data | 42 +sample_data | 42 +sample_data | 42 +sample_data_ts_long | 42 +sample_data_ts_long | 42 +sample_data_ts_long | 42 +sample_data_ts_long | 42 +sample_data_ts_long | 42 +sample_data_ts_long | 42 +sample_data_ts_long | 42 +; + +fieldIsPartiallyUnmappedMultiIndex +required_capability: index_metadata_field +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM sample_data, no_mapping_sample_data METADATA _index +| KEEP _index, message +| SORT _index, message DESC +; + +_index:keyword | message:keyword +no_mapping_sample_data | Connection error? +no_mapping_sample_data | Connection error? +no_mapping_sample_data | Connection error? +no_mapping_sample_data | Connected to 10.1.0.3! +no_mapping_sample_data | Connected to 10.1.0.2! +no_mapping_sample_data | Connected to 10.1.0.1! +no_mapping_sample_data | 42 +sample_data | Disconnected +sample_data | Connection error +sample_data | Connection error +sample_data | Connection error +sample_data | Connected to 10.1.0.3 +sample_data | Connected to 10.1.0.2 +sample_data | Connected to 10.1.0.1 +; + +fieldIsPartiallyUnmappedAndRenamedMultiIndex +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM sample_data, no_mapping_sample_data +| KEEP message +| RENAME message AS msg +| SORT msg DESC +; + +msg:keyword +Disconnected +Connection error? +Connection error? +Connection error? +Connection error +Connection error +Connection error +Connected to 10.1.0.3! +Connected to 10.1.0.3 +Connected to 10.1.0.2! 
+Connected to 10.1.0.2 +Connected to 10.1.0.1! +Connected to 10.1.0.1 +42 +; + +fieldIsPartiallyUnmappedPartiallySourceIsDisabledMultiIndex +required_capability: index_metadata_field +required_capability: source_field_mapping +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data,partial_mapping_no_source_sample_data METADATA _index +| KEEP _index, @timestamp, message +| SORT _index, @timestamp +; + +_index:keyword | @timestamp:date | message:keyword +partial_mapping_no_source_sample_data | 2024-10-23T12:15:03.360Z | null +partial_mapping_no_source_sample_data | 2024-10-23T12:27:28.948Z | null +partial_mapping_no_source_sample_data | 2024-10-23T13:33:34.937Z | null +partial_mapping_no_source_sample_data | 2024-10-23T13:51:54.732Z | null +partial_mapping_no_source_sample_data | 2024-10-23T13:52:55.015Z | null +partial_mapping_no_source_sample_data | 2024-10-23T13:53:55.832Z | null +partial_mapping_no_source_sample_data | 2024-10-23T13:55:01.543Z | null +partial_mapping_sample_data | 2024-10-23T12:15:03.360Z | Connected to 10.1.0.3! +partial_mapping_sample_data | 2024-10-23T12:27:28.948Z | Connected to 10.1.0.2! +partial_mapping_sample_data | 2024-10-23T13:33:34.937Z | 42 +partial_mapping_sample_data | 2024-10-23T13:51:54.732Z | Connection error? +partial_mapping_sample_data | 2024-10-23T13:52:55.015Z | Connection error? +partial_mapping_sample_data | 2024-10-23T13:53:55.832Z | Connection error? +partial_mapping_sample_data | 2024-10-23T13:55:01.543Z | Connected to 10.1.0.1! 
+; + +partialMappingStats +required_capability: index_metadata_field +required_capability: source_field_mapping +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data,partial_mapping_excluded_source_sample_data METADATA _index +| STATS max(@timestamp), count(*) BY message +| SORT message NULLS FIRST +; + +max(@timestamp):date | count(*):long | message:keyword +2024-10-23T13:55:01.543Z | 7 | null +2024-10-23T13:33:34.937Z | 1 | 42 +2024-10-23T13:55:01.543Z | 1 | Connected to 10.1.0.1! +2024-10-23T12:27:28.948Z | 1 | Connected to 10.1.0.2! +2024-10-23T12:15:03.360Z | 1 | Connected to 10.1.0.3! +2024-10-23T13:53:55.832Z | 3 | Connection error? +; + +partialMappingCoalesce +required_capability: index_metadata_field +required_capability: source_field_mapping +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data,partial_mapping_excluded_source_sample_data METADATA _index +| EVAL actual_value = COALESCE(message, "no _source") +| DROP message +| KEEP @timestamp, _index, actual_value +| SORT _index, @timestamp ASC +; + +@timestamp:date | _index:keyword | actual_value:keyword +2024-10-23T12:15:03.360Z | partial_mapping_excluded_source_sample_data | no _source +2024-10-23T12:27:28.948Z | partial_mapping_excluded_source_sample_data | no _source +2024-10-23T13:33:34.937Z | partial_mapping_excluded_source_sample_data | no _source +2024-10-23T13:51:54.732Z | partial_mapping_excluded_source_sample_data | no _source +2024-10-23T13:52:55.015Z | partial_mapping_excluded_source_sample_data | no _source +2024-10-23T13:53:55.832Z | partial_mapping_excluded_source_sample_data | no _source +2024-10-23T13:55:01.543Z | partial_mapping_excluded_source_sample_data | no _source +2024-10-23T12:15:03.360Z | partial_mapping_sample_data | Connected to 10.1.0.3! 
+2024-10-23T12:27:28.948Z | partial_mapping_sample_data | Connected to 10.1.0.2! +2024-10-23T13:33:34.937Z | partial_mapping_sample_data | 42 +2024-10-23T13:51:54.732Z | partial_mapping_sample_data | Connection error? +2024-10-23T13:52:55.015Z | partial_mapping_sample_data | Connection error? +2024-10-23T13:53:55.832Z | partial_mapping_sample_data | Connection error? +2024-10-23T13:55:01.543Z | partial_mapping_sample_data | Connected to 10.1.0.1! +; + +partialMappingUnionTypes +required_capability: index_metadata_field +required_capability: source_field_mapping +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data,partial_mapping_excluded_source_sample_data METADATA _index +| EVAL actual_value = message::STRING +| KEEP @timestamp, _index, actual_value +| SORT actual_value, @timestamp ASC +; + +@timestamp:date | _index:keyword | actual_value:string +2024-10-23T13:33:34.937Z | partial_mapping_sample_data | 42 +2024-10-23T13:55:01.543Z | partial_mapping_sample_data | Connected to 10.1.0.1! +2024-10-23T12:27:28.948Z | partial_mapping_sample_data | Connected to 10.1.0.2! +2024-10-23T12:15:03.360Z | partial_mapping_sample_data | Connected to 10.1.0.3! +2024-10-23T13:51:54.732Z | partial_mapping_sample_data | Connection error? +2024-10-23T13:52:55.015Z | partial_mapping_sample_data | Connection error? +2024-10-23T13:53:55.832Z | partial_mapping_sample_data | Connection error? 
+2024-10-23T12:15:03.360Z | partial_mapping_excluded_source_sample_data | null +2024-10-23T12:27:28.948Z | partial_mapping_excluded_source_sample_data | null +2024-10-23T13:33:34.937Z | partial_mapping_excluded_source_sample_data | null +2024-10-23T13:51:54.732Z | partial_mapping_excluded_source_sample_data | null +2024-10-23T13:52:55.015Z | partial_mapping_excluded_source_sample_data | null +2024-10-23T13:53:55.832Z | partial_mapping_excluded_source_sample_data | null +2024-10-23T13:55:01.543Z | partial_mapping_excluded_source_sample_data | null +; + +partialMappingStatsAfterCast +required_capability: index_metadata_field +required_capability: source_field_mapping +required_capability: unmapped_fields +required_capability: optional_fields + +SET unmapped_fields="load"\; +FROM partial_mapping_sample_data,partial_mapping_excluded_source_sample_data +| STATS count(*) BY message::INT +; +warningRegex: Line 3:21: evaluation of \[message::INT\] failed, treating result as null. Only first 20 failures recorded. 
+warningRegex: org.elasticsearch.xpack.esql.core.InvalidArgumentException: Cannot parse number \[.*\] + +count(*):long | message::INT:integer +13 | null +1 | 42 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec new file mode 100644 index 0000000000000..8745ae7987d44 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec @@ -0,0 +1,468 @@ +simpleKeep +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| KEEP foo +| LIMIT 3 +; + +foo:null +null +null +null +; + +keepStar +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| KEEP *, foo +| SORT emp_no +| LIMIT 1 +; + +avg_worked_seconds:long|birth_date:date|emp_no:integer|first_name:keyword|gender:keyword|height:double|height.float:double|height.half_float:double|height.scaled_float:double|hire_date:date|is_rehired:boolean|job_positions:keyword|languages:integer|languages.byte:integer|languages.long:long|languages.short:integer|last_name:keyword|salary:integer|salary_change:double|salary_change.int:integer|salary_change.keyword:keyword|salary_change.long:long|still_hired:boolean|foo:null +268728049 |1953-09-02T00:00:00.000Z|10001 |Georgi |M |2.03 |2.0299999713897705|2.029296875 |2.03 |1986-06-26T00:00:00.000Z|[false, true] |[Accountant, Senior Python Developer]|2 |2 |2 |2 |Facello |57305 |1.19 |1 |1.19 |1 |true |null +; + + +keepStarSimpleKeep +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| KEEP * +| KEEP foo +| LIMIT 3 +; + +foo:null +null +null +null +; + +keepStarEval +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| KEEP * +| EVAL foo = does_not_exist_field + 1 +| SORT emp_no +| LIMIT 1 +; + 
+avg_worked_seconds:long|birth_date:date|emp_no:integer|first_name:keyword|gender:keyword|height:double|height.float:double|height.half_float:double|height.scaled_float:double|hire_date:date|is_rehired:boolean|job_positions:keyword|languages:integer|languages.byte:integer|languages.long:long|languages.short:integer|last_name:keyword|salary:integer|salary_change:double|salary_change.int:integer|salary_change.keyword:keyword|salary_change.long:long|still_hired:boolean|does_not_exist_field:null|foo:integer +268728049 |1953-09-02T00:00:00.000Z|10001 |Georgi |M |2.03 |2.0299999713897705|2.029296875 |2.03 |1986-06-26T00:00:00.000Z|[false, true] |[Accountant, Senior Python Developer]|2 |2 |2 |2 |Facello |57305 |1.19 |1 |1.19 |1 |true |null |null +; + + +dropPatternSimpleKeep +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| DROP a*, b*, e*, fi*, g*, h*, i*, j*, l*, s* +| KEEP foo +| LIMIT 3 +; + +foo:null +null +null +null +; + +dropPatternEval +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| DROP a*, b*, e*, fi*, g*, h*, i*, j*, l*, s* +| EVAL foo = does_not_exist_field + 1 +| LIMIT 3 +; + +does_not_exist_field:null | foo:integer +null | null +null | null +null | null +; + +keepWithPattern +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| KEEP emp_*, foo +| SORT emp_no +| LIMIT 1 +; + +emp_no:integer|foo:null +10001 |null +; + +rowKeep +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| EVAL y = does_not_exist_field1::INTEGER + x +| KEEP *, does_not_exist_field2 +; + +x:integer |does_not_exist_field1:null|y:integer |does_not_exist_field2:null +1 |null |null |null +; + +rowDrop +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| DROP does_not_exist +; + +x:integer +1 +; + +rowRename +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| RENAME x 
AS y, foo AS bar +; + +y:integer | bar:null +1 |null +; + +fieldRename +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| RENAME foo AS bar +| KEEP emp_no, bar, bazz +| SORT emp_no +| LIMIT 3 +; + +emp_no:integer | bar:null | bazz:null +10001 | null | null +10002 | null | null +10003 | null | null +; + +rowRenameEval +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| RENAME x AS whatever +| EVAL x = does_not_exist_field + 1 +; + +whatever:integer | does_not_exist_field:null | x:integer +1 | null | null +; + +casting +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| EVAL foo::LONG +; + +x:integer |foo:null |foo::LONG:long +1 |null |null +; + +shadowing +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| KEEP foo +| EVAL foo = 2 +; + +foo:integer +2 +; + +# https://github.com/elastic/elasticsearch/pull/139797 +statsAggs-Ignore +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS s = SUM(foo) +; + +s:long +null +; + +statsGroups +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS BY foo +; + +foo:null +null +; + +# https://github.com/elastic/elasticsearch/pull/139797 +statsAggs-Ignore +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS s = SUM(foo) BY bar +; + +s:long | bar:null +null | null +; + +statsExpressions +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS s = SUM(x) + bar BY bar +; + +s:long | bar:null +null | null +; + +statsExpressionsWithAliases +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS s = SUM(x) + b + c BY b = bar + baz, c = x +; + +s:long | b:null | c:integer +null | null | 1 +; + +statsFilteredAggs +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS s = 
COUNT(x) WHERE foo::LONG > 10 +; + +s:long +0 +; + +statsFilteredAggsAndGroups +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| STATS s = COUNT(x) WHERE foo::LONG > 10 BY bar +; + +s:long | bar:null +0 | null +; + +inlinestatsSum +required_capability: optional_fields +SET unmapped_fields="nullify"\; +ROW x = 1 +| INLINE STATS s = SUM(x) + b + c BY b = bar + baz, c = x - 1 +; + +x:integer | bar:null | baz:null | s:long | b:null | c:integer +1 | null | null | null | null | 0 +; + +filtering +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = 1 +| WHERE foo IS NULL +; + +x:integer | foo:null +1 | null +; + +filteringExpression +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +FROM employees +| WHERE emp_no_foo::LONG > 0 OR emp_no < 10002 +| KEEP emp_n* +; + +emp_no:integer | emp_no_foo:null +10001 | null +; + +sort +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = [1, 2] +| MV_EXPAND x +| SORT foo +; + +x:integer | foo:null +2 | null +1 | null +; + +sortExpression +required_capability: optional_fields + +SET unmapped_fields="nullify"\; +ROW x = [1, 2] +| MV_EXPAND x +| SORT foo::LONG + 2, x +; + +x:integer | foo:null +1 | null +2 | null +; + +mvExpand +required_capability: optional_fields +SET unmapped_fields="nullify"\; +ROW x = 1 +| MV_EXPAND foo +; + +x:integer | foo:null +1 | null +; + +# TODO. FROMx: this fails in parsing with just FROM even if -Ignore'd(!), in bwc tests only(!) 
+subqueryNoMainIndex-Ignore +required_capability: optional_fields +required_capability: subquery_in_from_command + +SET unmapped_fields="nullify"\; +FROMx + (FROM employees + | EVAL emp_no_plus = emp_no_foo::LONG + 1 + | WHERE emp_no < 10003) +| KEEP emp_no* +| SORT emp_no, emp_no_plus +; + +emp_no:integer | emp_no_foo:null | emp_no_plus:long +10001 | null | null +10002 | null | null +; + +subqueryInFromWithStatsInMainQuery-Ignore +required_capability: optional_fields +required_capability: subquery_in_from_command + +SET unmapped_fields="nullify"\; +FROM sample_data, sample_data_str, + (FROM sample_data_ts_nanos + | WHERE client_ip == "172.21.3.15" OR foo::IP == "1.1.1.1"), + (FROM sample_data_ts_long + | EVAL @timestamp = @timestamp::date_nanos, bar = baz::KEYWORD + | WHERE client_ip == "172.21.0.5") +| EVAL client_ip = client_ip::ip +| STATS BY client_ip, foo, bar, baz +| SORT client_ip +; + +client_ip:ip |foo:null |bar:keyword |baz:null +172.21.0.5 |null |null |null +172.21.2.113 |null |null |null +172.21.2.162 |null |null |null +172.21.3.15 |null |null |null +; + +forkBranchesWithDifferentSchemas +required_capability: optional_fields +required_capability: fork_v9 + +SET unmapped_fields="nullify"\; +FROM employees +| WHERE does_not_exist2 IS NULL +| FORK (WHERE emp_no > 10000 | SORT does_not_exist3, emp_no | LIMIT 3 ) + (WHERE emp_no < 10002 | EVAL xyz = COALESCE(does_not_exist4, "def", "abc")) + (DISSECT hire_date::KEYWORD "%{year}-%{month}-%{day}T" + | STATS x = MIN(year::LONG), y = MAX(month::LONG) WHERE year::LONG > 1000 + does_not_exist5::DOUBLE + | EVAL xyz = "abc") +| KEEP emp_no, x, y, xyz, _fork +| SORT _fork, emp_no +; + +emp_no:integer |x:long |y:long |xyz:keyword |_fork:keyword +10001 |null |null |null |fork1 +10002 |null |null |null |fork1 +10003 |null |null |null |fork1 +10001 |null |null |def |fork2 +null |1985 |null |abc |fork3 +; + +inlinestatsCount +required_capability: optional_fields +required_capability: inline_stats + +SET 
unmapped_fields="nullify"\; +ROW x = 1 +| INLINE STATS c = COUNT(*), s = SUM(does_not_exist) BY d = does_not_exist +; + +# `c` should be just 0 : https://github.com/elastic/elasticsearch/issues/139887 +x:integer |does_not_exist:null|c:long |s:double |d:null +1 |null |null |null |null +; + +lookupJoin +required_capability: optional_fields +required_capability: join_lookup_v12 + +SET unmapped_fields="nullify"\; +ROW x = 1 +| EVAL language_code = does_not_exist::INTEGER +| LOOKUP JOIN languages_lookup ON language_code +; + +x:integer |does_not_exist:null |language_code:integer |language_name:keyword +1 |null |null |null +; + +enrich +required_capability: optional_fields +required_capability: enrich_load + +SET unmapped_fields="nullify"\; +ROW x = 1 +| EVAL y = does_not_exist::KEYWORD +| ENRICH languages_policy ON y +; + +x:integer |does_not_exist:null |y:keyword | language_name:keyword +1 |null |null |null +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 8a1a7f1cad334..802063537ee2c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -153,11 +153,17 @@ public enum Cap { * Cast string literals to a desired data type for IN predicate and more types for BinaryComparison. */ STRING_LITERAL_AUTO_CASTING_EXTENDED, + /** * Support for metadata fields. */ METADATA_FIELDS, + /** + * Support for optional fields (might or might not be present in the mappings). + */ + OPTIONAL_FIELDS(Build.current().isSnapshot()), + /** * Support specifically for *just* the _index METADATA field. Used by CsvTests, since that is the only metadata field currently * supported. 
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 2b90e8597c9c2..1f82d988c51aa 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -22,6 +22,8 @@ import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.analysis.AnalyzerRules.ParameterizedAnalyzerRule; +import org.elasticsearch.xpack.esql.analysis.rules.ResolveUnmapped; +import org.elasticsearch.xpack.esql.analysis.rules.ResolvedProjects; import org.elasticsearch.xpack.esql.capabilities.ConfigurationAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; import org.elasticsearch.xpack.esql.common.Failure; @@ -41,6 +43,7 @@ import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedPattern; import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar; import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; @@ -145,6 +148,7 @@ import org.elasticsearch.xpack.esql.plan.logical.join.LookupJoin; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier; +import org.elasticsearch.xpack.esql.plan.logical.local.ResolvingProject; import org.elasticsearch.xpack.esql.plan.logical.promql.PromqlCommand; import org.elasticsearch.xpack.esql.rule.ParameterizedRule; import 
org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor; @@ -231,9 +235,17 @@ public class Analyzer extends ParameterizedRuleExecutor("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new AddImplicitForkLimit(), new UnionTypesCleanup()) + new Batch<>( + "Finish Analysis", + Limiter.ONCE, + new ResolvedProjects(), + new AddImplicitLimit(), + new AddImplicitForkLimit(), + new UnionTypesCleanup() + ) ); public static final TransportVersion ESQL_LOOKUP_JOIN_FULL_TEXT_FUNCTION = TransportVersion.fromName( "esql_lookup_join_full_text_function" @@ -505,6 +517,7 @@ protected LogicalPlan rule(LogicalPlan plan, AnalyzerContext context) { if (plan.childrenResolved() == false) { return plan; } + // TODO: assess if building this list is still required ahead of the switch, or if it can be done per command only where needed final List childrenOutput = new ArrayList<>(); // Gather all the children's output in case of non-unary plans; even for unaries, we need to copy because we may mutate this to @@ -514,13 +527,13 @@ protected LogicalPlan rule(LogicalPlan plan, AnalyzerContext context) { childrenOutput.addAll(output); } - return switch (plan) { + var resolved = switch (plan) { case Aggregate a -> resolveAggregate(a, childrenOutput); case Completion c -> resolveCompletion(c, childrenOutput); - case Drop d -> resolveDrop(d, childrenOutput); - case Rename r -> resolveRename(r, childrenOutput); - case Keep p -> resolveKeep(p, childrenOutput); - case Fork f -> resolveFork(f, context); + case Drop d -> resolveDrop(d, context.unmappedResolution()); + case Rename r -> resolveRename(r, context.unmappedResolution()); + case Keep k -> resolveKeep(k, context.unmappedResolution()); + case Fork f -> resolveFork(f); case Eval p -> resolveEval(p, childrenOutput); case Enrich p -> resolveEnrich(p, childrenOutput); case MvExpand p -> resolveMvExpand(p, childrenOutput); @@ -532,19 +545,33 @@ protected LogicalPlan rule(LogicalPlan plan, AnalyzerContext context) { case PromqlCommand promql 
-> resolvePromql(promql, childrenOutput); default -> plan.transformExpressionsOnly(UnresolvedAttribute.class, ua -> maybeResolveAttribute(ua, childrenOutput)); }; + + return context.unmappedResolution() == UnmappedResolution.LOAD ? resolvePartiallyMapped(resolved, context) : resolved; } private LogicalPlan resolveAggregate(Aggregate aggregate, List childrenOutput) { // if the grouping is resolved but the aggs are not, use the former to resolve the latter // e.g. STATS a ... GROUP BY a = x + 1 - Holder changed = new Holder<>(false); - List groupings = aggregate.groupings(); - List aggregates = aggregate.aggregates(); - Function resolve = ua -> maybeResolveAttribute(ua, childrenOutput); // first resolve groupings since the aggs might refer to them // trying to globally resolve unresolved attributes will lead to some being marked as unresolvable + List newGroupings = maybeResolveGroupings(aggregate, childrenOutput); + List newAggregates = maybeResolveAggregates(aggregate, newGroupings, childrenOutput); + boolean changed = newGroupings != aggregate.groupings() || newAggregates != aggregate.aggregates(); + LogicalPlan maybeNewAggregate = changed ? aggregate.with(aggregate.child(), newGroupings, newAggregates) : aggregate; + + return maybeNewAggregate instanceof TimeSeriesAggregate ts && ts.timestamp() instanceof UnresolvedAttribute unresolvedTimestamp + ? 
ts.withTimestamp(maybeResolveAttribute(unresolvedTimestamp, childrenOutput)) + : maybeNewAggregate; + + } + + private List maybeResolveGroupings(Aggregate aggregate, List childrenOutput) { + List groupings = aggregate.groupings(); + if (Resolvables.resolved(groupings) == false) { + Holder changed = new Holder<>(false); List newGroupings = new ArrayList<>(groupings.size()); + Function resolve = ua -> maybeResolveAttribute(ua, childrenOutput); for (Expression g : groupings) { Expression resolved = g.transformUp(UnresolvedAttribute.class, resolve); if (resolved != g) { @@ -552,32 +579,44 @@ private LogicalPlan resolveAggregate(Aggregate aggregate, List childr } newGroupings.add(resolved); } - groupings = newGroupings; + if (changed.get()) { - aggregate = aggregate.with(aggregate.child(), newGroupings, aggregate.aggregates()); - changed.set(false); + return newGroupings; } } - if (Resolvables.resolved(groupings) == false || Resolvables.resolved(aggregates) == false) { - ArrayList resolved = new ArrayList<>(); - for (Expression e : groupings) { - Attribute attr = Expressions.attribute(e); - if (attr != null && attr.resolved()) { - resolved.add(attr); - } + return groupings; + } + + private List maybeResolveAggregates( + Aggregate aggregate, + List newGroupings, + List childrenOutput + ) { + List groupings = aggregate.groupings(); + List aggregates = aggregate.aggregates(); + + ArrayList resolvedGroupings = new ArrayList<>(newGroupings.size()); + for (Expression e : newGroupings) { + Attribute attr = Expressions.attribute(e); + if (attr != null && attr.resolved()) { + resolvedGroupings.add(attr); } - List resolvedList = NamedExpressions.mergeOutputAttributes(resolved, childrenOutput); + } + + boolean allGroupingsResolved = groupings.size() == resolvedGroupings.size(); + if (allGroupingsResolved == false || Resolvables.resolved(aggregates) == false) { + Holder changed = new Holder<>(false); + List resolvedList = NamedExpressions.mergeOutputAttributes(resolvedGroupings, 
childrenOutput); - List newAggregates = new ArrayList<>(); - // If the groupings are not resolved, skip the resolution of the references to groupings in the aggregates, resolve the + List newAggregates = new ArrayList<>(aggregates.size()); + // If no groupings are resolved, skip the resolution of the references to groupings in the aggregates, resolve the // aggregations that do not reference to groupings, so that the fields/attributes referenced by the aggregations can be // resolved, and verifier doesn't report field/reference/column not found errors for them. - boolean groupingResolved = Resolvables.resolved(groupings); - int size = groupingResolved ? aggregates.size() : aggregates.size() - groupings.size(); + int aggsIndexLimit = resolvedGroupings.isEmpty() ? aggregates.size() - groupings.size() : aggregates.size(); for (int i = 0; i < aggregates.size(); i++) { NamedExpression maybeResolvedAgg = aggregates.get(i); - if (i < size) { // Skip resolving references to groupings in the aggregations if the groupings are not resolved yet. + if (i < aggsIndexLimit) { // Skip resolving references to groupings in the aggs if no groupings are resolved yet. maybeResolvedAgg = (NamedExpression) maybeResolvedAgg.transformUp(UnresolvedAttribute.class, ua -> { Expression ne = ua; Attribute maybeResolved = maybeResolveAttribute(ua, resolvedList); @@ -585,7 +624,7 @@ private LogicalPlan resolveAggregate(Aggregate aggregate, List childr // maybeResolved is not resolved, return the original UnresolvedAttribute, so that it has another chance // to get resolved in the next iteration. 
// For example STATS c = count(emp_no), x = d::int + 1 BY d = (date == "2025-01-01") - if (groupingResolved || maybeResolved.resolved()) { + if (allGroupingsResolved || maybeResolved.resolved()) { changed.set(true); ne = maybeResolved; } @@ -595,14 +634,12 @@ private LogicalPlan resolveAggregate(Aggregate aggregate, List childr newAggregates.add(maybeResolvedAgg); } - // TODO: remove this when Stats interface is removed - aggregate = changed.get() ? aggregate.with(aggregate.child(), groupings, newAggregates) : aggregate; - } - if (aggregate instanceof TimeSeriesAggregate ts && ts.timestamp() instanceof UnresolvedAttribute unresolvedTimestamp) { - return ts.withTimestamp(maybeResolveAttribute(unresolvedTimestamp, childrenOutput)); - } else { - return aggregate; + if (changed.get()) { + return newAggregates; + } } + + return aggregates; } private LogicalPlan resolveCompletion(Completion p, List childrenOutput) { @@ -869,13 +906,12 @@ private boolean isTranslatable(Expression expression) { return translatable(expression, LucenePushdownPredicates.DEFAULT) != TranslationAware.Translatable.NO; } - private LogicalPlan resolveFork(Fork fork, AnalyzerContext context) { + private LogicalPlan resolveFork(Fork fork) { // we align the outputs of the sub plans such that they have the same columns boolean changed = false; List newSubPlans = new ArrayList<>(); List outputUnion = Fork.outputUnion(fork.children()); List forkColumns = outputUnion.stream().map(Attribute::name).toList(); - Set unsupportedAttributeNames = Fork.outputUnsupportedAttributeNames(fork.children()); for (LogicalPlan logicalPlan : fork.children()) { Source source = logicalPlan.source(); @@ -907,7 +943,7 @@ private LogicalPlan resolveFork(Fork fork, AnalyzerContext context) { } List subPlanColumns = logicalPlan.output().stream().map(Attribute::name).toList(); - // We need to add an explicit Project to align the outputs. + // We need to add an explicit projection to align the outputs. 
if (logicalPlan instanceof Project == false || subPlanColumns.equals(forkColumns) == false) { changed = true; List newOutput = new ArrayList<>(); @@ -918,7 +954,7 @@ private LogicalPlan resolveFork(Fork fork, AnalyzerContext context) { } } } - logicalPlan = resolveKeep(new Keep(logicalPlan.source(), logicalPlan, newOutput), logicalPlan.output()); + logicalPlan = resolveKeep(new Keep(logicalPlan.source(), logicalPlan, newOutput), UnmappedResolution.FAIL); } newSubPlans.add(logicalPlan); @@ -1013,7 +1049,7 @@ private LogicalPlan resolveInsist(Insist insist, List childrenOutput, return insist.withAttributes(list); } - private List collectIndexResolutions(LogicalPlan plan, AnalyzerContext context) { + private static List collectIndexResolutions(LogicalPlan plan, AnalyzerContext context) { List resolutions = new ArrayList<>(); plan.forEachDown(EsRelation.class, e -> { var resolution = context.indexResolution().get(new IndexPattern(e.source(), e.indexPattern())); @@ -1042,7 +1078,7 @@ private Attribute resolveInsistAttribute(Attribute attribute, List ch return resolvedCol; } - private static Attribute invalidInsistAttribute(FieldAttribute fa) { + private static FieldAttribute invalidInsistAttribute(FieldAttribute fa) { var name = fa.name(); EsField field = fa.field() instanceof InvalidMappedField imf ? 
new InvalidMappedField(name, InvalidMappedField.makeErrorsMessageIncludingInsistKeyword(imf.getTypesToIndices())) @@ -1056,7 +1092,7 @@ private static Attribute invalidInsistAttribute(FieldAttribute fa) { return new FieldAttribute(fa.source(), null, fa.qualifier(), name, field); } - private static FieldAttribute insistKeyword(Attribute attribute) { + public static FieldAttribute insistKeyword(Attribute attribute) { return new FieldAttribute( attribute.source(), null, @@ -1066,6 +1102,54 @@ private static FieldAttribute insistKeyword(Attribute attribute) { ); } + /** + * This will inspect current node/{@code plan}'s expressions and check if any of the {@code FieldAttribute}s refer to fields that + * are partially unmapped across the indices involved in the plan fragment. If so, replace their field with an "insisted" EsField. + */ + private static LogicalPlan resolvePartiallyMapped(LogicalPlan plan, AnalyzerContext context) { + var indexResolutions = collectIndexResolutions(plan, context); + Map insistedMap = new HashMap<>(); + var transformed = plan.transformExpressionsOnly(FieldAttribute.class, fa -> { + var esField = fa.field(); + var isInsisted = esField instanceof PotentiallyUnmappedKeywordEsField || esField instanceof InvalidMappedField; + if (isInsisted == false) { + var existing = insistedMap.get(fa); + if (existing != null) { // field shows up multiple times in the node; return first processing + return existing; + } + // Field is partially unmapped. + if (indexResolutions.stream().anyMatch(r -> r.get().isPartiallyUnmappedField(fa.name()))) { + FieldAttribute newFA = fa.dataType() == KEYWORD ? insistKeyword(fa) : invalidInsistAttribute(fa); + insistedMap.put(fa, newFA); + return newFA; + } + } + return fa; + }); + return insistedMap.isEmpty() ? 
transformed : propagateInsistedFields(transformed, insistedMap); + } + + /** + * Push only those fields from the {@code insistedMap} into {@code EsRelation}s in the {@code plan} that wrap a + * {@code PotentiallyUnmappedKeywordEsField}. + */ + private static LogicalPlan propagateInsistedFields(LogicalPlan plan, Map insistedMap) { + return plan.transformUp(EsRelation.class, esr -> { + var newOutput = new ArrayList(); + boolean updated = false; + for (Attribute attr : esr.output()) { + var newFA = insistedMap.get(attr); + if (newFA != null && newFA.field() instanceof PotentiallyUnmappedKeywordEsField) { + newOutput.add(newFA); + updated = true; + } else { + newOutput.add(attr); + } + } + return updated ? esr.withAttributes(newOutput) : esr; + }); + } + private LogicalPlan resolveFuse(Fuse fuse, List childrenOutput) { Source source = fuse.source(); Attribute score = fuse.score(); @@ -1219,14 +1303,19 @@ private LogicalPlan resolveEval(Eval eval, List childOutput) { * row foo = 1, bar = 2 | keep foo, * -> foo, bar * row foo = 1, bar = 2 | keep bar*, foo, * -> bar, foo */ - private LogicalPlan resolveKeep(Project p, List childOutput) { - List resolvedProjections = new ArrayList<>(); - var projections = p.projections(); + private static LogicalPlan resolveKeep(Keep keep, UnmappedResolution unmappedResolution) { + return unmappedResolution == UnmappedResolution.FAIL + ? 
new Project(keep.source(), keep.child(), keepResolver(keep.projections(), keep.child().output())) + : new ResolvingProject(keep.source(), keep.child(), inputAttributes -> keepResolver(keep.projections(), inputAttributes)); + } + + private static List keepResolver(List projections, List childOutput) { + List resolvedProjections; // start with projections // no projection specified or just * - if (projections.isEmpty() || (projections.size() == 1 && projections.get(0) instanceof UnresolvedStar)) { - resolvedProjections.addAll(childOutput); + if (projections.isEmpty() || (projections.size() == 1 && projections.getFirst() instanceof UnresolvedStar)) { + resolvedProjections = new ArrayList<>(childOutput); } // otherwise resolve them else { @@ -1252,7 +1341,7 @@ private LogicalPlan resolveKeep(Project p, List childOutput) { } else { throw new EsqlIllegalArgumentException("unexpected projection: " + proj); } - for (Attribute attr : resolved) { + for (var attr : resolved) { Integer previousPrio = priorities.get(attr); if (previousPrio == null || previousPrio >= priority) { priorities.remove(attr); @@ -1263,13 +1352,19 @@ private LogicalPlan resolveKeep(Project p, List childOutput) { resolvedProjections = new ArrayList<>(priorities.keySet()); } - return new Project(p.source(), p.child(), resolvedProjections); + return resolvedProjections; + } + + private static LogicalPlan resolveDrop(Drop drop, UnmappedResolution unmappedResolution) { + return unmappedResolution == UnmappedResolution.FAIL + ? 
new Project(drop.source(), drop.child(), dropResolver(drop.removals(), drop.output())) + : new ResolvingProject(drop.source(), drop.child(), inputAttributes -> dropResolver(drop.removals(), inputAttributes)); } - private LogicalPlan resolveDrop(Drop drop, List childOutput) { + private static List dropResolver(List removals, List childOutput) { List resolvedProjections = new ArrayList<>(childOutput); - for (NamedExpression ne : drop.removals()) { + for (NamedExpression ne : removals) { List resolved; if (ne instanceof UnresolvedNamePattern np) { @@ -1293,21 +1388,25 @@ private LogicalPlan resolveDrop(Drop drop, List childOutput) { }); } - return new Project(drop.source(), drop.child(), resolvedProjections); + return resolvedProjections; } - private LogicalPlan resolveRename(Rename rename, List childrenOutput) { - List projections = projectionsForRename(rename, childrenOutput, log); - - return new Project(rename.source(), rename.child(), projections); + private LogicalPlan resolveRename(Rename rename, UnmappedResolution unmappedResolution) { + return unmappedResolution == UnmappedResolution.FAIL + ? new Project(rename.source(), rename.child(), projectionsForRename(rename, rename.child().output(), log)) + : new ResolvingProject( + rename.source(), + rename.child(), + inputAttributes -> projectionsForRename(rename, inputAttributes, log) + ); } /** - * This will turn a {@link Rename} into an equivalent {@link Project}. - * Can mutate {@code childrenOutput}; hand this a copy if you want to avoid mutation. + * This will compute the projections for a {@link Rename}. 
*/ - public static List projectionsForRename(Rename rename, List childrenOutput, Logger logger) { - List projections = new ArrayList<>(childrenOutput); + public static List projectionsForRename(Rename rename, List inputAttributes, Logger logger) { + List childrenOutput = new ArrayList<>(inputAttributes); + List projections = new ArrayList<>(inputAttributes); int renamingsCount = rename.renamings().size(); List unresolved = new ArrayList<>(renamingsCount); @@ -1354,7 +1453,7 @@ public static List projectionsForRename(Rename rename, List resolveAgainstList(UnresolvedNamePattern up, Collection attrList) { - UnresolvedAttribute ua = new UnresolvedAttribute(up.source(), up.pattern()); + UnresolvedAttribute ua = new UnresolvedPattern(up.source(), up.pattern()); Predicate matcher = a -> up.match(a.name()); var matches = AnalyzerRules.maybeResolveAgainstList(matcher, () -> ua, attrList, true, a -> Analyzer.handleSpecialFields(ua, a)); return potentialCandidatesIfNoMatchesFound(ua, matches, attrList, list -> UnresolvedNamePattern.errorMessage(up.pattern(), list)); @@ -2468,10 +2567,12 @@ private LogicalPlan doRule(Aggregate plan) { /** * Handle union types in UnionAll: - * 1. Push down explicit conversion functions into the UnionAll branches - * 2. Replace the explicit conversion functions with the corresponding attributes in the UnionAll output - * 3. Implicitly cast the outputs of the UnionAll branches to the common type, this applies to date and date_nanos types only - * 4. Update the attributes referencing the updated UnionAll output + *
+     * <ol>
+     *     <li>Push down explicit conversion functions into the UnionAll branches</li>
+     *     <li>Replace the explicit conversion functions with the corresponding attributes in the UnionAll output</li>
+     *     <li>Implicitly cast the outputs of the UnionAll branches to the common type, this applies to date and date_nanos types only</li>
+     *     <li>Update the attributes referencing the updated UnionAll output</li>
+     * </ol>
*/ private static class ResolveUnionTypesInUnionAll extends Rule { @@ -2590,7 +2691,7 @@ private static Map> collectConvertFunctions * Push down the conversion functions into the child plan by adding an Eval with the new aliases on top of the child plan. */ private static LogicalPlan maybePushDownConvertFunctionsToChild(LogicalPlan child, List aliases, List output) { - // Fork/UnionAll adds an Project on top of each child plan during resolveFork, check this pattern before pushing down + // Fork/UnionAll adds a projection on top of each child plan during resolveFork, check this pattern before pushing down // If the pattern doesn't match, something unexpected happened, just return the child as is if (aliases.isEmpty() == false && child instanceof Project project) { LogicalPlan childOfProject = project.child(); @@ -2735,7 +2836,7 @@ private static LogicalPlan implicitCastingUnionAllOutput( outputChanged = true; } } - // create a new eval for the casting expressions, and push it down under the Project + // create a new eval for the casting expressions, and push it down under the projection newChildren.add(maybePushDownConvertFunctionsToChild(child, newAliases, newChildOutput)); } @@ -2926,17 +3027,22 @@ private static LogicalPlan updateAttributesReferencingUpdatedUnionAllOutput( /** * Prune branches of a UnionAll that resolve to empty subqueries. * For example, given the following plan, the index resolution of 'remote:missingIndex' is EMPTY_SUBQUERY: + *
      * UnionAll[[]]
      * |_EsRelation[test][...]
      * |_Subquery[]
      * | \_UnresolvedRelation[remote:missingIndex]
      * \_Subquery[]
      *   \_EsRelation[sample_data][...]
+     * </pre>
+ * * The branch with EMPTY_SUBQUERY index resolution is pruned in the plan after the rule is applied: + *
      * UnionAll[[]]
      * |_EsRelation[test][...]
      * \_Subquery[]
      *   \_EsRelation[sample_data][...]
+     * </pre>
*/ private static class PruneEmptyUnionAllBranch extends ParameterizedAnalyzerRule { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerContext.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerContext.java index cb0aaa96bb56d..5d32b6028c8ec 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerContext.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerContext.java @@ -26,6 +26,7 @@ public class AnalyzerContext { private final InferenceResolution inferenceResolution; private final TransportVersion minimumVersion; private Boolean hasRemoteIndices; + private final UnmappedResolution unmappedResolution; public AnalyzerContext( Configuration configuration, @@ -34,7 +35,8 @@ public AnalyzerContext( Map lookupResolution, EnrichResolution enrichResolution, InferenceResolution inferenceResolution, - TransportVersion minimumVersion + TransportVersion minimumVersion, + UnmappedResolution unmappedResolution ) { this.configuration = configuration; this.functionRegistry = functionRegistry; @@ -43,6 +45,7 @@ public AnalyzerContext( this.enrichResolution = enrichResolution; this.inferenceResolution = inferenceResolution; this.minimumVersion = minimumVersion; + this.unmappedResolution = unmappedResolution; assert minimumVersion != null : "AnalyzerContext must have a minimum transport version"; assert TransportVersion.current().supports(minimumVersion) @@ -85,7 +88,16 @@ public boolean includesRemoteIndices() { return hasRemoteIndices; } - public AnalyzerContext(Configuration configuration, EsqlFunctionRegistry functionRegistry, EsqlSession.PreAnalysisResult result) { + public UnmappedResolution unmappedResolution() { + return unmappedResolution; + } + + public AnalyzerContext( + Configuration configuration, + EsqlFunctionRegistry functionRegistry, + UnmappedResolution unmappedResolution, + EsqlSession.PreAnalysisResult result + ) 
{ this( configuration, functionRegistry, @@ -93,7 +105,8 @@ public AnalyzerContext(Configuration configuration, EsqlFunctionRegistry functio result.lookupIndices(), result.enrichResolution(), result.inferenceResolution(), - result.minimumTransportVersion() + result.minimumTransportVersion(), + unmappedResolution ); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/UnmappedResolution.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/UnmappedResolution.java new file mode 100644 index 0000000000000..f0e8907f4a980 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/UnmappedResolution.java @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.analysis; + +import org.elasticsearch.xpack.esql.core.type.DataType; + +/** + * This is an unmapped-fields strategy discriminator. + */ +public enum UnmappedResolution { + /** + * Don't attempt to patch the plan: in case the query uses such a field not present in the index mapping, fail the query. + */ + FAIL, + + /** + * In case the query references a field that's not present in the index mapping, alias this field to value {@code null} of type + * {@link DataType#NULL} + */ + NULLIFY, + + /** + * In case the query references a field that's not present in the index mapping, attempt to load it from {@code _source}. 
+ */ + LOAD +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolveUnmapped.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolveUnmapped.java new file mode 100644 index 0000000000000..6052c72b22a26 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolveUnmapped.java @@ -0,0 +1,317 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.analysis.rules; + +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.analysis.AnalyzerRules; +import org.elasticsearch.xpack.esql.analysis.UnmappedResolution; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.NameId; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedPattern; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp; +import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.util.Holder; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.Eval; +import 
org.elasticsearch.xpack.esql.plan.logical.Fork; +import org.elasticsearch.xpack.esql.plan.logical.LeafPlan; +import org.elasticsearch.xpack.esql.plan.logical.Limit; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.Project; +import org.elasticsearch.xpack.esql.plan.logical.Row; +import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; +import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.elasticsearch.xpack.esql.analysis.Analyzer.ResolveRefs.insistKeyword; +import static org.elasticsearch.xpack.esql.core.util.CollectionUtils.combine; + +/** + * The rule handles fields that don't show up in the index mapping, but are used within the query. These fields can either be missing + * entirely, or be present in the document, but not in the mapping (which can happen with non-dynamic mappings). The handling strategy is + * driven by the {@link AnalyzerContext#unmappedResolution()} setting. + *
+ * <p>
+ * In the case of the former ones, the rule introduces {@code EVAL missing = NULL} commands (null-aliasing / null-Eval'ing). + *
+ * <p>
+ * In the case of the latter ones, it introduces field extractors in the source (where this supports it). + *
+ * <p>
+ * In both cases, the rule takes care of propagation of the aliases, where needed (i.e., through "artificial" projections introduced within + * the analyzer itself; vs. the KEEP/RENAME/DROP-introduced projections). Note that this doesn't "boost" the visibility of such an + * attribute: if, for instance, referencing a mapping-missing attribute occurs after a STATS that doesn't group by it, that attribute will + * remain unresolved and fail the verification. The language remains semantically consistent. + */ +public class ResolveUnmapped extends AnalyzerRules.ParameterizedAnalyzerRule { + + private static final Literal NULLIFIED = Literal.NULL; + + @Override + protected LogicalPlan rule(LogicalPlan plan, AnalyzerContext context) { + return switch (context.unmappedResolution()) { + case UnmappedResolution.FAIL -> plan; + case UnmappedResolution.NULLIFY -> resolve(plan, false); + case UnmappedResolution.LOAD -> resolve(plan, true); + }; + } + + private static LogicalPlan resolve(LogicalPlan plan, boolean load) { + if (plan.childrenResolved() == false) { + return plan; + } + + var unresolved = collectUnresolved(plan); + if (unresolved.isEmpty()) { + return plan; + } + + var transformed = load ? load(plan, unresolved) : nullify(plan, unresolved); + + return transformed.equals(plan) ? plan : refreshPlan(transformed, unresolved); + } + + /** + * The method introduces {@code EVAL missing_field = NULL}-equivalent into the plan, on top of the source, for every attribute in + * {@code unresolved}. It also "patches" the introduced attributes through the plan, where needed (like through Fork/UntionAll). 
+ */ + private static LogicalPlan nullify(LogicalPlan plan, List unresolved) { + var nullAliases = nullAliases(unresolved); + + // insert an Eval on top of every LeafPlan, if there's a UnaryPlan atop it + var transformed = plan.transformUp( + n -> n instanceof UnaryPlan unary && unary.child() instanceof LeafPlan, + p -> evalUnresolvedUnary((UnaryPlan) p, nullAliases) + ); + // insert an Eval on top of those LeafPlan that are children of n-ary plans (could happen with UnionAll) + transformed = transformed.transformUp( + n -> n instanceof UnaryPlan == false && n instanceof LeafPlan == false, + nAry -> evalUnresolvedNary(nAry, nullAliases) + ); + + return transformed.transformUp(Fork.class, f -> patchFork(f, Expressions.asAttributes(nullAliases))); + } + + /** + * This method introduces field extractors - via "insisted", {@link PotentiallyUnmappedKeywordEsField} wrapped in + * {@link FieldAttribute} - for every attribute in {@code unresolved}, within the {@link EsRelation}s in the plan accessible from + * the given {@code plan}. + *
+ * <p>
+ * It also "patches" the introduced attributes through the plan, where needed (like through Fork/UntionAll). + */ + private static LogicalPlan load(LogicalPlan plan, List unresolved) { + // TODO: this will need to be revisited for non-lookup joining or scenarios where we won't want extraction from specific sources + var transformed = plan.transformUp(EsRelation.class, esr -> { + if (esr.indexMode() == IndexMode.LOOKUP) { + return esr; + } + List fieldsToLoad = fieldsToLoad(unresolved, esr.outputSet().names()); + // there shouldn't be any duplicates, we can just merge the two lists + return fieldsToLoad.isEmpty() ? esr : esr.withAttributes(combine(esr.output(), fieldsToLoad)); + }); + + return transformed.transformUp(Fork.class, f -> patchFork(f, Expressions.asAttributes(fieldsToLoad(unresolved, Set.of())))); + } + + private static List fieldsToLoad(List unresolved, Set exclude) { + List insisted = new ArrayList<>(unresolved.size()); + Set names = new LinkedHashSet<>(unresolved.size()); + for (var ua : unresolved) { + // some plans may reference the same UA multiple times (Aggregate groupings in aggregates, Eval) + if (names.contains(ua.name()) == false && exclude.contains(ua.name()) == false) { + insisted.add(insistKeyword(ua)); + names.add(ua.name()); + } + } + return insisted; + } + + // TODO: would an alternative to this be to drop the current Fork and have ResolveRefs#resolveFork re-resolve it. We might need + // some plan delimiters/markers to make it unequivocal which nodes belong to "make Fork work" - like (Limit-Project[-Eval])s - and + // which don't. + private static Fork patchFork(Fork fork, List aliasAttributes) { + // if no child outputs the attribute, don't patch it through at all. 
+ aliasAttributes.removeIf(a -> fork.children().stream().anyMatch(f -> descendantOutputsAttribute(f, a)) == false); + if (aliasAttributes.isEmpty()) { + return fork; + } + + List newChildren = new ArrayList<>(fork.children().size()); + for (var child : fork.children()) { + Holder patched = new Holder<>(false); + child = child.transformDown( + // TODO add a suitable forEachDownMayReturnEarly equivalent + n -> patched.get() == false && n instanceof Project, // process top Project only (Fork-injected) + n -> { + patched.set(true); + return patchForkProject((Project) n, aliasAttributes); + } + ); + if (patched.get() == false) { // assert + throw new EsqlIllegalArgumentException("Fork child misses a top projection"); + } + newChildren.add(child); + } + + return fork.replaceSubPlansAndOutput(newChildren, combine(fork.output(), aliasAttributes)); + } + + private static Project patchForkProject(Project project, List aliasAttributes) { + // refresh the IDs for each UnionAll child (needed for correct resolution of convert functions; see collectConvertFunctions()) + aliasAttributes = aliasAttributes.stream().map(a -> a.withId(new NameId())).toList(); + + project = project.withProjections(combine(project.projections(), aliasAttributes)); + + // If Project's child doesn't output the attribute, introduce a null-Eval'ing. This is similar to what Fork-resolution does. + List nullAliases = new ArrayList<>(aliasAttributes.size()); + for (var attribute : aliasAttributes) { + if (descendantOutputsAttribute(project, attribute) == false) { + nullAliases.add(nullAlias(attribute)); + } + } + return nullAliases.isEmpty() ? project : project.replaceChild(new Eval(project.source(), project.child(), nullAliases)); + } + + /** + * Fork injects a {@code Limit - Project (- Eval)} top structure into its subtrees. Skip the top Limit (if present) and Project in + * the {@code plan} and look at the output of the remaining fragment. 
+ * @return {@code true} if this fragment's output contains the {@code attribute}. + */ + private static boolean descendantOutputsAttribute(LogicalPlan plan, Attribute attribute) { + plan = plan instanceof Limit limit ? limit.child() : plan; + if (plan instanceof Project project) { + return project.child().outputSet().names().contains(attribute.name()); + } + throw new EsqlIllegalArgumentException("unexpected node type [{}]", plan); // assert + } + + private static LogicalPlan refreshPlan(LogicalPlan plan, List unresolved) { + var refreshed = refreshUnresolved(plan, unresolved); + return refreshChildren(refreshed); + } + + /** + * The UAs that haven't been resolved are marked as unresolvable with a custom message. This needs to be removed for + * {@link Analyzer.ResolveRefs} to attempt again to wire them to the newly added aliases. That's what this method does. + */ + private static LogicalPlan refreshUnresolved(LogicalPlan plan, List unresolved) { + return plan.transformExpressionsOnlyUp(UnresolvedAttribute.class, ua -> { + if (unresolved.contains(ua)) { + unresolved.remove(ua); + // Besides clearing the message, we need to refresh the nameId to avoid equality with the previous plan. + // (A `new UnresolvedAttribute(ua.source(), ua.name())` would save an allocation, but is problematic with subtypes.) + ua = (ua.withId(new NameId())).withUnresolvedMessage(null); + } + return ua; + }); + } + + /** + * @return A plan having all nodes recreated (no properties changed, otherwise). This is needed to clear internal, lazy-eval'd and + * cached state, such as the output. The rule inserts new attributes in the plan, so the output of all the nodes downstream these + * insertions need be recomputed. 
+ */ + private static LogicalPlan refreshChildren(LogicalPlan plan) { + var planChildren = plan.children(); + if (planChildren.isEmpty()) { + return plan; + } + List newChildren = new ArrayList<>(planChildren.size()); + planChildren.forEach(child -> newChildren.add(refreshChildren(child))); + return plan.replaceChildren(newChildren); + } + + /** + * Inserts an Eval atop each child of the given {@code nAry}, if the child is a LeafPlan. + */ + private static LogicalPlan evalUnresolvedNary(LogicalPlan nAry, List nullAliases) { + List newChildren = new ArrayList<>(nAry.children().size()); + boolean changed = false; + for (var child : nAry.children()) { + if (child instanceof LeafPlan source) { + assertSourceType(source); + child = new Eval(source.source(), source, nullAliases); + changed = true; + } + newChildren.add(child); + } + return changed ? nAry.replaceChildren(newChildren) : nAry; + } + + /** + * Inserts an Eval atop the given {@code unaryAtopSource}, if this isn't an Eval already. Otherwise it merges the nullAliases into it. + */ + private static LogicalPlan evalUnresolvedUnary(UnaryPlan unaryAtopSource, List nullAliases) { + assertSourceType(unaryAtopSource.child()); + if (unaryAtopSource instanceof Eval eval && eval.resolved()) { // if this Eval isn't resolved, insert a new (resolved) one + List pre = new ArrayList<>(nullAliases.size()); + List post = new ArrayList<>(nullAliases.size()); + var outputNames = eval.outputSet().names(); + var evalRefNames = eval.references().names(); + for (Alias a : nullAliases) { + if (outputNames.contains(a.name()) == false) { + var target = evalRefNames.contains(a.name()) ? 
pre : post; + target.add(a); + } + } + if (pre.size() + post.size() == 0) { + return eval; + } + return new Eval(eval.source(), eval.child(), combine(pre, eval.fields(), post)); + } else { + return unaryAtopSource.replaceChild(new Eval(unaryAtopSource.source(), unaryAtopSource.child(), nullAliases)); + } + } + + private static void assertSourceType(LogicalPlan source) { + switch (source) { + case EsRelation unused -> { + } + case Row unused -> { + } + case LocalRelation unused -> { + } + default -> throw new EsqlIllegalArgumentException("unexpected source type [{}]", source); + } + } + + private static List nullAliases(List unresolved) { + Map aliasesMap = new LinkedHashMap<>(unresolved.size()); + unresolved.forEach(u -> aliasesMap.computeIfAbsent(u.name(), k -> nullAlias(u))); + return new ArrayList<>(aliasesMap.values()); + } + + private static Alias nullAlias(Attribute attribute) { + return new Alias(attribute.source(), attribute.name(), NULLIFIED); + } + + /** + * @return all the {@link UnresolvedAttribute}s in the given node / {@code plan}, but excluding the {@link UnresolvedPattern} and + * {@link UnresolvedTimestamp} subtypes. + */ + private static List collectUnresolved(LogicalPlan plan) { + List unresolved = new ArrayList<>(); + plan.forEachExpression(UnresolvedAttribute.class, ua -> { + if ((ua instanceof UnresolvedPattern || ua instanceof UnresolvedTimestamp) == false) { + unresolved.add(ua); + } + }); + return unresolved; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolvedProjects.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolvedProjects.java new file mode 100644 index 0000000000000..25d1029481111 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolvedProjects.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.analysis.rules; + +import org.elasticsearch.xpack.esql.analysis.AnalyzerRules; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.local.ResolvingProject; + +/** + * Converts any Analyzer-specific {@link ResolvingProject} into an {@link org.elasticsearch.xpack.esql.plan.logical.Project} equivalent. + */ +public class ResolvedProjects extends AnalyzerRules.AnalyzerRule { + + @Override + protected LogicalPlan rule(ResolvingProject plan) { + return plan.asProject(); + } + + @Override + protected boolean skipResolved() { + return false; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedPattern.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedPattern.java new file mode 100644 index 0000000000000..dfa36e0914a72 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedPattern.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.core.expression; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.xpack.esql.analysis.UnmappedResolution; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.UnresolvedNamePattern; + +/** + * When a {@code KEEP} or a {@code DROP} receives a wildcard pattern, this is provided to them as an {@link UnresolvedNamePattern}. This + * is run against the available attributes names. 
In case nothing matches, the resulting attribute would be an {@link UnresolvedAttribute}, + * which ends up being reported as to the user in a failure message. However, in case {@link UnmappedResolution unmapped fields} are + * enabled, an {@link UnresolvedAttribute} isn't sufficient, as we the analyzer wouldn't know. + */ +public class UnresolvedPattern extends UnresolvedAttribute { + public UnresolvedPattern(Source source, String name) { + super(source, name); + } + + public UnresolvedPattern( + Source source, + @Nullable String qualifier, + String name, + @Nullable NameId id, + @Nullable String unresolvedMessage + ) { + super(source, qualifier, name, id, unresolvedMessage); + } + + @Override + public UnresolvedPattern withUnresolvedMessage(String unresolvedMessage) { + return new UnresolvedPattern(source(), qualifier(), name(), id(), unresolvedMessage); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, UnresolvedPattern::new, qualifier(), name(), id(), unresolvedMessage()); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java index 6163a50a42ea4..23f2110d628cc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropgateUnmappedFields.java @@ -38,7 +38,20 @@ public LogicalPlan apply(LogicalPlan logicalPlan) { ? logicalPlan : logicalPlan.transformUp( EsRelation.class, - er -> er.withAttributes(NamedExpressions.mergeOutputAttributes(new ArrayList<>(unmappedFields), er.output())) + er -> hasPotentiallyUnmappedKeywordEsField(er) + ? 
er + : er.withAttributes(NamedExpressions.mergeOutputAttributes(new ArrayList<>(unmappedFields), er.output())) ); } + + // Checks if the EsRelation already has a PotentiallyUnmappedKeywordEsField. If true SET load_unmapped="load" is applied. + // This is used to practically disable the rule, since it changes the output order (mergeOutputAttributes()). + private static boolean hasPotentiallyUnmappedKeywordEsField(EsRelation er) { + for (var attr : er.output()) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField) { + return true; + } + } + return false; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java index bd6f5b83f7340..99217e02c2eb1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java @@ -130,10 +130,21 @@ public EsqlStatement createStatement(String query) { } // testing utility - public EsqlStatement createStatement(String query, QueryParams params) { + public EsqlStatement unvalidatedStatement(String query, QueryParams params) { return createStatement(query, params, new PlanTelemetry(new EsqlFunctionRegistry()), new InferenceSettings(Settings.EMPTY)); } + // testing utility + public EsqlStatement createStatement(String query, QueryParams params) { + return parse( + query, + params, + new SettingsValidationContext(false, config.isDevVersion()), // TODO: wire CPS in + new PlanTelemetry(new EsqlFunctionRegistry()), + new InferenceSettings(Settings.EMPTY) + ); + } + public EsqlStatement parse( String query, QueryParams params, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QuerySettings.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QuerySettings.java index e19d3e463e420..d308180170e4a 
100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QuerySettings.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QuerySettings.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.plan; import org.elasticsearch.core.Nullable; +import org.elasticsearch.xpack.esql.analysis.UnmappedResolution; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.expression.Literal; @@ -20,6 +21,8 @@ import java.time.ZoneId; import java.time.ZoneOffset; +import java.util.Arrays; +import java.util.Locale; import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; @@ -66,6 +69,33 @@ public class QuerySettings { ZoneOffset.UTC ); + @Param( + name = "unmapped_fields", + type = { "keyword" }, + description = "Defines how unmapped fields are treated. Possible values are: " + + "\"FAIL\" (default) - fails the query if unmapped fields are present; " + + "\"NULLIFY\" - treats unmapped fields as null values; " + + "\"LOAD\" - attempts to load the fields from the source." 
+ ) + public static final QuerySettingDef UNMAPPED_FIELDS = new QuerySettingDef<>( + "unmapped_fields", + DataType.KEYWORD, + false, + true, + true, + (value) -> { + String resolution = Foldables.stringLiteralValueOf(value, "Unexpected value"); + try { + return UnmappedResolution.valueOf(resolution.toUpperCase(Locale.ROOT)); + } catch (Exception exc) { + throw new IllegalArgumentException( + "Invalid unmapped_fields resolution [" + value + "], must be one of " + Arrays.toString(UnmappedResolution.values()) + ); + } + }, + UnmappedResolution.FAIL + ); + @Param(name = "approximate", type = { "boolean", "map_param" }, description = "TODO - add description here") @MapParam( name = "approximate", @@ -129,8 +159,12 @@ public class QuerySettings { null ); - public static final Map> SETTINGS_BY_NAME = Stream.of(PROJECT_ROUTING, TIME_ZONE, APPROXIMATE) - .collect(Collectors.toMap(QuerySettingDef::name, Function.identity())); + public static final Map> SETTINGS_BY_NAME = Stream.of( + UNMAPPED_FIELDS, + PROJECT_ROUTING, + TIME_ZONE, + APPROXIMATE + ).collect(Collectors.toMap(QuerySettingDef::name, Function.identity())); public static void validate(EsqlStatement statement, SettingsValidationContext ctx) { for (QuerySetting setting : statement.settings()) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Keep.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Keep.java index f680ef9fbc64e..d37eedae0c89f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Keep.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Keep.java @@ -33,7 +33,7 @@ public Project replaceChild(LogicalPlan newChild) { @Override public boolean expressionsResolved() { - return super.expressionsResolved(); + return super.expressionsResolved(); // TODO: is this method needed? 
} @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/ResolvingProject.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/ResolvingProject.java new file mode 100644 index 0000000000000..2773f2e705ba0 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/local/ResolvingProject.java @@ -0,0 +1,96 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plan.logical.local; + +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.Project; + +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +/** + * This version of {@link Project} saves part of its state for computing its projections based on its child's output. This avoids + * the problem that once the projections are computed, we don't know which pattern was used to generate them. This is important + * when dealing with unmapped fields: E.g. in + * {@code SET unmapped_fields="nullify"; FROM idx | KEEP foo* | WHERE foo_bar > 10}, if {@code foo_bar} is not mapped, we need to inject + * a {@code NULL} literal for it before the {@code KEEP}. It's correct to update the projection of the {@code KEEP} to include this new + * attribute because the pattern {@code foo*} matches it. But if the pattern was {@code foo_baz}, it would be incorrect to do so. 
+ */ +public class ResolvingProject extends Project { + private final Function, List> resolver; + + public ResolvingProject(Source source, LogicalPlan child, Function, List> resolver) { + this(source, child, resolver.apply(child.output()), resolver); + } + + private ResolvingProject( + Source source, + LogicalPlan child, + List projections, + Function, List> resolver + ) { + super(source, child, projections); + this.resolver = resolver; + } + + @Override + public String getWriteableName() { + throw new UnsupportedOperationException("doesn't escape the node"); + } + + public Function, List> resolver() { + return resolver; + } + + @Override + protected NodeInfo info() { + return NodeInfo.create( + this, + (source, child, projections) -> new ResolvingProject(source, child, projections, this.resolver), + child(), + projections() + ); + } + + @Override + public ResolvingProject replaceChild(LogicalPlan newChild) { + return new ResolvingProject(source(), newChild, resolver); + } + + @Override + public Project withProjections(List projections) { + return new ResolvingProject(source(), child(), projections, resolver); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), resolver); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + + ResolvingProject other = (ResolvingProject) obj; + return super.equals(obj) && Objects.equals(resolver, other.resolver); + } + + public Project asProject() { + return new Project(source(), child(), projections()); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index ec02fc31bf363..76e411a954e4b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -99,6 +99,7 @@ import static java.util.stream.Collectors.toSet; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; import static org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin.firstSubPlan; import static org.elasticsearch.xpack.esql.session.SessionUtils.checkPagesBelowSize; @@ -239,14 +240,14 @@ public void execute( ); FoldContext foldContext = configuration.newFoldContext(); - LogicalPlan plan = statement.plan(); - if (plan instanceof Explain explain) { + if (statement.plan() instanceof Explain explain) { explainMode = true; - plan = explain.query(); - parsedPlanString = plan.toString(); + statement = new EsqlStatement(explain.query(), statement.settings()); + parsedPlanString = explain.query().toString(); } + analyzedPlan( - plan, + statement, configuration, executionInfo, request.filter(), @@ -585,7 +586,7 @@ static void handleFieldCapsFailures( } public void analyzedPlan( - LogicalPlan parsed, + EsqlStatement parsed, Configuration configuration, EsqlExecutionInfo executionInfo, QueryBuilder requestFilter, @@ -595,12 +596,12 @@ public void analyzedPlan( PlanningProfile.TimeSpanMarker preAnalysisProfile = executionInfo.planningProfile().preAnalysis(); preAnalysisProfile.start(); - PreAnalyzer.PreAnalysis preAnalysis = preAnalyzer.preAnalyze(parsed); + PreAnalyzer.PreAnalysis preAnalysis = preAnalyzer.preAnalyze(parsed.plan()); preAnalysisProfile.stop(); // Initialize the PreAnalysisResult with the local cluster's minimum transport version, so our planning will be correct also in // case of ROW queries. ROW queries can still require inter-node communication (for ENRICH and LOOKUP JOIN execution) with an older // node in the same cluster; so assuming that all nodes are on the same version as this node will be wrong and may cause bugs. 
- PreAnalysisResult result = FieldNameUtils.resolveFieldNames(parsed, preAnalysis.enriches().isEmpty() == false) + PreAnalysisResult result = FieldNameUtils.resolveFieldNames(parsed.plan(), preAnalysis.enriches().isEmpty() == false) .withMinimumTransportVersion(localClusterMinimumVersion); String description = requestFilter == null ? "the only attempt without filter" : "first attempt with filter"; @@ -617,7 +618,7 @@ public void analyzedPlan( } private void resolveIndicesAndAnalyze( - LogicalPlan parsed, + EsqlStatement parsed, Configuration configuration, EsqlExecutionInfo executionInfo, String description, @@ -687,7 +688,7 @@ private void resolveIndicesAndAnalyze( ); }) .andThen((l, r) -> { - inferenceService.inferenceResolver(functionRegistry).resolveInferenceIds(parsed, l.map(r::withInferenceResolution)); + inferenceService.inferenceResolver(functionRegistry).resolveInferenceIds(parsed.plan(), l.map(r::withInferenceResolution)); }) .>andThen((l, r) -> { dependencyResolutionProfile.stop(); @@ -1058,7 +1059,7 @@ private static QueryBuilder createQueryFilter(IndexMode indexMode, QueryBuilder } private void analyzeWithRetry( - LogicalPlan parsed, + EsqlStatement parsed, Configuration configuration, EsqlExecutionInfo executionInfo, String description, @@ -1120,11 +1121,16 @@ private PhysicalPlan logicalPlanToPhysicalPlan( return EstimatesRowSize.estimateRowSize(0, physicalPlan); } - private LogicalPlan analyzedPlan(LogicalPlan parsed, Configuration configuration, PreAnalysisResult r, EsqlExecutionInfo executionInfo) - throws Exception { + private LogicalPlan analyzedPlan( + EsqlStatement parsed, + Configuration configuration, + PreAnalysisResult r, + EsqlExecutionInfo executionInfo + ) throws Exception { handleFieldCapsFailures(configuration.allowPartialResults(), executionInfo, r.indexResolution()); - Analyzer analyzer = new Analyzer(new AnalyzerContext(configuration, functionRegistry, r), verifier); - LogicalPlan plan = analyzer.analyze(parsed); + AnalyzerContext 
analyzerContext = new AnalyzerContext(configuration, functionRegistry, parsed.setting(UNMAPPED_FIELDS), r); + Analyzer analyzer = new Analyzer(analyzerContext, verifier); + LogicalPlan plan = analyzer.analyze(parsed.plan()); plan.setAnalyzed(); return plan; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index f41c879bec6a9..3ac6eea08d3bb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -16,7 +16,7 @@ import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; -import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedPattern; import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar; import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp; import org.elasticsearch.xpack.esql.core.util.Holder; @@ -188,7 +188,7 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha // special handling for UnresolvedPattern (which is not an UnresolvedAttribute) p.forEachExpression(UnresolvedNamePattern.class, up -> { - var ua = new UnresolvedAttribute(up.source(), up.name()); + var ua = new UnresolvedPattern(up.source(), up.name()); referencesBuilder.get().add(ua); if (p instanceof Keep) { keepRefs.add(ua); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 7e4aa0cbab5ff..54bf8defad144 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -136,6 +136,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.EsqlTestUtils.queryClusterSettings; import static org.elasticsearch.xpack.esql.action.EsqlExecutionInfoTests.createEsqlExecutionInfo; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.greaterThan; @@ -565,7 +566,7 @@ private static EnrichPolicy loadEnrichPolicyMapping(String policyFileName) { } private LogicalPlan analyzedPlan( - LogicalPlan parsed, + EsqlStatement parsed, Configuration configuration, Map datasets, TransportVersion minimumVersion @@ -580,11 +581,12 @@ private LogicalPlan analyzedPlan( Map.of(), enrichPolicies, emptyInferenceResolution(), - minimumVersion + minimumVersion, + parsed.setting(UNMAPPED_FIELDS) ), TEST_VERIFIER ); - LogicalPlan plan = analyzer.analyze(parsed); + LogicalPlan plan = analyzer.analyze(parsed.plan()); plan.setAnalyzed(); LOGGER.debug("Analyzed plan:\n{}", plan); return plan; @@ -665,7 +667,7 @@ private ActualResults executePlan(BigArrays bigArrays) throws Exception { var testDatasets = testDatasets(statement.plan()); // Specifically use the newest transport version; the csv tests correspond to a single node cluster on the current version. 
TransportVersion minimumVersion = TransportVersion.current(); - LogicalPlan analyzed = analyzedPlan(statement.plan(), configuration, testDatasets, minimumVersion); + LogicalPlan analyzed = analyzedPlan(statement, configuration, testDatasets, minimumVersion); FoldContext foldCtx = FoldContext.small(); EsqlSession session = new EsqlSession( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java index cd390e5f5fff9..e7a9009b763cd 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.analysis; import org.elasticsearch.TransportVersion; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexMode; import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.ESTestCase; @@ -24,6 +25,7 @@ import org.elasticsearch.xpack.esql.inference.ResolvedInference; import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.elasticsearch.xpack.esql.parser.QueryParams; +import org.elasticsearch.xpack.esql.plan.EsqlStatement; import org.elasticsearch.xpack.esql.plan.IndexPattern; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; @@ -49,6 +51,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; import static org.elasticsearch.xpack.esql.EsqlTestUtils.configuration; import static org.elasticsearch.xpack.esql.EsqlTestUtils.testAnalyzerContext; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; public final class AnalyzerTestUtils { @@ -99,6 +102,17 @@ public static Analyzer analyzer( EnrichResolution enrichResolution, Verifier verifier, Configuration config + ) { + return 
analyzer(indexResolutions, lookupResolution, enrichResolution, verifier, config, UNMAPPED_FIELDS.defaultValue()); + } + + public static Analyzer analyzer( + Map indexResolutions, + Map lookupResolution, + EnrichResolution enrichResolution, + Verifier verifier, + Configuration config, + UnmappedResolution unmappedResolution ) { return new Analyzer( testAnalyzerContext( @@ -107,7 +121,8 @@ public static Analyzer analyzer( mergeIndexResolutions(indexResolutions, defaultSubqueryResolution()), lookupResolution, enrichResolution, - defaultInferenceResolution() + defaultInferenceResolution(), + unmappedResolution ), verifier ); @@ -126,6 +141,22 @@ public static Analyzer analyzer(Map indexResoluti return analyzer(indexResolutions, defaultLookupResolution(), defaultEnrichResolution(), verifier, config); } + public static Analyzer analyzer( + Map indexResolutions, + Verifier verifier, + Configuration config, + EsqlStatement statement + ) { + return analyzer( + indexResolutions, + defaultLookupResolution(), + defaultEnrichResolution(), + verifier, + config, + statement.setting(UNMAPPED_FIELDS) + ); + } + public static Analyzer analyzer(Verifier verifier) { return analyzer(analyzerDefaultMapping(), defaultLookupResolution(), defaultEnrichResolution(), verifier, EsqlTestUtils.TEST_CFG); } @@ -135,7 +166,18 @@ public static Analyzer analyzer(Map indexResoluti } public static LogicalPlan analyze(String query) { - return analyze(query, "mapping-basic.json"); + var indexName = indexFromQuery(query); + var indexResolutions = indexResolutions(indexName); + return analyze(query, analyzer(indexResolutions, TEST_VERIFIER, TEST_CFG)); + } + + public static LogicalPlan analyzeStatement(String query) { + var statement = EsqlParser.INSTANCE.createStatement(query); + var relations = statement.plan().collectFirstChildren(UnresolvedRelation.class::isInstance); + var indexName = relations.isEmpty() ? 
null : ((UnresolvedRelation) relations.getFirst()).indexPattern().indexPattern(); + var indexResolutions = indexResolutions(indexName); + var analyzer = analyzer(indexResolutions, TEST_VERIFIER, configuration(query), statement); + return analyzer.analyze(statement.plan()); } public static LogicalPlan analyze(String query, String mapping) { @@ -168,11 +210,24 @@ public static LogicalPlan analyze(String query, TransportVersion transportVersio } } - private static final Pattern indexFromPattern = Pattern.compile("(?i)FROM\\s+([\\w-]+)"); + private static final Map MAPPING_BASIC_RESOLUTION = EsqlTestUtils.loadMapping("mapping-basic.json"); + + private static Map indexResolutions(@Nullable String indexName) { + if (indexName == null) { + return Map.of(); + } + + var indexResolution = IndexResolution.valid( + new EsIndex(indexName, MAPPING_BASIC_RESOLUTION, Map.of(indexName, IndexMode.STANDARD), Map.of(), Map.of(), Set.of()) + ); + return Map.of(new IndexPattern(Source.EMPTY, indexName), indexResolution); + } + + private static final Pattern INDEX_FROM_PATTERN = Pattern.compile("(?i)FROM\\s+([\\w-]+)"); private static String indexFromQuery(String query) { // Extract the index name from the FROM clause of the query using regexp - Matcher matcher = indexFromPattern.matcher(query); + Matcher matcher = INDEX_FROM_PATTERN.matcher(query); if (matcher.find()) { return matcher.group(1); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java new file mode 100644 index 0000000000000..14620b5f68fbb --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java @@ -0,0 +1,3056 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.analysis; + +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.expression.function.aggregate.Count; +import org.elasticsearch.xpack.esql.expression.function.aggregate.FilteredExpression; +import org.elasticsearch.xpack.esql.expression.function.aggregate.Max; +import org.elasticsearch.xpack.esql.expression.function.aggregate.Sum; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ConvertFunction; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; +import org.elasticsearch.xpack.esql.expression.predicate.logical.And; +import org.elasticsearch.xpack.esql.expression.predicate.logical.Or; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; +import 
org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan; +import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThan; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; +import org.elasticsearch.xpack.esql.plan.logical.Dissect; +import org.elasticsearch.xpack.esql.plan.logical.Enrich; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.Eval; +import org.elasticsearch.xpack.esql.plan.logical.Filter; +import org.elasticsearch.xpack.esql.plan.logical.Fork; +import org.elasticsearch.xpack.esql.plan.logical.InlineStats; +import org.elasticsearch.xpack.esql.plan.logical.Limit; +import org.elasticsearch.xpack.esql.plan.logical.MvExpand; +import org.elasticsearch.xpack.esql.plan.logical.OrderBy; +import org.elasticsearch.xpack.esql.plan.logical.Project; +import org.elasticsearch.xpack.esql.plan.logical.Row; +import org.elasticsearch.xpack.esql.plan.logical.Subquery; +import org.elasticsearch.xpack.esql.plan.logical.UnionAll; +import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes; +import org.elasticsearch.xpack.esql.plan.logical.join.LookupJoin; + +import java.util.List; + +import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyzeStatement; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTests.withInlinestatsWarning; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.hasItems; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; + +public class AnalyzerUnmappedTests extends ESTestCase { + + /* + * Limit[1000[INTEGER],false,false] + * \_Project[[does_not_exist_field{r}#16]] + * \_Eval[[null[NULL] AS does_not_exist_field#16]] + * 
\_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] + */ + public void testKeep() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | KEEP does_not_exist_field + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + assertThat(project.projections(), hasSize(1)); + assertThat(Expressions.name(project.projections().getFirst()), is("does_not_exist_field")); + + var eval = as(project.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist_field")); + var literal = as(alias.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Project[[does_not_exist_field{r}#18]] + * \_Eval[[null[NULL] AS does_not_exist_field#17]] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] 
+ */ + public void testKeepRepeated() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | KEEP does_not_exist_field, does_not_exist_field + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + assertThat(project.projections(), hasSize(1)); + assertThat(Expressions.name(project.projections().getFirst()), is("does_not_exist_field")); + + var eval = as(project.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist_field")); + var literal = as(alias.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + public void testFailKeepAndNonMatchingStar() { + var query = """ + FROM test + | KEEP does_not_exist_field* + """; + var failure = "No matches found for pattern [does_not_exist_field*]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Project[[emp_no{f}#6, does_not_exist_field{r}#18]] + * \_Eval[[null[NULL] AS does_not_exist_field#18]] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] 
+ */ + public void testKeepAndMatchingStar() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | KEEP emp_*, does_not_exist_field + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + assertThat(project.projections(), hasSize(2)); + assertThat(Expressions.names(project.projections()), is(List.of("emp_no", "does_not_exist_field"))); + + var eval = as(project.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist_field")); + var literal = as(alias.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_EsqlProject[[does_not_exist_field1{r}#20, does_not_exist_field2{r}#22]] + * \_Eval[[TOINTEGER(does_not_exist_field1{r}#20) + 42[INTEGER] AS x#5]] + * \_Eval[[null[NULL] AS does_not_exist_field1#20, null[NULL] AS does_not_exist_field2#22]] + * \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..] 
+ */ + public void testEvalAndKeep() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | EVAL x = does_not_exist_field1::INTEGER + 42 + | KEEP does_not_exist_field1, does_not_exist_field2 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + assertThat(project.projections(), hasSize(2)); + assertThat(Expressions.names(project.projections()), is(List.of("does_not_exist_field1", "does_not_exist_field2"))); + + var eval1 = as(project.child(), Eval.class); + assertThat(eval1.fields(), hasSize(1)); + var aliasX = as(eval1.fields().getFirst(), Alias.class); + assertThat(aliasX.name(), is("x")); + assertThat(Expressions.name(aliasX.child()), is("does_not_exist_field1::INTEGER + 42")); + + var eval2 = as(eval1.child(), Eval.class); + assertThat(Expressions.names(eval2.fields()), is(List.of("does_not_exist_field1", "does_not_exist_field2"))); + + var relation = as(eval2.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + public void testFailKeepAndMatchingAndNonMatchingStar() { + var query = """ + FROM test + | KEEP emp_*, does_not_exist_field* + """; + var failure = "No matches found for pattern [does_not_exist_field*]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + public void testFailAfterKeep() { + var query = """ + FROM test + | KEEP emp_* + | EVAL x = does_not_exist_field + 1 + """; + var failure = "Unknown column [does_not_exist_field]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[does_not_exist_field{r}#22 + 2[INTEGER] AS y#9]] + * \_Eval[[emp_no{f}#11 + 1[INTEGER] AS x#6]] + * \_EsqlProject[[_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, gender{f}#13, hire_date{f}#18, job{f}#19, job.raw{f}#20, 
+ * languages{f}#14, last_name{f}#15, long_noidx{f}#21, salary{f}#16, does_not_exist_field{r}#22]] + * \_Eval[[null[NULL] AS does_not_exist_field#22]] + * \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..] + */ + public void testEvalAfterKeepStar() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | KEEP * + | EVAL x = emp_no + 1 + | EVAL y = does_not_exist_field + 2 + """)); + + assertThat( + Expressions.names(plan.output()), + is( + List.of( + "_meta_field", + "emp_no", + "first_name", + "gender", + "hire_date", + "job", + "job.raw", + "languages", + "last_name", + "long_noidx", + "salary", + "does_not_exist_field", + "x", + "y" + ) + ) + ); + var limit = as(plan, Limit.class); + var evalY = as(limit.child(), Eval.class); + var evalX = as(evalY.child(), Eval.class); + var esqlProject = as(evalX.child(), Project.class); + var evalNull = as(esqlProject.child(), Eval.class); + var source = as(evalNull.child(), EsRelation.class); + // TODO: golden testing + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[emp_does_not_exist_field{r}#23 + 2[INTEGER] AS y#9]] + * \_Eval[[emp_no{f}#11 + 1[INTEGER] AS x#6]] + * \_EsqlProject[[emp_no{f}#11, emp_does_not_exist_field{r}#23]] + * \_Eval[[null[NULL] AS emp_does_not_exist_field#23]] + * \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, + */ + public void testEvalAfterMatchingKeepWithWildcard() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | KEEP emp_* + | EVAL x = emp_no + 1 + | EVAL y = emp_does_not_exist_field + 2 + """)); + + assertThat(Expressions.names(plan.output()), is(List.of("emp_no", "emp_does_not_exist_field", "x", "y"))); + var limit = as(plan, Limit.class); + var evalY = as(limit.child(), Eval.class); + var evalX = as(evalY.child(), Eval.class); + var esqlProject = as(evalX.child(), Project.class); + var evalNull = as(esqlProject.child(), Eval.class); + var source = as(evalNull.child(), EsRelation.class); + // TODO: 
golden testing + } + + /* + * Limit[1000[INTEGER],false,false] + * \_EsqlProject[[_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, gender{f}#7, hire_date{f}#12, job{f}#13, job.raw{f}#14, + * languages{f}#8, last_name{f}#9, long_noidx{f}#15, salary{f}#10]] + * \_Eval[[null[NULL] AS does_not_exist_field#16]] + * \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] + */ + public void testDrop() { + var extraField = randomFrom("", "does_not_exist_field", "neither_this"); + var hasExtraField = extraField.isEmpty() == false; + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | DROP does_not_exist_field + """ + (hasExtraField ? ", " : "") + extraField)); // add emp_no to avoid "no fields left" case + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + // All fields from the original relation are projected, as the dropped field did not exist. + assertThat(project.projections(), hasSize(11)); + assertThat( + Expressions.names(project.projections()), + is( + List.of( + "_meta_field", + "emp_no", + "first_name", + "gender", + "hire_date", + "job", + "job.raw", + "languages", + "last_name", + "long_noidx", + "salary" + ) + ) + ); + + var eval = as(project.child(), Eval.class); + var expectedNames = hasExtraField && extraField.equals("does_not_exist_field") == false + ? 
List.of("does_not_exist_field", extraField) + : List.of("does_not_exist_field"); + assertThat(Expressions.names(eval.fields()), is(expectedNames)); + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + public void testFailDropWithNonMatchingStar() { + var query = """ + FROM test + | DROP does_not_exist_field* + """; + var failure = "No matches found for pattern [does_not_exist_field*]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_EsqlProject[[_meta_field{f}#12, first_name{f}#7, gender{f}#8, hire_date{f}#13, job{f}#14, job.raw{f}#15, languages{f}#9, + * last_name{f}#10, long_noidx{f}#16, salary{f}#11]] + * \_Eval[[null[NULL] AS does_not_exist_field#22]] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] + */ + public void testDropWithMatchingStar() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | DROP emp_*, does_not_exist_field + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + assertThat(project.projections(), hasSize(10)); + assertThat( + Expressions.names(project.projections()), + is( + List.of( + "_meta_field", + "first_name", + "gender", + "hire_date", + "job", + "job.raw", + "languages", + "last_name", + "long_noidx", + "salary" + ) + ) + ); + + var eval = as(project.child(), Eval.class); + assertThat(Expressions.names(eval.fields()), is(List.of("does_not_exist_field"))); + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + public void testFailDropWithMatchingAndNonMatchingStar() { + var query = """ + FROM test + | DROP emp_*, does_not_exist_field* + """; + var failure = "No matches found for pattern [does_not_exist_field*]"; + 
verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + public void testFailEvalAfterDrop() { + var query = """ + FROM test + | DROP does_not_exist_field + | EVAL x = does_not_exist_field + 1 + """; + + var failure = "3:12: Unknown column [does_not_exist_field]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Project[[_meta_field{f}#16, emp_no{f}#10 AS employee_number#8, first_name{f}#11, gender{f}#12, hire_date{f}#17, job{f}#18, + * job.raw{f}#19, languages{f}#13, last_name{f}#14, long_noidx{f}#20, salary{f}#15, + * now_it_does#12 AS does_not_exist_field{r}#21]] + * \_Eval[[null[NULL] AS does_not_exist_field#21]] + * \_EsRelation[test][_meta_field{f}#16, emp_no{f}#10, first_name{f}#11, ..] + */ + public void testRename() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | RENAME does_not_exist_field AS now_it_does, emp_no AS employee_number + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + assertThat(project.projections(), hasSize(12)); + assertThat( + Expressions.names(project.projections()), + is( + List.of( + "_meta_field", + "employee_number", + "first_name", + "gender", + "hire_date", + "job", + "job.raw", + "languages", + "last_name", + "long_noidx", + "salary", + "now_it_does" + ) + ) + ); + + var eval = as(project.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist_field")); + var literal = as(alias.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] 
+ * \_Project[[_meta_field{f}#19, emp_no{f}#13 AS employee_number#11, first_name{f}#14, gender{f}#15, hire_date{f}#20, + * job{f}#21, job.raw{f}#22, languages{f}#16, last_name{f}#17, long_noidx{f}#23, salary{f}#18, + * neither_does_this{r}#25 AS now_it_does#8]] + * \_Eval[[null[NULL] AS does_not_exist_field#24, null[NULL] AS neither_does_this#25]] + * \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..] + */ + public void testRenameShadowed() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | RENAME does_not_exist_field AS now_it_does, neither_does_this AS now_it_does, emp_no AS employee_number + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + assertThat(project.projections(), hasSize(12)); + assertThat( + Expressions.names(project.projections()), + is( + List.of( + "_meta_field", + "employee_number", + "first_name", + "gender", + "hire_date", + "job", + "job.raw", + "languages", + "last_name", + "long_noidx", + "salary", + "now_it_does" + ) + ) + ); + + var eval = as(project.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist_field")); + var literal = as(alias.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + alias = as(eval.fields().getLast(), Alias.class); + assertThat(alias.name(), is("neither_does_this")); + literal = as(alias.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /** + * Limit[1000[INTEGER],false,false] + * \_Eval[[does_not_exist{r}#21 + 1[INTEGER] AS x#8]] + * \_EsqlProject[[_meta_field{f}#16, emp_no{f}#10 AS employee_number#5, first_name{f}#11, gender{f}#12, hire_date{f}#17, + * job{f}#18, job.raw{f}#19, 
languages{f}#13, last_name{f}#14, long_noidx{f}#20, salary{f}#15, does_not_exist{r}#21]] + * \_Eval[[null[NULL] AS does_not_exist#21]] + * \_EsRelation[test][_meta_field{f}#16, emp_no{f}#10, first_name{f}#11, ..] + */ + public void testEvalAfterRename() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | RENAME emp_no AS employee_number + | EVAL x = does_not_exist + 1 + """)); + + assertThat( + Expressions.names(plan.output()), + is( + List.of( + "_meta_field", + "employee_number", + "first_name", + "gender", + "hire_date", + "job", + "job.raw", + "languages", + "last_name", + "long_noidx", + "salary", + "does_not_exist", + "x" + + ) + ) + ); + var limit = as(plan, Limit.class); + var eval1 = as(limit.child(), Eval.class); + var project = as(eval1.child(), Project.class); + var eval2 = as(project.child(), Eval.class); + var source = as(eval2.child(), EsRelation.class); + // TODO: golden testing + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[does_not_exist_field{r}#18 + 1[INTEGER] AS x#5]] + * \_Eval[[null[NULL] AS does_not_exist_field#18]] + * \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..] 
+ */ + public void testEval() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | EVAL x = does_not_exist_field + 1 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var outerEval = as(limit.child(), Eval.class); + assertThat(outerEval.fields(), hasSize(1)); + var aliasX = as(outerEval.fields().getFirst(), Alias.class); + assertThat(aliasX.name(), is("x")); + assertThat(Expressions.name(aliasX.child()), is("does_not_exist_field + 1")); + + var innerEval = as(outerEval.child(), Eval.class); + assertThat(innerEval.fields(), hasSize(1)); + var aliasField = as(innerEval.fields().getFirst(), Alias.class); + assertThat(aliasField.name(), is("does_not_exist_field")); + var literal = as(aliasField.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(innerEval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[b{r}#15 + c{r}#18 AS y#12]] + * \_Eval[[a{r}#14 + b{r}#15 AS x#8]] + * \_Eval[[null[NULL] AS a#14, null[NULL] AS b#15, null[NULL] AS c#18]] + * \_Row[[1[INTEGER] AS x#4]] + */ + public void testMultipleEvaled() { + var plan = analyzeStatement(setUnmappedNullify(""" + ROW x = 1 + | EVAL x = a + b + | EVAL y = b + c + """)); + + // TODO: golden testing + assertThat(Expressions.names(plan.output()), is(List.of("a", "b", "c", "x", "y"))); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[TOLONG(does_not_exist_field{r}#18) AS x#5]] + * \_Eval[[null[NULL] AS does_not_exist_field#18]] + * \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..] 
+ */ + public void testCasting() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | EVAL x = does_not_exist_field::LONG + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var outerEval = as(limit.child(), Eval.class); + assertThat(outerEval.fields(), hasSize(1)); + var aliasX = as(outerEval.fields().getFirst(), Alias.class); + assertThat(aliasX.name(), is("x")); + assertThat(Expressions.name(aliasX.child()), is("does_not_exist_field::LONG")); + + var innerEval = as(outerEval.child(), Eval.class); + assertThat(innerEval.fields(), hasSize(1)); + var aliasField = as(innerEval.fields().getFirst(), Alias.class); + assertThat(aliasField.name(), is("does_not_exist_field")); + var literal = as(aliasField.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(innerEval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[TOLONG(does_not_exist_field{r}#17) AS does_not_exist_field::LONG#4]] + * \_Eval[[null[NULL] AS does_not_exist_field#17]] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] + */ + public void testCastingNoAliasing() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | EVAL does_not_exist_field::LONG + """)); + + assertThat(Expressions.names(plan.output()), hasItems("does_not_exist_field", "does_not_exist_field::LONG")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[42[INTEGER] AS does_not_exist_field#7]] + * \_Eval[[does_not_exist_field{r}#20 + 1[INTEGER] AS x#5]] + * \_Eval[[null[NULL] AS does_not_exist_field#20]] + * \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..] 
+ */ + public void testShadowingAfterEval() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | EVAL x = does_not_exist_field + 1 + | EVAL does_not_exist_field = 42 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var outerMostEval = as(limit.child(), Eval.class); + assertThat(outerMostEval.fields(), hasSize(1)); + var aliasShadow = as(outerMostEval.fields().getFirst(), Alias.class); + assertThat(aliasShadow.name(), is("does_not_exist_field")); + assertThat(Expressions.name(aliasShadow.child()), is("42")); + + var middleEval = as(outerMostEval.child(), Eval.class); + assertThat(middleEval.fields(), hasSize(1)); + var aliasX = as(middleEval.fields().getFirst(), Alias.class); + assertThat(aliasX.name(), is("x")); + assertThat(Expressions.name(aliasX.child()), is("does_not_exist_field + 1")); + + var innerEval = as(middleEval.child(), Eval.class); + assertThat(innerEval.fields(), hasSize(1)); + var aliasField = as(innerEval.fields().getFirst(), Alias.class); + assertThat(aliasField.name(), is("does_not_exist_field")); + var literal = as(aliasField.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(innerEval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[42[INTEGER] AS does_not_exist_field#5]] + * \_Project[[does_not_exist_field{r}#18]] + * \_Eval[[null[NULL] AS does_not_exist_field#18]] + * \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..] 
+ */ + public void testShadowingAfterKeep() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | KEEP does_not_exist_field + | EVAL does_not_exist_field = 42 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var outerMostEval = as(limit.child(), Eval.class); + assertThat(outerMostEval.fields(), hasSize(1)); + var aliasShadow = as(outerMostEval.fields().getFirst(), Alias.class); + assertThat(aliasShadow.name(), is("does_not_exist_field")); + assertThat(Expressions.name(aliasShadow.child()), is("42")); + + var project = as(outerMostEval.child(), Project.class); + assertThat(project.projections(), hasSize(1)); + assertThat(Expressions.name(project.projections().getFirst()), is("does_not_exist_field")); + + var innerEval = as(project.child(), Eval.class); + assertThat(innerEval.fields(), hasSize(1)); + var aliasField = as(innerEval.fields().getFirst(), Alias.class); + assertThat(aliasField.name(), is("does_not_exist_field")); + var literal = as(aliasField.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(innerEval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + public void testFailDropThenKeep() { + var query = """ + FROM test + | DROP does_not_exist_field + | KEEP does_not_exist_field + """; + var failure = "line 3:8: Unknown column [does_not_exist_field]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + public void testFailDropThenEval() { + var query = """ + FROM test + | DROP does_not_exist_field + | EVAL does_not_exist_field + 2 + """; + var failure = "line 3:8: Unknown column [does_not_exist_field]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + public void testFailEvalThenDropThenEval() { + var query = """ + FROM test + | KEEP 
does_not_exist_field + | EVAL x = does_not_exist_field::LONG + 1 + | WHERE x IS NULL + | DROP does_not_exist_field + | EVAL does_not_exist_field::LONG + 2 + """; + var failure = "line 6:8: Unknown column [does_not_exist_field]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + public void testFailStatsThenKeep() { + var query = """ + FROM test + | STATS cnd = COUNT(*) + | KEEP does_not_exist_field + """; + var failure = "line 3:8: Unknown column [does_not_exist_field]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + public void testFailStatsThenEval() { + var query = """ + FROM test + | STATS cnt = COUNT(*) + | EVAL x = does_not_exist_field + cnt + """; + var failure = "line 3:12: Unknown column [does_not_exist_field]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[],[COUNT(does_not_exist_field{r}#18,true[BOOLEAN],PT0S[TIME_DURATION]) AS cnt#5]] + * \_Eval[[null[NULL] AS does_not_exist_field#18]] + * \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..] 
+ */ + public void testStatsAgg() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS cnt = COUNT(does_not_exist_field) + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(0)); + assertThat(agg.aggregates(), hasSize(1)); + var alias = as(agg.aggregates().getFirst(), Alias.class); + assertThat(alias.name(), is("cnt")); + assertThat(Expressions.name(alias.child()), is("COUNT(does_not_exist_field)")); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var aliasField = as(eval.fields().getFirst(), Alias.class); + assertThat(aliasField.name(), is("does_not_exist_field")); + var literal = as(aliasField.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[does_not_exist_field{r}#16],[does_not_exist_field{r}#16]] + * \_Eval[[null[NULL] AS does_not_exist_field#16]] + * \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] 
+ */ + public void testStatsGroup() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS BY does_not_exist_field + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(1)); + assertThat(Expressions.name(agg.groupings().getFirst()), is("does_not_exist_field")); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var aliasField = as(eval.fields().getFirst(), Alias.class); + assertThat(aliasField.name(), is("does_not_exist_field")); + var literal = as(aliasField.child(), Literal.class); + assertThat(literal.dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[does_not_exist2{r}#19],[SUM(does_not_exist1{r}#20,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS s + * #6, does_not_exist2{r}#19]] + * \_Eval[[null[NULL] AS does_not_exist2#19, null[NULL] AS does_not_exist1#20]] + * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] 
+ */ + public void testStatsAggAndGroup() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS s = SUM(does_not_exist1) BY does_not_exist2 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(1)); + assertThat(Expressions.name(agg.groupings().getFirst()), is("does_not_exist2")); + assertThat(agg.aggregates(), hasSize(2)); // includes grouping key + var alias = as(agg.aggregates().getFirst(), Alias.class); + assertThat(alias.name(), is("s")); + assertThat(Expressions.name(alias.child()), is("SUM(does_not_exist1)")); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + var alias2 = as(eval.fields().getFirst(), Alias.class); + assertThat(alias2.name(), is("does_not_exist2")); + assertThat(as(alias2.child(), Literal.class).dataType(), is(DataType.NULL)); + var alias1 = as(eval.fields().getLast(), Alias.class); + assertThat(alias1.name(), is("does_not_exist1")); + assertThat(as(alias1.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[does_not_exist2{r}#24 AS d2#5, emp_no{f}#13],[SUM(does_not_exist1{r}#25,true[BOOLEAN],PT0S[TIME_DURATION], + * compensated[KEYWORD]) + d2{r}#5 AS s#10, d2{r}#5, emp_no{f}#13]] + * \_Eval[[null[NULL] AS does_not_exist2#24, null[NULL] AS does_not_exist1#25, null[NULL] AS d2#26]] + * \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..] 
+ */ + public void testStatsAggAndAliasedGroup() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS s = SUM(does_not_exist1) + d2 BY d2 = does_not_exist2, emp_no + """)); + + assertThat(Expressions.names(plan.output()), is(List.of("s", "d2", "emp_no"))); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(2)); + var groupAlias = as(agg.groupings().getFirst(), Alias.class); + assertThat(groupAlias.name(), is("d2")); + assertThat(Expressions.name(groupAlias.child()), is("does_not_exist2")); + assertThat(Expressions.name(agg.groupings().get(1)), is("emp_no")); + + assertThat(agg.aggregates(), hasSize(3)); // includes grouping keys + var alias = as(agg.aggregates().getFirst(), Alias.class); + assertThat(alias.name(), is("s")); + assertThat(Expressions.name(alias.child()), is("SUM(does_not_exist1) + d2")); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(3)); + var alias2 = as(eval.fields().get(0), Alias.class); + assertThat(alias2.name(), is("does_not_exist2")); + assertThat(as(alias2.child(), Literal.class).dataType(), is(DataType.NULL)); + var alias1 = as(eval.fields().get(1), Alias.class); + assertThat(alias1.name(), is("does_not_exist1")); + assertThat(as(alias1.child(), Literal.class).dataType(), is(DataType.NULL)); + var alias0 = as(eval.fields().get(2), Alias.class); + assertThat(alias0.name(), is("d2")); + assertThat(as(alias0.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[does_not_exist2{r}#29 + does_not_exist3{r}#30 AS s0#6, emp_no{f}#18 AS s1#9],[SUM(does_not_exist1{r}#31,true[B + * OOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) + s0{r}#6 + s1{r}#9 AS sum#14, s0{r}#6, s1{r}#9]] + * 
\_Eval[[null[NULL] AS does_not_exist2#29, null[NULL] AS does_not_exist3#30, null[NULL] AS does_not_exist1#31, + * null[NULL] AS s0#32]] + * \_EsRelation[test][_meta_field{f}#24, emp_no{f}#18, first_name{f}#19, ..] + */ + public void testStatsAggAndAliasedGroupWithExpression() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS sum = SUM(does_not_exist1) + s0 + s1 BY s0 = does_not_exist2 + does_not_exist3, s1 = emp_no + """)); + + assertThat(Expressions.names(plan.output()), is(List.of("sum", "s0", "s1"))); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(2)); + assertThat(Expressions.names(agg.groupings()), is(List.of("s0", "s1"))); + + assertThat(agg.aggregates(), hasSize(3)); // includes grouping keys + var alias = as(agg.aggregates().getFirst(), Alias.class); + assertThat(alias.name(), is("sum")); + assertThat(Expressions.name(alias.child()), is("SUM(does_not_exist1) + s0 + s1")); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(4)); + assertThat(Expressions.names(eval.fields()), is(List.of("does_not_exist2", "does_not_exist3", "does_not_exist1", "s0"))); + eval.fields().forEach(a -> assertThat(as(as(a, Alias.class).child(), Literal.class).dataType(), is(DataType.NULL))); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[does_not_exist2{r}#22, emp_no{f}#11],[SUM(does_not_exist1{r}#23,true[BOOLEAN],PT0S[TIME_DURATION], + * compensated[KEYWORD]) AS s#7, COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS c#9, does_not_exist2{r}#22, emp_no{f}#11]] + * \_Eval[[null[NULL] AS does_not_exist2#22, null[NULL] AS does_not_exist1#23]] + * \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..] 
+ */ + public void testStatsMixed() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS s = SUM(does_not_exist1), c = COUNT(*) BY does_not_exist2, emp_no + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(2)); + assertThat(Expressions.names(agg.groupings()), is(List.of("does_not_exist2", "emp_no"))); + + assertThat(agg.aggregates(), hasSize(4)); // includes grouping keys + assertThat(Expressions.names(agg.aggregates()), is(List.of("s", "c", "does_not_exist2", "emp_no"))); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + assertThat(Expressions.names(eval.fields()), is(List.of("does_not_exist2", "does_not_exist1"))); + eval.fields().forEach(a -> assertThat(as(as(a, Alias.class).child(), Literal.class).dataType(), is(DataType.NULL))); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_InlineStats[] + * \_Aggregate[[does_not_exist2{r}#22, emp_no{f}#11],[SUM(does_not_exist1{r}#23,true[BOOLEAN],PT0S[TIME_DURATION],compensated[ + * KEYWORD]) AS s#5, COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS c#7, does_not_exist2{r}#22, emp_no{f}#11]] + * \_Eval[[null[NULL] AS does_not_exist2#22, null[NULL] AS does_not_exist1#23]] + * \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..] 
+ */ + public void testInlineStatsMixed() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | INLINE STATS s = SUM(does_not_exist1), c = COUNT(*) BY does_not_exist2, emp_no + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var inlineStats = as(limit.child(), InlineStats.class); + var agg = as(inlineStats.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(2)); + assertThat(Expressions.names(agg.groupings()), is(List.of("does_not_exist2", "emp_no"))); + + assertThat(agg.aggregates(), hasSize(4)); // includes grouping keys + assertThat(Expressions.names(agg.aggregates()), is(List.of("s", "c", "does_not_exist2", "emp_no"))); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + assertThat(Expressions.names(eval.fields()), is(List.of("does_not_exist2", "does_not_exist1"))); + eval.fields().forEach(a -> assertThat(as(as(a, Alias.class).child(), Literal.class).dataType(), is(DataType.NULL))); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[does_not_exist3{r}#24, emp_no{f}#13, does_not_exist2{r}#25],[SUM(does_not_exist1{r}#26,true[BOOLEAN], + * PT0S[TIME_DURATION],compensated[KEYWORD]) + does_not_exist2{r}#25 AS s#9, COUNT(*[KEYWORD],true[BOOLEAN], + * PT0S[TIME_DURATION]) AS c#11, does_not_exist3{r}#24, emp_no{f}#13, does_not_exist2{r}#25]] + * \_Eval[[null[NULL] AS does_not_exist3#24, null[NULL] AS does_not_exist2#25]] + * \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..] 
+ */ + public void testStatsMixedAndExpressions() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS s = SUM(does_not_exist1) + does_not_exist2, c = COUNT(*) BY does_not_exist3, emp_no, does_not_exist2 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(3)); + assertThat(Expressions.names(agg.groupings()), is(List.of("does_not_exist3", "emp_no", "does_not_exist2"))); + + assertThat(agg.aggregates(), hasSize(5)); // includes grouping keys + assertThat(Expressions.names(agg.aggregates()), is(List.of("s", "c", "does_not_exist3", "emp_no", "does_not_exist2"))); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(3)); + assertThat(Expressions.names(eval.fields()), is(List.of("does_not_exist3", "does_not_exist2", "does_not_exist1"))); + eval.fields().forEach(a -> assertThat(as(as(a, Alias.class).child(), Literal.class).dataType(), is(DataType.NULL))); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Filter[TOLONG(does_not_exist{r}#33) > 0[INTEGER]] + * \_Eval[[null[NULL] AS does_not_exist#33]] + * \_EsRelation[test][_meta_field{f}#28, emp_no{f}#22, first_name{f}#23, ..] 
+ */ + public void testWhere() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | WHERE does_not_exist::LONG > 0 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var filter = as(limit.child(), Filter.class); + assertThat(Expressions.name(filter.condition()), is("does_not_exist::LONG > 0")); + + var eval = as(filter.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist")); + assertThat(as(alias.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Filter[TOLONG(does_not_exist{r}#195) > 0[INTEGER] OR emp_no{f}#184 > 0[INTEGER]] + * \_Eval[[null[NULL] AS does_not_exist#195]] + * \_EsRelation[test][_meta_field{f}#190, emp_no{f}#184, first_name{f}#18..] 
+ */ + public void testWhereConjunction() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | WHERE does_not_exist::LONG > 0 OR emp_no > 0 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var filter = as(limit.child(), Filter.class); + assertThat(Expressions.name(filter.condition()), is("does_not_exist::LONG > 0 OR emp_no > 0")); + + var eval = as(filter.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist")); + assertThat(as(alias.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Filter[TOLONG(does_not_exist1{r}#491) > 0[INTEGER] OR emp_no{f}#480 > 0[INTEGER] + * AND TOLONG(does_not_exist2{r}#492) < 100[INTEGER]] + * \_Eval[[null[NULL] AS does_not_exist1#491, null[NULL] AS does_not_exist2#492]] + * \_EsRelation[test][_meta_field{f}#486, emp_no{f}#480, first_name{f}#48..] 
+ */ + public void testWhereConjunctionMultipleFields() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | WHERE does_not_exist1::LONG > 0 OR emp_no > 0 AND does_not_exist2::LONG < 100 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var filter = as(limit.child(), Filter.class); + assertThat(Expressions.name(filter.condition()), is("does_not_exist1::LONG > 0 OR emp_no > 0 AND does_not_exist2::LONG < 100")); + + var eval = as(filter.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + var alias1 = as(eval.fields().get(0), Alias.class); + assertThat(alias1.name(), is("does_not_exist1")); + assertThat(as(alias1.child(), Literal.class).dataType(), is(DataType.NULL)); + var alias2 = as(eval.fields().get(1), Alias.class); + assertThat(alias2.name(), is("does_not_exist2")); + assertThat(as(alias2.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[],[FilteredExpression[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]), + * TOLONG(does_not_exist1{r}#94) > 0[INTEGER]] AS c#81]] + * \_Eval[[null[NULL] AS does_not_exist1#94]] + * \_EsRelation[test][_meta_field{f}#89, emp_no{f}#83, first_name{f}#84, ..] 
+ */ + public void testAggsFiltering() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS c = COUNT(*) WHERE does_not_exist1::LONG > 0 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(0)); + assertThat(agg.aggregates(), hasSize(1)); + var alias = as(agg.aggregates().getFirst(), Alias.class); + assertThat(alias.name(), is("c")); + var filteredExpr = as(alias.child(), FilteredExpression.class); + var delegate = as(filteredExpr.delegate(), Count.class); + var greaterThan = as(filteredExpr.filter(), GreaterThan.class); + var tolong = as(greaterThan.left(), ToLong.class); + assertThat(Expressions.name(tolong.field()), is("does_not_exist1")); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias1 = as(eval.fields().getFirst(), Alias.class); + assertThat(alias1.name(), is("does_not_exist1")); + assertThat(as(alias1.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[],[FilteredExpression[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]), + * TOLONG(does_not_exist1{r}#620) > 0[INTEGER] OR emp_no{f}#609 > 0[INTEGER] + * OR TOLONG(does_not_exist2{r}#621) < 100[INTEGER]] AS c1#602, + * FilteredExpression[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]),ISNULL(does_not_exist3{r}#622)] AS c2#607]] + * \_Eval[[null[NULL] AS does_not_exist1#620, null[NULL] AS does_not_exist2#621, null[NULL] AS does_not_exist3#622]] + * \_EsRelation[test][_meta_field{f}#615, emp_no{f}#609, first_name{f}#61..] 
+ */ + public void testAggsFilteringMultipleFields() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | STATS c1 = COUNT(*) WHERE does_not_exist1::LONG > 0 OR emp_no > 0 OR does_not_exist2::LONG < 100, + c2 = COUNT(*) WHERE does_not_exist3 IS NULL + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(0)); + assertThat(agg.aggregates(), hasSize(2)); + + var alias1 = as(agg.aggregates().getFirst(), Alias.class); + assertThat(alias1.name(), is("c1")); + assertThat( + Expressions.name(alias1.child()), + is("c1 = COUNT(*) WHERE does_not_exist1::LONG > 0 OR emp_no > 0 OR does_not_exist2::LONG < 100") + ); + + var alias2 = as(agg.aggregates().get(1), Alias.class); + assertThat(alias2.name(), is("c2")); + assertThat(Expressions.name(alias2.child()), is("c2 = COUNT(*) WHERE does_not_exist3 IS NULL")); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(3)); + var aliasDne1 = as(eval.fields().get(0), Alias.class); + assertThat(aliasDne1.name(), is("does_not_exist1")); + assertThat(as(aliasDne1.child(), Literal.class).dataType(), is(DataType.NULL)); + var aliasDne2 = as(eval.fields().get(1), Alias.class); + assertThat(aliasDne2.name(), is("does_not_exist2")); + assertThat(as(aliasDne2.child(), Literal.class).dataType(), is(DataType.NULL)); + var aliasDne3 = as(eval.fields().get(2), Alias.class); + assertThat(aliasDne3.name(), is("does_not_exist3")); + assertThat(as(aliasDne3.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_OrderBy[[Order[does_not_exist{r}#16,ASC,LAST]]] + * \_Eval[[null[NULL] AS does_not_exist#16]] + * \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] 
+ */ + public void testSort() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | SORT does_not_exist ASC + """)); + + // Top implicit limit 1000 + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + // OrderBy over the Eval-produced alias + var orderBy = as(limit.child(), OrderBy.class); + + // Eval introduces does_not_exist as NULL + var eval = as(orderBy.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist")); + assertThat(as(alias.child(), Literal.class).dataType(), is(DataType.NULL)); + + // Underlying relation + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_OrderBy[[Order[TOLONG(does_not_exist{r}#485) + 1[INTEGER],ASC,LAST]]] + * \_Eval[[null[NULL] AS does_not_exist#485]] + * \_EsRelation[test][_meta_field{f}#480, emp_no{f}#474, first_name{f}#47..] 
+ */ + public void testSortExpression() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | SORT does_not_exist::LONG + 1 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var orderBy = as(limit.child(), OrderBy.class); + assertThat(orderBy.order(), hasSize(1)); + assertThat(Expressions.name(orderBy.order().getFirst().child()), is("does_not_exist::LONG + 1")); + + var eval = as(orderBy.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias = as(eval.fields().getFirst(), Alias.class); + assertThat(alias.name(), is("does_not_exist")); + assertThat(as(alias.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_OrderBy[[Order[TOLONG(does_not_exist{r}#370) + 1[INTEGER],ASC,LAST], Order[does_not_exist2{r}#371,DESC,FIRST], + * Order[emp_no{f}#359,ASC,LAST]]] + * \_Eval[[null[NULL] AS does_not_exist1#370, null[NULL] AS does_not_exist2#371]] + * \_EsRelation[test][_meta_field{f}#365, emp_no{f}#359, first_name{f}#36..] 
+ */ + public void testSortExpressionMultipleFields() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | SORT does_not_exist1::LONG + 1, does_not_exist2 DESC, emp_no ASC + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var orderBy = as(limit.child(), OrderBy.class); + assertThat(orderBy.order(), hasSize(3)); + assertThat(Expressions.name(orderBy.order().get(0).child()), is("does_not_exist1::LONG + 1")); + assertThat(Expressions.name(orderBy.order().get(1).child()), is("does_not_exist2")); + assertThat(Expressions.name(orderBy.order().get(2).child()), is("emp_no")); + + var eval = as(orderBy.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + var alias1 = as(eval.fields().get(0), Alias.class); + assertThat(alias1.name(), is("does_not_exist1")); + assertThat(as(alias1.child(), Literal.class).dataType(), is(DataType.NULL)); + var alias2 = as(eval.fields().get(1), Alias.class); + assertThat(alias2.name(), is("does_not_exist2")); + assertThat(as(alias2.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_MvExpand[does_not_exist{r}#17,does_not_exist{r}#20] + * \_Eval[[null[NULL] AS does_not_exist#17]] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] 
     */
    // MV_EXPAND over an unmapped field: the field is nullified below the MvExpand node.
    public void testMvExpand() {
        var plan = analyzeStatement(setUnmappedNullify("""
            FROM test
            | MV_EXPAND does_not_exist
            """));

        var limit = as(plan, Limit.class);
        assertThat(limit.limit().fold(FoldContext.small()), is(1000));

        var mvExpand = as(limit.child(), MvExpand.class);
        assertThat(Expressions.name(mvExpand.expanded()), is("does_not_exist"));

        // The unmapped field is introduced as a null literal before being expanded.
        var eval = as(mvExpand.child(), Eval.class);
        assertThat(eval.fields(), hasSize(1));
        var alias = as(eval.fields().getFirst(), Alias.class);
        assertThat(alias.name(), is("does_not_exist"));
        assertThat(as(alias.child(), Literal.class).dataType(), is(DataType.NULL));

        var relation = as(eval.child(), EsRelation.class);
        assertThat(relation.indexPattern(), is("test"));
    }

    /*
     * Limit[1000[INTEGER],false,false]
     * \_Filter[TOLONG(does_not_exist{r}#566) > 1[INTEGER]]
     * \_Eval[[null[NULL] AS does_not_exist#566]]
     * \_EsRelation[languages][language_code{f}#564, language_name{f}#565]
     */
    // Unmapped field used only inside a FROM subquery: the null alias is injected inside the
    // subquery's own plan, below the Filter that references it.
    public void testSubqueryOnly() {
        assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());

        var plan = analyzeStatement(setUnmappedNullify("""
            FROM
            (FROM languages
            | WHERE does_not_exist::LONG > 1)
            """));

        var limit = as(plan, Limit.class);
        assertThat(limit.limit().fold(FoldContext.small()), is(1000));

        // The WHERE condition keeps its cast of the (now null) field.
        var filter = as(limit.child(), Filter.class);
        var gt = as(filter.condition(), GreaterThan.class);
        var toLong = as(gt.left(), ToLong.class);
        assertThat(Expressions.name(toLong.field()), is("does_not_exist"));

        var eval = as(filter.child(), Eval.class);
        assertThat(eval.fields(), hasSize(1));
        var alias = as(eval.fields().getFirst(), Alias.class);
        assertThat(alias.name(), is("does_not_exist"));
        assertThat(as(alias.child(), Literal.class).dataType(), is(DataType.NULL));

        var relation = as(eval.child(), EsRelation.class);
        assertThat(relation.indexPattern(), is("languages"));
    }

    /*
     *
     * Limit[1000[INTEGER],false,false]
     * \_UnionAll[[language_code{r}#22, language_name{r}#23, does_not_exist1{r}#24, @timestamp{r}#25, client_ip{r}#26, event_dur
     * ation{r}#27, message{r}#28]]
     * |_Limit[1000[INTEGER],false,false]
     * | \_EsqlProject[[language_code{f}#6, language_name{f}#7, does_not_exist1{r}#12, @timestamp{r}#16, client_ip{r}#17, event_durat
     * ion{r}#18, message{r}#19]]
     * | \_Eval[[null[DATETIME] AS @timestamp#16, null[IP] AS client_ip#17, null[LONG] AS event_duration#18, null[KEYWORD] AS
     * message#19]]
     * | \_Subquery[]
     * | \_Filter[TOLONG(does_not_exist1{r}#12) > 1[INTEGER]]
     * | \_Eval[[null[NULL] AS does_not_exist1#12]]
     * | \_EsRelation[languages][language_code{f}#6, language_name{f}#7]
     * \_Limit[1000[INTEGER],false,false]
     * \_EsqlProject[[language_code{r}#20, language_name{r}#21, does_not_exist1{r}#14, @timestamp{f}#8, client_ip{f}#9, event_durat
     * ion{f}#10, message{f}#11]]
     * \_Eval[[null[INTEGER] AS language_code#20, null[KEYWORD] AS language_name#21]]
     * \_Subquery[]
     * \_Filter[TODOUBLE(does_not_exist1{r}#14) > 10.0[DOUBLE]]
     * \_Eval[[null[NULL] AS does_not_exist1#14]]
     * \_EsRelation[sample_data][@timestamp{f}#8, client_ip{f}#9, event_duration{f}#..]
     */
    // Two subqueries over different indices, each referencing the same unmapped field: each branch
    // nullifies its own copy, and fields absent from one branch are null-padded (with the other
    // branch's mapped types) so the UnionAll output columns line up.
    public void testDoubleSubqueryOnly() {
        assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());

        var plan = analyzeStatement(setUnmappedNullify("""
            FROM
            (FROM languages
            | WHERE does_not_exist1::LONG > 1),
            (FROM sample_data
            | WHERE does_not_exist1::DOUBLE > 10.)
            """));

        var topLimit = as(plan, Limit.class);
        assertThat(topLimit.limit().fold(FoldContext.small()), is(1000));

        var union = as(topLimit.child(), UnionAll.class);
        assertThat(union.children(), hasSize(2));

        // Left branch: languages
        var leftLimit = as(union.children().get(0), Limit.class);
        assertThat(leftLimit.limit().fold(FoldContext.small()), is(1000));

        var leftProject = as(leftLimit.child(), Project.class);
        var leftEval = as(leftProject.child(), Eval.class);
        // Verify unmapped null aliases for @timestamp, client_ip, event_duration, message
        assertThat(leftEval.fields(), hasSize(4));
        var leftTs = as(leftEval.fields().get(0), Alias.class);
        assertThat(leftTs.name(), is("@timestamp"));
        assertThat(as(leftTs.child(), Literal.class).dataType(), is(DataType.DATETIME));
        var leftIp = as(leftEval.fields().get(1), Alias.class);
        assertThat(leftIp.name(), is("client_ip"));
        assertThat(as(leftIp.child(), Literal.class).dataType(), is(DataType.IP));
        var leftDur = as(leftEval.fields().get(2), Alias.class);
        assertThat(leftDur.name(), is("event_duration"));
        assertThat(as(leftDur.child(), Literal.class).dataType(), is(DataType.LONG));
        var leftMsg = as(leftEval.fields().get(3), Alias.class);
        assertThat(leftMsg.name(), is("message"));
        assertThat(as(leftMsg.child(), Literal.class).dataType(), is(DataType.KEYWORD));

        var leftSubquery = as(leftEval.child(), Subquery.class);
        var leftSubFilter = as(leftSubquery.child(), Filter.class);
        var leftGt = as(leftSubFilter.condition(), GreaterThan.class);
        var leftToLong = as(leftGt.left(), ToLong.class);
        assertThat(Expressions.name(leftToLong.field()), is("does_not_exist1"));

        // Inside the subquery the unmapped field itself is a NULL-typed literal.
        var leftSubEval = as(leftSubFilter.child(), Eval.class);
        assertThat(leftSubEval.fields(), hasSize(1));
        var leftDoesNotExist = as(leftSubEval.fields().getFirst(), Alias.class);
        assertThat(leftDoesNotExist.name(), is("does_not_exist1"));
        assertThat(as(leftDoesNotExist.child(), Literal.class).dataType(), is(DataType.NULL));

        var leftRel = as(leftSubEval.child(), EsRelation.class);
        assertThat(leftRel.indexPattern(), is("languages"));

        // Right branch: sample_data
        var rightLimit = as(union.children().get(1), Limit.class);
        assertThat(rightLimit.limit().fold(FoldContext.small()), is(1000));

        var rightProject = as(rightLimit.child(), Project.class);
        var rightEval = as(rightProject.child(), Eval.class);
        // Verify unmapped null aliases for language_code, language_name
        assertThat(rightEval.fields(), hasSize(2));
        var rightCode = as(rightEval.fields().get(0), Alias.class);
        assertThat(rightCode.name(), is("language_code"));
        assertThat(as(rightCode.child(), Literal.class).dataType(), is(DataType.INTEGER));
        var rightName = as(rightEval.fields().get(1), Alias.class);
        assertThat(rightName.name(), is("language_name"));
        assertThat(as(rightName.child(), Literal.class).dataType(), is(DataType.KEYWORD));

        var rightSubquery = as(rightEval.child(), Subquery.class);
        var rightSubFilter = as(rightSubquery.child(), Filter.class);
        var rightGt = as(rightSubFilter.condition(), GreaterThan.class);
        var rightToDouble = as(rightGt.left(), ToDouble.class);
        assertThat(Expressions.name(rightToDouble.field()), is("does_not_exist1"));

        var rightSubEval = as(rightSubFilter.child(), Eval.class);
        assertThat(rightSubEval.fields(), hasSize(1));
        var rightDoesNotExist = as(rightSubEval.fields().getFirst(), Alias.class);
        assertThat(rightDoesNotExist.name(), is("does_not_exist1"));
        assertThat(as(rightDoesNotExist.child(), Literal.class).dataType(), is(DataType.NULL));

        var rightRel = as(rightSubEval.child(), EsRelation.class);
        assertThat(rightRel.indexPattern(), is("sample_data"));
    }

    /*
     * Project[[language_code{r}#23, language_name{r}#24, does_not_exist1{r}#25, @timestamp{r}#26, client_ip{r}#27, event_duration{r}#28,
     * message{r}#29, does_not_exist2{r}#30]]
     * \_Limit[1000[INTEGER],false,false]
     *
     * \_Filter[$$does_not_exist2$converted_to$long{r$}#36 < 100[INTEGER]]
     * \_UnionAll[[language_code{r}#23, language_name{r}#24, does_not_exist1{r}#25, @timestamp{r}#26, client_ip{r}#27,
     * event_duration{r}#28, message{r}#29, does_not_exist2{r}#30, $$does_not_exist2$converted_to$long{r$}#36]]
     * |_Limit[1000[INTEGER],false,false]
     * | \_EsqlProject[[language_code{f}#7, language_name{f}#8, does_not_exist1{r}#13, @timestamp{r}#17, client_ip{r}#18,
     * event_duration{r}#19, message{r}#20, does_not_exist2{r}#31, $$does_not_exist2$converted_to$long{r}#34]]
     * | \_Eval[[TOLONG(does_not_exist2{r}#31) AS $$does_not_exist2$converted_to$long#34]]
     * | \_Eval[[null[DATETIME] AS @timestamp#17, null[IP] AS client_ip#18, null[LONG] AS event_duration#19,
     * null[KEYWORD] AS message#20]]
     * | \_Subquery[]
     * | \_Filter[TOLONG(does_not_exist1{r}#13) > 1[INTEGER]]
     * | \_Eval[[null[NULL] AS does_not_exist1#13, null[NULL] AS does_not_exist2#30]]
     * | \_EsRelation[languages][language_code{f}#7, language_name{f}#8]
     * \_Limit[1000[INTEGER],false,false]
     * \_EsqlProject[[language_code{r}#21, language_name{r}#22, does_not_exist1{r}#15, @timestamp{f}#9, client_ip{f}#10,
     * event_duration{f}#11, message{f}#12, does_not_exist2{r}#32, $$does_not_exist2$converted_to$long{r}#35]]
     * \_Eval[[TOLONG(does_not_exist2{r}#32) AS $$does_not_exist2$converted_to$long#35]]
     * \_Eval[[null[INTEGER] AS language_code#21, null[KEYWORD] AS language_name#22]]
     * \_Subquery[]
     * \_Filter[TODOUBLE(does_not_exist1{r}#15) > 10.0[DOUBLE]]
     * \_Eval[[null[NULL] AS does_not_exist1#15, null[NULL] AS does_not_exist2#30]]
     * \_EsRelation[sample_data][@timestamp{f}#9, client_ip{f}#10, event_duration{f}..]
     */
    // A top-level filter over an unmapped field (does_not_exist2) after a union of two subqueries:
    // each branch produces a synthetic `$$does_not_exist2$converted_to$long` conversion column so
    // the filter above the UnionAll can reference a single, uniformly typed attribute.
    public void testDoubleSubqueryOnlyWithTopFilterAndNoMain() {
        assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());

        var plan = analyzeStatement(setUnmappedNullify("""
            FROM
            (FROM languages
            | WHERE does_not_exist1::LONG > 1),
            (FROM sample_data
            | WHERE does_not_exist1::DOUBLE > 10.)
            | WHERE does_not_exist2::LONG < 100
            """));

        // Top-level Project wrapping the plan
        var topProject = as(plan, Project.class);

        // Below Project is Limit
        var limit = as(topProject.child(), Limit.class);
        assertThat(limit.limit().fold(FoldContext.small()), is(1000));

        // Below Limit is Filter with does_not_exist2 conversion
        var filter = as(limit.child(), Filter.class);
        var filterCondition = as(filter.condition(), LessThan.class);
        assertThat(Expressions.name(filterCondition.right()), is("100"));

        // Below Filter is UnionAll
        var union = as(filter.child(), UnionAll.class);
        assertThat(union.children(), hasSize(2));

        // Left branch: languages
        var leftLimit = as(union.children().get(0), Limit.class);
        assertThat(leftLimit.limit().fold(FoldContext.small()), is(1000));

        var leftProject = as(leftLimit.child(), Project.class);
        assertThat(
            Expressions.names(leftProject.output()),
            is(
                List.of(
                    "language_code",
                    "language_name",
                    "does_not_exist1",
                    "@timestamp",
                    "client_ip",
                    "event_duration",
                    "message",
                    "does_not_exist2",
                    "$$does_not_exist2$converted_to$long"
                )
            )
        );
        var leftEval = as(leftProject.child(), Eval.class);
        assertThat(leftEval.fields(), hasSize(1));
        assertThat(Expressions.name(leftEval.fields().getFirst()), is("$$does_not_exist2$converted_to$long"));
        var leftEvalEval = as(leftEval.child(), Eval.class);
        // Verify unmapped null aliases for @timestamp, client_ip, event_duration, message
        assertThat(Expressions.names(leftEvalEval.fields()), is(List.of("@timestamp", "client_ip", "event_duration", "message")));

        var leftSubquery = as(leftEvalEval.child(), Subquery.class);
        var leftSubFilter = as(leftSubquery.child(), Filter.class);
        var leftGt = as(leftSubFilter.condition(), GreaterThan.class);
        var leftToLong = as(leftGt.left(), ToLong.class);
        assertThat(Expressions.name(leftToLong.field()), is("does_not_exist1"));

        // Both unmapped fields are nullified inside the subquery.
        var leftSubEval = as(leftSubFilter.child(), Eval.class);
        assertThat(leftSubEval.fields(), hasSize(2));
        var leftDoesNotExist1 = as(leftSubEval.fields().get(0), Alias.class);
        assertThat(leftDoesNotExist1.name(), is("does_not_exist1"));
        assertThat(as(leftDoesNotExist1.child(), Literal.class).dataType(), is(DataType.NULL));
        var leftDoesNotExist2 = as(leftSubEval.fields().get(1), Alias.class);
        assertThat(leftDoesNotExist2.name(), is("does_not_exist2"));
        assertThat(as(leftDoesNotExist2.child(), Literal.class).dataType(), is(DataType.NULL));

        var leftRel = as(leftSubEval.child(), EsRelation.class);
        assertThat(leftRel.indexPattern(), is("languages"));

        // Right branch: sample_data
        var rightLimit = as(union.children().get(1), Limit.class);
        assertThat(rightLimit.limit().fold(FoldContext.small()), is(1000));

        var rightProject = as(rightLimit.child(), Project.class);
        assertThat(
            Expressions.names(rightProject.output()),
            is(
                List.of(
                    "language_code",
                    "language_name",
                    "does_not_exist1",
                    "@timestamp",
                    "client_ip",
                    "event_duration",
                    "message",
                    "does_not_exist2",
                    "$$does_not_exist2$converted_to$long"
                )
            )
        );
        var rightEval = as(rightProject.child(), Eval.class);
        assertThat(Expressions.name(rightEval.fields().getFirst()), is("$$does_not_exist2$converted_to$long"));
        var rightEvalEval = as(rightEval.child(), Eval.class);
        assertThat(Expressions.names(rightEvalEval.fields()), is(List.of("language_code", "language_name")));

        var rightSubquery = as(rightEvalEval.child(), Subquery.class);
        var rightSubFilter = as(rightSubquery.child(), Filter.class);
        var rightGt = as(rightSubFilter.condition(), GreaterThan.class);
        var rightToDouble = as(rightGt.left(), ToDouble.class);
        assertThat(Expressions.name(rightToDouble.field()), is("does_not_exist1"));

        var rightSubEval = as(rightSubFilter.child(), Eval.class);
        assertThat(rightSubEval.fields(), hasSize(2));
        var rightDoesNotExist1 = as(rightSubEval.fields().get(0), Alias.class);
        assertThat(rightDoesNotExist1.name(), is("does_not_exist1"));
        assertThat(as(rightDoesNotExist1.child(), Literal.class).dataType(), is(DataType.NULL));
        var rightDoesNotExist2 = as(rightSubEval.fields().get(1), Alias.class);
        assertThat(rightDoesNotExist2.name(), is("does_not_exist2"));
        assertThat(as(rightDoesNotExist2.child(), Literal.class).dataType(), is(DataType.NULL));

        var rightRel = as(rightSubEval.child(), EsRelation.class);
        assertThat(rightRel.indexPattern(), is("sample_data"));
    }

    /*
     * Project[[_meta_field{r}#36, emp_no{r}#37, first_name{r}#38, gender{r}#39, hire_date{r}#40, job{r}#41, job.raw{r}#42,
     * languages{r}#43, last_name{r}#44, long_noidx{r}#45, salary{r}#46, language_code{r}#47, language_name{r}#48,
     * does_not_exist1{r}#49, does_not_exist2{r}#50]]
     * \_Limit[1000[INTEGER],false,false]
     * \_Filter[$$does_not_exist2$converted_to$long{r$}#56 < 10[INTEGER] AND emp_no{r}#37 > 0[INTEGER]]
     * \_UnionAll[[_meta_field{r}#36, emp_no{r}#37, first_name{r}#38, gender{r}#39, hire_date{r}#40, job{r}#41, job.raw{r}#42,
     * languages{r}#43, last_name{r}#44, long_noidx{r}#45, salary{r}#46, language_code{r}#47, language_name{r}#48,
     * does_not_exist1{r}#49, does_not_exist2{r}#50, $$does_not_exist2$converted_to$long{r$}#56]]
     * |_Limit[1000[INTEGER],false,false]
     * | \_EsqlProject[[_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, gender{f}#9, hire_date{f}#14, job{f}#15, job.raw{f}#16,
     * languages{f}#10, last_name{f}#11, long_noidx{f}#17, salary{f}#12, language_code{r}#22, language_name{r}#23,
     * does_not_exist1{r}#24, does_not_exist2{r}#51, $$does_not_exist2$converted_to$long{r}#54]]
     * |
     * \_Eval[[TOLONG(does_not_exist2{r}#51) AS $$does_not_exist2$converted_to$long#54]]
     * | \_Eval[[null[INTEGER] AS language_code#22, null[KEYWORD] AS language_name#23, null[NULL] AS does_not_exist1#24,
     * null[NULL] AS does_not_exist2#50]]
     * | \_EsRelation[test][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..]
     * \_Limit[1000[INTEGER],false,false]
     * \_EsqlProject[[_meta_field{r}#25, emp_no{r}#26, first_name{r}#27, gender{r}#28, hire_date{r}#29, job{r}#30, job.raw{r}#31,
     * languages{r}#32, last_name{r}#33, long_noidx{r}#34, salary{r}#35, language_code{f}#18, language_name{f}#19,
     * does_not_exist1{r}#20, does_not_exist2{r}#52, $$does_not_exist2$converted_to$long{r}#55]]
     * \_Eval[[TOLONG(does_not_exist2{r}#52) AS $$does_not_exist2$converted_to$long#55]]
     * \_Eval[[null[KEYWORD] AS _meta_field#25, null[INTEGER] AS emp_no#26, null[KEYWORD] AS first_name#27,
     * null[TEXT] AS gender#28, null[DATETIME] AS hire_date#29, null[TEXT] AS job#30, null[KEYWORD] AS job.raw#31,
     * null[INTEGER] AS languages#32, null[KEYWORD] AS last_name#33, null[LONG] AS long_noidx#34,
     * null[INTEGER] AS salary#35]]
     * \_Subquery[]
     * \_Filter[TOLONG(does_not_exist1{r}#20) > 1[INTEGER]]
     * \_Eval[[null[NULL] AS does_not_exist1#20, null[NULL] AS does_not_exist2#50]]
     * \_EsRelation[languages][language_code{f}#18, language_name{f}#19]
     */
    // Mixing a main index (test) with a subquery (languages): the union output is the combined
    // schema, and a top-level filter mixing a mapped field (emp_no) with an unmapped one
    // (does_not_exist2) resolves the latter through the synthetic conversion column.
    public void testSubqueryAndMainQuery() {
        assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());

        var plan = analyzeStatement(setUnmappedNullify("""
            FROM test,
            (FROM languages
            | WHERE does_not_exist1::LONG > 1)
            | WHERE does_not_exist2::LONG < 10 AND emp_no > 0
            """));

        // Top implicit limit
        var project = as(plan, Project.class);
        assertThat(
            Expressions.names(project.output()),
            is(
                List.of(
                    "_meta_field",
                    "emp_no",
                    "first_name",
                    "gender",
                    "hire_date",
                    "job",
                    "job.raw",
                    "languages",
                    "last_name",
                    "long_noidx",
                    "salary",
                    "language_code",
                    "language_name",
                    "does_not_exist1",
                    "does_not_exist2"
                )
            )
        );

        var limit = as(project.child(), Limit.class);
        assertThat(limit.limit().fold(FoldContext.small()), is(1000));

        // Top filter: TOLONG(does_not_exist2) < 10 AND emp_no > 0
        var topFilter = as(limit.child(), Filter.class);
        var topAnd = as(topFilter.condition(), And.class);

        var leftCond = as(topAnd.left(), LessThan.class);
        var leftToLong = as(leftCond.left(), ReferenceAttribute.class);
        assertThat(Expressions.name(leftToLong), is("$$does_not_exist2$converted_to$long"));
        assertThat(as(leftCond.right(), Literal.class).value(), is(10));

        var rightCond = as(topAnd.right(), GreaterThan.class);
        var rightAttr = as(rightCond.left(), ReferenceAttribute.class);
        assertThat(rightAttr.name(), is("emp_no"));
        assertThat(as(rightCond.right(), Literal.class).value(), is(0));

        // UnionAll with two branches
        var union = as(topFilter.child(), UnionAll.class);
        assertThat(union.children(), hasSize(2));

        // Left branch: EsRelation[test] with EsqlProject + Eval nulls
        var leftLimit = as(union.children().get(0), Limit.class);
        assertThat(leftLimit.limit().fold(FoldContext.small()), is(1000));

        var leftProject = as(leftLimit.child(), Project.class);
        var leftEval = as(leftProject.child(), Eval.class);
        assertThat(Expressions.names(leftEval.fields()), is(List.of("$$does_not_exist2$converted_to$long")));
        var leftEvalEval = as(leftEval.child(), Eval.class);
        var leftLangCode = as(leftEvalEval.fields().get(0), Alias.class);
        assertThat(leftLangCode.name(), is("language_code"));
        assertThat(as(leftLangCode.child(), Literal.class).dataType(), is(DataType.INTEGER));
        var leftLangName = as(leftEvalEval.fields().get(1), Alias.class);
        assertThat(leftLangName.name(), is("language_name"));
        assertThat(as(leftLangName.child(), Literal.class).dataType(), is(DataType.KEYWORD));
        var leftDne1 = as(leftEvalEval.fields().get(2), Alias.class);
        assertThat(leftDne1.name(), is("does_not_exist1"));
        assertThat(as(leftDne1.child(), Literal.class).dataType(), is(DataType.NULL));

        var leftRel = as(leftEvalEval.child(), EsRelation.class);
        assertThat(leftRel.indexPattern(), is("test"));

        // Right branch: EsqlProject + Eval many nulls, Subquery -> Filter -> Eval -> EsRelation[languages]
        var rightLimit = as(union.children().get(1), Limit.class);
        assertThat(rightLimit.limit().fold(FoldContext.small()), is(1000));

        var rightProject = as(rightLimit.child(), Project.class);
        var rightEval = as(rightProject.child(), Eval.class);
        assertThat(Expressions.names(rightEval.fields()), is(List.of("$$does_not_exist2$converted_to$long")));
        var rightEvalEval = as(rightEval.child(), Eval.class);
        assertThat(
            Expressions.names(rightEvalEval.fields()),
            is(
                List.of(
                    "_meta_field",
                    "emp_no",
                    "first_name",
                    "gender",
                    "hire_date",
                    "job",
                    "job.raw",
                    "languages",
                    "last_name",
                    "long_noidx",
                    "salary"
                )
            )
        );

        var rightSub = as(rightEvalEval.child(), Subquery.class);
        var rightSubFilter = as(rightSub.child(), Filter.class);
        var rightGt = as(rightSubFilter.condition(), GreaterThan.class);
        var rightToLongOnDne1 = as(rightGt.left(), ToLong.class);
        assertThat(Expressions.name(rightToLongOnDne1.field()), is("does_not_exist1"));

        var rightSubEval = as(rightSubFilter.child(), Eval.class);
        assertThat(Expressions.names(rightSubEval.fields()), is(List.of("does_not_exist1", "does_not_exist2")));

        var rightRel = as(rightSubEval.child(), EsRelation.class);
        assertThat(rightRel.indexPattern(), is("languages"));
    }

    /*
     * Limit[1000[INTEGER],false,false]
     * \_OrderBy[[Order[emp_no{f}#11,ASC,LAST], Order[emp_no_plus{r}#6,ASC,LAST]]]
     * \_EsqlProject[[emp_no{f}#11, emp_no_foo{r}#22, emp_no_plus{r}#6]]
     * \_Filter[emp_no{f}#11 < 10003[INTEGER]]
     * \_Eval[[TOLONG(emp_no_foo{r}#22) + 1[INTEGER] AS emp_no_plus#6]]
     * \_Eval[[null[NULL] AS emp_no_foo#22]]
     *
     * \_EsRelation[employees][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..]
     */
    // Unmapped field used in an EVAL inside a subquery, then referenced through KEEP/SORT in the
    // outer query: the nullified field matches the `emp_no*` KEEP pattern and stays in the output.
    public void testSubqueryMix() {
        assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());

        var plan = analyzeStatement(setUnmappedNullify("""
            FROM
            (FROM employees
            | EVAL emp_no_plus = emp_no_foo::LONG + 1
            | WHERE emp_no < 10003)
            | KEEP emp_no*
            | SORT emp_no, emp_no_plus
            """));

        var limit = as(plan, Limit.class);
        assertThat(limit.limit().fold(FoldContext.small()), is(1000));

        var orderBy = as(limit.child(), OrderBy.class);
        assertThat(orderBy.order(), hasSize(2));
        assertThat(Expressions.name(orderBy.order().get(0).child()), is("emp_no"));
        assertThat(Expressions.name(orderBy.order().get(1).child()), is("emp_no_plus"));

        // KEEP emp_no* also matches the nullified emp_no_foo.
        var project = as(orderBy.child(), Project.class);
        assertThat(project.projections(), hasSize(3));
        assertThat(Expressions.names(project.projections()), is(List.of("emp_no", "emp_no_foo", "emp_no_plus")));

        var filter = as(project.child(), Filter.class);
        assertThat(Expressions.name(filter.condition()), is("emp_no < 10003"));

        var evalPlus = as(filter.child(), Eval.class);
        assertThat(evalPlus.fields(), hasSize(1));
        var aliasPlus = as(evalPlus.fields().getFirst(), Alias.class);
        assertThat(aliasPlus.name(), is("emp_no_plus"));
        assertThat(Expressions.name(aliasPlus.child()), is("emp_no_foo::LONG + 1"));

        // The unmapped emp_no_foo is introduced as a NULL literal below the EVAL that uses it.
        var evalFoo = as(evalPlus.child(), Eval.class);
        assertThat(evalFoo.fields(), hasSize(1));
        var aliasFoo = as(evalFoo.fields().getFirst(), Alias.class);
        assertThat(aliasFoo.name(), is("emp_no_foo"));
        assertThat(as(aliasFoo.child(), Literal.class).dataType(), is(DataType.NULL));

        var relation = as(evalFoo.child(), EsRelation.class);
        assertThat(relation.indexPattern(), is("employees"));
    }

    /*
     * Limit[1000[INTEGER],false,false]
     * \_OrderBy[[Order[emp_no{f}#11,ASC,LAST], Order[emp_no_plus{r}#6,ASC,LAST]]]
     * \_EsqlProject[[_meta_field{f}#17,
     * emp_no{f}#11, gender{f}#13, hire_date{f}#18, job{f}#19, job.raw{f}#20, languages{f}#14,
     * long_noidx{f}#21, salary{f}#16, emp_no_foo{r}#22, emp_no_plus{r}#6]]
     * \_Filter[emp_no{f}#11 < 10003[INTEGER]]
     * \_Eval[[TOLONG(emp_no_foo{r}#22) + 1[INTEGER] AS emp_no_plus#6]]
     * \_Eval[[null[NULL] AS emp_no_foo#22]]
     * \_EsRelation[employees][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..]
     */
    // Same as testSubqueryMix but with DROP *_name instead of KEEP: the nullified emp_no_foo
    // survives the drop pattern and all non-*_name mapped fields remain.
    public void testSubqueryMixWithDropPattern() {
        assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());

        var plan = analyzeStatement(setUnmappedNullify("""
            FROM
            (FROM employees
            | EVAL emp_no_plus = emp_no_foo::LONG + 1
            | WHERE emp_no < 10003)
            | DROP *_name
            | SORT emp_no, emp_no_plus
            """));

        var limit = as(plan, Limit.class);
        assertThat(limit.limit().fold(FoldContext.small()), is(1000));

        var orderBy = as(limit.child(), OrderBy.class);
        assertThat(orderBy.order(), hasSize(2));
        assertThat(Expressions.name(orderBy.order().get(0).child()), is("emp_no"));
        assertThat(Expressions.name(orderBy.order().get(1).child()), is("emp_no_plus"));

        // first_name/last_name were dropped; everything else (including the nullified field) stays.
        var project = as(orderBy.child(), Project.class);
        assertThat(project.projections(), hasSize(11));
        assertThat(
            Expressions.names(project.projections()),
            is(
                List.of(
                    "_meta_field",
                    "emp_no",
                    "gender",
                    "hire_date",
                    "job",
                    "job.raw",
                    "languages",
                    "long_noidx",
                    "salary",
                    "emp_no_foo",
                    "emp_no_plus"
                )
            )
        );

        var filter = as(project.child(), Filter.class);
        assertThat(Expressions.name(filter.condition()), is("emp_no < 10003"));

        var evalPlus = as(filter.child(), Eval.class);
        assertThat(evalPlus.fields(), hasSize(1));
        var aliasPlus = as(evalPlus.fields().getFirst(), Alias.class);
        assertThat(aliasPlus.name(), is("emp_no_plus"));
        assertThat(Expressions.name(aliasPlus.child()), is("emp_no_foo::LONG + 1"));

        var evalFoo = as(evalPlus.child(), Eval.class);
        assertThat(evalFoo.fields(), hasSize(1));
        var aliasFoo = as(evalFoo.fields().getFirst(), Alias.class);
        assertThat(aliasFoo.name(), is("emp_no_foo"));
        assertThat(as(aliasFoo.child(), Literal.class).dataType(), is(DataType.NULL));

        var relation = as(evalFoo.child(), EsRelation.class);
        assertThat(relation.indexPattern(), is("employees"));
    }

    /*
     * Limit[1000[INTEGER],false,false]
     * \_OrderBy[[Order[does_not_exist{r}#19,ASC,LAST]]]
     * \_Aggregate[[does_not_exist{r}#19],[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS c#5, does_not_exist{r}#19]]
     * \_Eval[[null[NULL] AS does_not_exist#19]]
     * \_EsRelation[employees][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..]
     */
    // An unmapped field used as a STATS ... BY grouping key inside a subquery: the grouping key is
    // nullified below the Aggregate and remains addressable by the outer SORT.
    public void testSubqueryAfterUnionAllOfStats() {
        assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());

        var plan = analyzeStatement(setUnmappedNullify("""
            FROM
            (FROM employees
            | STATS c = COUNT(*) BY does_not_exist)
            | SORT does_not_exist
            """));

        // Top implicit limit 1000
        var limit = as(plan, Limit.class);
        assertThat(limit.limit().fold(FoldContext.small()), is(1000));

        // OrderBy over the Aggregate-produced grouping
        var orderBy = as(limit.child(), OrderBy.class);
        assertThat(orderBy.order(), hasSize(1));
        assertThat(Expressions.name(orderBy.order().get(0).child()), is("does_not_exist"));

        // Aggregate with grouping by does_not_exist
        var agg = as(orderBy.child(), Aggregate.class);
        assertThat(agg.groupings(), hasSize(1));
        assertThat(Expressions.name(agg.groupings().get(0)), is("does_not_exist"));
        assertThat(agg.aggregates(), hasSize(2)); // c and does_not_exist

        // Eval introduces does_not_exist as NULL
        var eval = as(agg.child(), Eval.class);
        assertThat(eval.fields(), hasSize(1));
        var alias = as(eval.fields().get(0), Alias.class);
        assertThat(alias.name(), is("does_not_exist"));
        assertThat(as(alias.child(), Literal.class).dataType(), is(DataType.NULL));

        // Underlying relation
        var relation = as(eval.child(), EsRelation.class);
        assertThat(relation.indexPattern(), is("employees"));
    }

    /**
     * Limit[1000[INTEGER],false,false]
     * \_OrderBy[[Order[does_not_exist{r}#53,ASC,LAST]]]
     * \_UnionAll[[_meta_field{r}#41, emp_no{r}#42, first_name{r}#43, gender{r}#44, hire_date{r}#45, job{r}#46, job.raw{r}#47,
     * languages{r}#48, last_name{r}#49, long_noidx{r}#50, salary{r}#51, c{r}#52, does_not_exist{r}#53]]
     * |_Limit[1000[INTEGER],false,false]
     * | \_EsqlProject[[_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, gender{f}#9, hire_date{f}#14, job{f}#15, job.raw{f}#16,
     * languages{f}#10, last_name{f}#11, long_noidx{f}#17, salary{f}#12, c{r}#29, does_not_exist{r}#54]]
     * | \_Eval[[null[LONG] AS c#29, null[NULL] AS does_not_exist#53]]
     * | \_EsRelation[employees][_meta_field{f}#13, emp_no{f}#7, first_name{f}#8, ge..]
     * \_Limit[1000[INTEGER],false,false]
     * \_EsqlProject[[_meta_field{r}#30, emp_no{r}#31, first_name{r}#32, gender{r}#33, hire_date{r}#34, job{r}#35, job.raw{r}#36,
     * languages{r}#37, last_name{r}#38, long_noidx{r}#39, salary{r}#40, c{r}#4, does_not_exist{r}#55]]
     * \_Eval[[null[NULL] AS does_not_exist#56]]
     * \_Eval[[null[KEYWORD] AS _meta_field#30, null[INTEGER] AS emp_no#31, null[KEYWORD] AS first_name#32,
     * null[TEXT] AS gender#33, null[DATETIME] AS hire_date#34, null[TEXT] AS job#35, null[KEYWORD] AS job.raw#36,
     * null[INTEGER] AS languages#37, null[KEYWORD] AS last_name#38, null[LONG] AS long_noidx#39,
     * null[INTEGER] AS salary#40]]
     * \_Subquery[]
     * \_Aggregate[[],[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS c#4]]
     * \_Eval[[null[NULL] AS does_not_exist#53]]
     * \_EsRelation[employees][_meta_field{f}#24, emp_no{f}#18, first_name{f}#19, .
     */
    // Main index unioned with a STATS subquery, sorted by a field mapped in neither branch:
    // resolution succeeds because the main branch contributes the nullified column to the union.
    // Only the output schema is asserted here (see TODO below).
    public void testSubqueryAfterUnionAllOfStatsAndMain() {
        assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled());

        var plan = analyzeStatement(setUnmappedNullify("""
            FROM employees,
            (FROM employees
            | STATS c = COUNT(*))
            | SORT does_not_exist
            """));

        // TODO: golden testing
        assertThat(
            Expressions.names(plan.output()),
            is(
                List.of(
                    "_meta_field",
                    "emp_no",
                    "first_name",
                    "gender",
                    "hire_date",
                    "job",
                    "job.raw",
                    "languages",
                    "last_name",
                    "long_noidx",
                    "salary",
                    "c",
                    "does_not_exist"
                )
            )
        );
    }

    // After a bare STATS (no BY) the only surviving column is the aggregate, so a later reference
    // to an unknown column must still fail — for both the NULLIFY and LOAD settings.
    public void testFailAfterUnionAllOfStats() {
        var query = """
            FROM
            (FROM employees
            | STATS c = COUNT(*))
            | SORT does_not_exist
            """;
        var failure = "line 4:8: Unknown column [does_not_exist]";
        verificationFailure(setUnmappedNullify(query), failure);
        verificationFailure(setUnmappedLoad(query), failure);
    }

    /*
     * Project[[_meta_field{r}#53, emp_no{r}#54, first_name{r}#55, gender{r}#56, hire_date{r}#57, job{r}#58, job.raw{r}#59,
     * languages{r}#60, last_name{r}#61, long_noidx{r}#62, salary{r}#63, language_code{r}#64, language_name{r}#65,
     * does_not_exist1{r}#66, does_not_exist2{r}#71]]
     * \_Limit[1000[INTEGER],false,false]
     * \_Filter[$$does_not_exist2$converted_to$long{r$}#79 < 10[INTEGER] AND emp_no{r}#54 > 0[INTEGER]
     * OR $$does_not_exist1$converted_to$long{r$}#70 < 11[INTEGER]]
     * \_UnionAll[[_meta_field{r}#53, emp_no{r}#54, first_name{r}#55, gender{r}#56, hire_date{r}#57, job{r}#58, job.raw{r}#59,
     * languages{r}#60, last_name{r}#61, long_noidx{r}#62, salary{r}#63, language_code{r}#64, language_name{r}#65,
     * does_not_exist1{r}#66, $$does_not_exist1$converted_to$long{r$}#70, does_not_exist2{r}#71,
     * $$does_not_exist2$converted_to$long{r$}#79]]
     * |_Limit[1000[INTEGER],false,false]
     * | \_EsqlProject[[_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, gender{f}#11, hire_date{f}#16, job{f}#17, job.raw{f}#18,
     *
languages{f}#12, last_name{f}#13, long_noidx{f}#19, salary{f}#14, language_code{r}#28, language_name{r}#29, + * does_not_exist1{r}#30, $$does_not_exist1$converted_to$long{r}#67, does_not_exist2{r}#72, + * $$does_not_exist2$converted_to$long{r}#76]] + * | \_Eval[[TOLONG(does_not_exist2{r}#72) AS $$does_not_exist2$converted_to$long#76]] + * | \_Eval[[TOLONG(does_not_exist1{r}#30) AS $$does_not_exist1$converted_to$long#67]] + * | \_Eval[[null[INTEGER] AS language_code#28, null[KEYWORD] AS language_name#29, null[NULL] AS does_not_exist1#30, + * null[NULL] AS does_not_exist2#71]] + * | \_EsRelation[test][_meta_field{f}#15, emp_no{f}#9, first_name{f}#10, g..] + * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[_meta_field{r}#31, emp_no{r}#32, first_name{r}#33, gender{r}#34, hire_date{r}#35, job{r}#36, job.raw{r}#37, + * languages{r}#38, last_name{r}#39, long_noidx{r}#40, salary{r}#41, language_code{f}#20, language_name{f}#21, + * does_not_exist1{r}#24, $$does_not_exist1$converted_to$long{r}#68, does_not_exist2{r}#73, + * $$does_not_exist2$converted_to$long{r}#77]] + * | \_Eval[[TOLONG(does_not_exist2{r}#73) AS $$does_not_exist2$converted_to$long#77]] + * | \_Eval[[TOLONG(does_not_exist1{r}#24) AS $$does_not_exist1$converted_to$long#68]] + * | \_Eval[[null[KEYWORD] AS _meta_field#31, null[INTEGER] AS emp_no#32, null[KEYWORD] AS first_name#33, + * null[TEXT] AS gender#34, null[DATETIME] AS hire_date#35, null[TEXT] AS job#36, null[KEYWORD] AS job.raw#37, + * null[INTEGER] AS languages#38, null[KEYWORD] AS last_name#39, null[LONG] AS long_noidx#40, + * null[INTEGER] AS salary#41]] + * | \_Subquery[] + * | \_Filter[TOLONG(does_not_exist1{r}#24) > 1[INTEGER]] + * | \_Eval[[null[NULL] AS does_not_exist1#24, null[NULL] AS does_not_exist2#71]] + * | \_EsRelation[languages][language_code{f}#20, language_name{f}#21] + * \_Limit[1000[INTEGER],false,false] + * \_EsqlProject[[_meta_field{r}#42, emp_no{r}#43, first_name{r}#44, gender{r}#45, hire_date{r}#46, job{r}#47, 
job.raw{r}#48, + * languages{r}#49, last_name{r}#50, long_noidx{r}#51, salary{r}#52, language_code{f}#22, language_name{f}#23, + * does_not_exist1{r}#26, $$does_not_exist1$converted_to$long{r}#69, does_not_exist2{r}#74, + * $$does_not_exist2$converted_to$long{r}#78]] + * \_Eval[[TOLONG(does_not_exist2{r}#74) AS $$does_not_exist2$converted_to$long#78]] + * \_Eval[[TOLONG(does_not_exist1{r}#26) AS $$does_not_exist1$converted_to$long#69]] + * \_Eval[[null[KEYWORD] AS _meta_field#42, null[INTEGER] AS emp_no#43, null[KEYWORD] AS first_name#44, + * null[TEXT] AS gender#45, null[DATETIME] AS hire_date#46, null[TEXT] AS job#47, null[KEYWORD] AS job.raw#48, + * null[INTEGER] AS languages#49, null[KEYWORD] AS last_name#50, null[LONG] AS long_noidx#51, + * null[INTEGER] AS salary#52]] + * \_Subquery[] + * \_Filter[TOLONG(does_not_exist1{r}#26) > 2[INTEGER]] + * \_Eval[[null[NULL] AS does_not_exist1#26, null[NULL] AS does_not_exist2#71]] + * \_EsRelation[languages][language_code{f}#22, language_name{f}#23] + */ + public void testSubquerysWithMainAndSameOptional() { + assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled()); + + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test, + (FROM languages + | WHERE does_not_exist1::LONG > 1), + (FROM languages + | WHERE does_not_exist1::LONG > 2) + | WHERE does_not_exist2::LONG < 10 AND emp_no > 0 OR does_not_exist1::LONG < 11 + """)); + + // Top Project + var topProject = as(plan, Project.class); + + // Top implicit limit + var topLimit = as(topProject.child(), Limit.class); + assertThat(topLimit.limit().fold(FoldContext.small()), is(1000)); + + // Top filter: TOLONG(does_not_exist2) < 10 AND emp_no > 0 OR $$does_not_exist1$converted_to$long < 11 + var topFilter = as(topLimit.child(), Filter.class); + var topOr = as(topFilter.condition(), Or.class); + + var leftAnd = as(topOr.left(), And.class); + var andLeftLt = as(leftAnd.left(), LessThan.class); + var 
andLeftToLong = as(andLeftLt.left(), ReferenceAttribute.class); + assertThat(andLeftToLong.name(), is("$$does_not_exist2$converted_to$long")); + assertThat(as(andLeftLt.right(), Literal.class).value(), is(10)); + + var andRightGt = as(leftAnd.right(), GreaterThan.class); + var andRightAttr = as(andRightGt.left(), ReferenceAttribute.class); + assertThat(andRightAttr.name(), is("emp_no")); + assertThat(as(andRightGt.right(), Literal.class).value(), is(0)); + + var rightLt = as(topOr.right(), LessThan.class); + var rightAttr = as(rightLt.left(), ReferenceAttribute.class); + assertThat(rightAttr.name(), is("$$does_not_exist1$converted_to$long")); + assertThat(as(rightLt.right(), Literal.class).value(), is(11)); + + // UnionAll with three branches + var union = as(topFilter.child(), UnionAll.class); + assertThat(union.children(), hasSize(3)); + + // Branch 1: EsRelation[test] with EsqlProject + Eval(null language_code/name/dne1) + Eval(TOLONG does_not_exist1) + var b1Limit = as(union.children().get(0), Limit.class); + assertThat(b1Limit.limit().fold(FoldContext.small()), is(1000)); + + var b1Project = as(b1Limit.child(), Project.class); + var b1EvalToLong = as(b1Project.child(), Eval.class); + assertThat(b1EvalToLong.fields(), hasSize(1)); + var b1Converted = as(b1EvalToLong.fields().getFirst(), Alias.class); + assertThat(b1Converted.name(), is("$$does_not_exist2$converted_to$long")); + var b1ToLong = as(b1Converted.child(), ToLong.class); + assertThat(Expressions.name(b1ToLong.field()), is("does_not_exist2")); + + var b1EvalConvert = as(b1EvalToLong.child(), Eval.class); + assertThat(Expressions.names(b1EvalConvert.fields()), is(List.of("$$does_not_exist1$converted_to$long"))); + var b1EvalNulls = as(b1EvalConvert.child(), Eval.class); + assertThat( + Expressions.names(b1EvalNulls.fields()), + is(List.of("language_code", "language_name", "does_not_exist1", "does_not_exist2")) + ); + + var b1Rel = as(b1EvalNulls.child(), EsRelation.class); + 
assertThat(b1Rel.indexPattern(), is("test")); + + // Branch 2: Subquery[languages] with Filter TOLONG(does_not_exist1) > 1, wrapped by EsqlProject nulls + Eval(TOLONG dne1) + var b2Limit = as(union.children().get(1), Limit.class); + assertThat(b2Limit.limit().fold(FoldContext.small()), is(1000)); + + var b2Project = as(b2Limit.child(), Project.class); + var b2EvalToLong = as(b2Project.child(), Eval.class); + assertThat(b2EvalToLong.fields(), hasSize(1)); + var b2Converted = as(b2EvalToLong.fields().getFirst(), Alias.class); + assertThat(b2Converted.name(), is("$$does_not_exist2$converted_to$long")); + var b2ToLong = as(b2Converted.child(), ToLong.class); + assertThat(Expressions.name(b2ToLong.field()), is("does_not_exist2")); + + var b2EvalConvert = as(b2EvalToLong.child(), Eval.class); + assertThat(Expressions.names(b2EvalConvert.fields()), is(List.of("$$does_not_exist1$converted_to$long"))); + var b2EvalNulls = as(b2EvalConvert.child(), Eval.class); + assertThat(b2EvalNulls.fields(), hasSize(11)); // null meta+many fields + + var b2Sub = as(b2EvalNulls.child(), Subquery.class); + var b2Filter = as(b2Sub.child(), Filter.class); + var b2Gt = as(b2Filter.condition(), GreaterThan.class); + var b2GtToLong = as(b2Gt.left(), ToLong.class); + assertThat(Expressions.name(b2GtToLong.field()), is("does_not_exist1")); + var b2SubEval = as(b2Filter.child(), Eval.class); + assertThat(Expressions.names(b2SubEval.fields()), is(List.of("does_not_exist1", "does_not_exist2"))); + var b2Rel = as(b2SubEval.child(), EsRelation.class); + assertThat(b2Rel.indexPattern(), is("languages")); + + // Branch 3: Subquery[languages] with Filter TOLONG(does_not_exist1) > 2, wrapped by EsqlProject nulls + Eval(TOLONG dne1) + var b3Limit = as(union.children().get(2), Limit.class); + assertThat(b3Limit.limit().fold(FoldContext.small()), is(1000)); + + var b3Project = as(b3Limit.child(), Project.class); + var b3EvalToLong = as(b3Project.child(), Eval.class); + assertThat(b3EvalToLong.fields(), 
hasSize(1)); + var b3Converted = as(b3EvalToLong.fields().getFirst(), Alias.class); + assertThat(b3Converted.name(), is("$$does_not_exist2$converted_to$long")); + var b3ToLong = as(b3Converted.child(), ToLong.class); + assertThat(Expressions.name(b3ToLong.field()), is("does_not_exist2")); + + var b3EvalConversion = as(b3EvalToLong.child(), Eval.class); + assertThat(Expressions.names(b3EvalConversion.fields()), is(List.of("$$does_not_exist1$converted_to$long"))); + var b3EvalNulls = as(b3EvalConversion.child(), Eval.class); + assertThat(b3EvalNulls.fields(), hasSize(11)); + var b3Sub = as(b3EvalNulls.child(), Subquery.class); + var b3Filter = as(b3Sub.child(), Filter.class); + var b3Gt = as(b3Filter.condition(), GreaterThan.class); + var b3GtToLong = as(b3Gt.left(), ToLong.class); + assertThat(Expressions.name(b3GtToLong.field()), is("does_not_exist1")); + var b3SubEval = as(b3Filter.child(), Eval.class); + assertThat(Expressions.names(b3SubEval.fields()), is(List.of("does_not_exist1", "does_not_exist2"))); + var b3Rel = as(b3SubEval.child(), EsRelation.class); + assertThat(b3Rel.indexPattern(), is("languages")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_MvExpand[languageCode{r}#24,languageCode{r}#128] + * \_EsqlProject[[count(*){r}#18, emp_no{r}#92 AS empNo#21, language_code{r}#102 AS languageCode#24, does_not_exist2{r}#119]] + * \_Aggregate[[emp_no{r}#92, language_code{r}#102, does_not_exist2{r}#119],[COUNT(*[KEYWORD],true[BOOLEAN], + * PT0S[TIME_DURATION]) AS count(*)#18, emp_no{r}#92, language_code{r}#102, does_not_exist2{r}#119]] + * \_Filter[emp_no{r}#92 > 10000[INTEGER] OR $$does_not_exist1$converted_to$long{r$}#118 < 10[INTEGER]] + * \_UnionAll[[_meta_field{r}#91, emp_no{r}#92, first_name{r}#93, gender{r}#94, hire_date{r}#95, job{r}#96, job.raw{r}#97, + * languages{r}#98, last_name{r}#99, long_noidx{r}#100, salary{r}#101, language_code{r}#102, languageName{r}#103, + * max(@timestamp){r}#104, language_name{r}#105, does_not_exist1{r}#106, + * 
$$does_not_exist1$converted_to$long{r$}#118, does_not_exist2{r}#119]] + * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[_meta_field{f}#34, emp_no{f}#28, first_name{f}#29, gender{f}#30, hire_date{f}#35, job{f}#36, + * job.raw{f}#37, languages{f}#31, last_name{f}#32, long_noidx{f}#38, salary{f}#33, language_code{r}#58, + * languageName{r}#59, max(@timestamp){r}#60, language_name{r}#61, does_not_exist1{r}#107, + * $$does_not_exist1$converted_to$long{r}#114, does_not_exist2{r}#120]] + * | \_Eval[[TOLONG(does_not_exist1{r}#107) AS $$does_not_exist1$converted_to$long#114]] + * | \_Eval[[null[INTEGER] AS language_code#58, null[KEYWORD] AS languageName#59, null[DATETIME] AS max(@timestamp)#60, + * null[KEYWORD] AS language_name#61, null[NULL] AS does_not_exist1#106, null[NULL] AS does_not_exist2#119]] + * | \_EsRelation[test][_meta_field{f}#34, emp_no{f}#28, first_name{f}#29, ..] + * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[_meta_field{r}#62, emp_no{r}#63, first_name{r}#64, gender{r}#65, hire_date{r}#66, job{r}#67, + * job.raw{r}#68, languages{r}#69, last_name{r}#70, long_noidx{r}#71, salary{r}#72, language_code{f}#39, + * languageName{r}#6, max(@timestamp){r}#73, language_name{r}#74, does_not_exist1{r}#108, + * $$does_not_exist1$converted_to$long{r}#115, does_not_exist2{r}#121]] + * | \_Eval[[null[NULL] AS does_not_exist2#122]] + * | \_Eval[[TOLONG(does_not_exist1{r}#108) AS $$does_not_exist1$converted_to$long#115]] + * | \_Eval[[null[NULL] AS does_not_exist1#109]] + * | \_Eval[[null[KEYWORD] AS _meta_field#62, null[INTEGER] AS emp_no#63, null[KEYWORD] AS first_name#64, + * null[TEXT] AS gender#65, null[DATETIME] AS hire_date#66, null[TEXT] AS job#67, + * null[KEYWORD] AS job.raw#68, null[INTEGER] AS languages#69, null[KEYWORD] AS last_name#70, + * null[LONG] AS long_noidx#71, null[INTEGER] AS salary#72, null[DATETIME] AS max(@timestamp)#73, + * null[KEYWORD] AS language_name#74]] + * | \_Subquery[] + * | \_EsqlProject[[language_code{f}#39, 
language_name{f}#40 AS languageName#6]] + * | \_Filter[language_code{f}#39 > 10[INTEGER]] + * | \_Eval[[null[NULL] AS does_not_exist1#106, null[NULL] AS does_not_exist2#119]] + * | \_EsRelation[languages][language_code{f}#39, language_name{f}#40] + * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[_meta_field{r}#75, emp_no{r}#76, first_name{r}#77, gender{r}#78, hire_date{r}#79, job{r}#80, + * job.raw{r}#81, languages{r}#82, last_name{r}#83, long_noidx{r}#84, salary{r}#85, language_code{r}#86, + * languageName{r}#87, max(@timestamp){r}#8, language_name{r}#88, does_not_exist1{r}#110, + * $$does_not_exist1$converted_to$long{r}#116, does_not_exist2{r}#123]] + * | \_Eval[[null[NULL] AS does_not_exist2#124]] + * | \_Eval[[TOLONG(does_not_exist1{r}#110) AS $$does_not_exist1$converted_to$long#116]] + * | \_Eval[[null[NULL] AS does_not_exist1#111]] + * | \_Eval[[null[KEYWORD] AS _meta_field#75, null[INTEGER] AS emp_no#76, null[KEYWORD] AS first_name#77, + * null[TEXT] AS gender#78, null[DATETIME] AS hire_date#79, null[TEXT] AS job#80, + * null[KEYWORD] AS job.raw#81, null[INTEGER] AS languages#82, null[KEYWORD] AS last_name#83, + * null[LONG] AS long_noidx#84, null[INTEGER] AS salary#85, null[INTEGER] AS language_code#86, + * null[KEYWORD] AS languageName#87, null[KEYWORD] AS language_name#88]] + * | \_Subquery[] + * | \_Aggregate[[],[MAX(@timestamp{f}#41,true[BOOLEAN],PT0S[TIME_DURATION]) AS max(@timestamp)#8]] + * | \_Eval[[null[NULL] AS does_not_exist1#106, null[NULL] AS does_not_exist2#119]] + * | \_EsRelation[sample_data][@timestamp{f}#41, client_ip{f}#42, event_duration{f..] 
+ * \_Limit[1000[INTEGER],false,false] + * \_EsqlProject[[_meta_field{f}#51, emp_no{f}#45, first_name{f}#46, gender{f}#47, hire_date{f}#52, job{f}#53, + * job.raw{f}#54, languages{f}#48, last_name{f}#49, long_noidx{f}#55, salary{f}#50, language_code{r}#12, + * languageName{r}#89, max(@timestamp){r}#90, language_name{f}#57, does_not_exist1{r}#112, + * $$does_not_exist1$converted_to$long{r}#117, does_not_exist2{r}#125]] + * \_Eval[[TOLONG(does_not_exist1{r}#112) AS $$does_not_exist1$converted_to$long#117]] + * \_Eval[[null[KEYWORD] AS languageName#89, null[DATETIME] AS max(@timestamp)#90]] + * \_Subquery[] + * \_LookupJoin[LEFT,[language_code{r}#12],[language_code{f}#56],false,null] + * |_Eval[[languages{f}#48 AS language_code#12, null[NULL] AS does_not_exist1#106, + * null[NULL] AS does_not_exist2#119]] + * | \_EsRelation[test][_meta_field{f}#51, emp_no{f}#45, first_name{f}#46, ..] + * \_Eval[[null[NULL] AS does_not_exist1#106, null[NULL] AS does_not_exist2#119]] + * \_EsRelation[languages_lookup][LOOKUP][language_code{f}#56, language_name{f}#57] + */ + public void testSubquerysMixAndLookupJoinNullify() { + assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled()); + + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test, + (FROM languages + | WHERE language_code > 10 + | RENAME language_name as languageName), + (FROM sample_data + | STATS max(@timestamp)), + (FROM test + | EVAL language_code = languages + | LOOKUP JOIN languages_lookup ON language_code) + | WHERE emp_no > 10000 OR does_not_exist1::LONG < 10 + | STATS count(*) BY emp_no, language_code, does_not_exist2 + | RENAME emp_no AS empNo, language_code AS languageCode + | MV_EXPAND languageCode + """)); + + // TODO: golden testing + assertThat(plan instanceof Limit, is(true)); + // assertThat(Expressions.names(plan.output()), is(List.of("count(*)", "empNo", "languageCode", "does_not_exist2"))); + } + + // same tree as above, except for the 
source nodes + public void testSubquerysMixAndLookupJoinLoad() { + assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled()); + + var plan = analyzeStatement(setUnmappedLoad(""" + FROM test, + (FROM languages + | WHERE language_code > 10 + | RENAME language_name as languageName), + (FROM sample_data + | STATS max(@timestamp)), + (FROM test + | EVAL language_code = languages + | LOOKUP JOIN languages_lookup ON language_code) + | WHERE emp_no > 10000 OR does_not_exist1::LONG < 10 + | STATS count(*) BY emp_no, language_code, does_not_exist2 + | RENAME emp_no AS empNo, language_code AS languageCode + | MV_EXPAND languageCode + """)); + + // TODO: golden testing + assertThat(Expressions.names(plan.output()), is(List.of("count(*)", "empNo", "languageCode", "does_not_exist2"))); + + List esRelations = plan.collect(EsRelation.class); + assertThat( + esRelations.stream().map(EsRelation::indexPattern).toList(), + is( + List.of( + "test", // FROM + "languages", + "sample_data", + "test", // LOOKUP JOIN + "languages_lookup" + ) + ) + ); + for (var esr : esRelations) { + if (esr.indexMode() != IndexMode.LOOKUP) { + var dne = esr.output().stream().filter(a -> a.name().startsWith("does_not_exist")).toList(); + assertThat(dne.size(), is(2)); + var dne1 = as(dne.getFirst(), FieldAttribute.class); + var dne2 = as(dne.getLast(), FieldAttribute.class); + var pukesf1 = as(dne1.field(), PotentiallyUnmappedKeywordEsField.class); + var pukesf2 = as(dne2.field(), PotentiallyUnmappedKeywordEsField.class); + assertThat(pukesf1.getName(), is("does_not_exist1")); + assertThat(pukesf2.getName(), is("does_not_exist2")); + } + } + } + + public void testFailSubquerysWithNoMainAndStatsOnlyNullify() { + assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled()); + + var query = """ + FROM + (FROM languages + | STATS c = COUNT(*) BY emp_no, does_not_exist1), + (FROM languages + | STATS a = 
AVG(salary::LONG)) + | WHERE does_not_exist2::LONG < 10 + """; + var failure = "line 6:9: Unknown column [does_not_exist2], did you mean [does_not_exist1]?"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + public void testFailSubquerysWithNoMainAndStatsOnlyLoad() { + assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled()); + + var query = """ + FROM + (FROM languages + | STATS c = COUNT(*) BY emp_no, does_not_exist1), + (FROM languages + | STATS a = AVG(salary::LONG)) + | WHERE does_not_exist2::LONG < 10 + """; + var failure = "line 6:9: Unknown column [does_not_exist2], did you mean [does_not_exist1]?"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + /* + * Project[[_meta_field{r}#65, emp_no{r}#66, first_name{r}#67, gender{r}#68, hire_date{r}#69, job{r}#70, job.raw{r}#71, + * languages{r}#72, last_name{r}#73, long_noidx{r}#74, salary{r}#75, c{r}#76, does_not_exist1{r}#77, a{r}#78, + * does_not_exist2{r}#82, does_not_exist3{r}#93, x{r}#13]] + * \_Limit[1000[INTEGER],false,false] + * \_Eval[[does_not_exist3{r}#93 AS x#13]] + * \_Filter[$$does_not_exist2$converted_to$long{r$}#92 < 10[INTEGER]] + * \_UnionAll[[_meta_field{r}#65, emp_no{r}#66, first_name{r}#67, gender{r}#68, hire_date{r}#69, job{r}#70, job.raw{r}#71, + * languages{r}#72, last_name{r}#73, long_noidx{r}#74, salary{r}#75, c{r}#76, does_not_exist1{r}#77, a{r}#78, + * does_not_exist2{r}#82, $$does_not_exist2$converted_to$long{r$}#92, does_not_exist3{r}#93]] + * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[_meta_field{f}#21, emp_no{r}#79, first_name{f}#16, gender{f}#17, hire_date{f}#22, job{f}#23, job.raw{f}#24, + * languages{f}#18, last_name{f}#19, long_noidx{f}#25, salary{f}#20, c{r}#38, does_not_exist1{r}#39, a{r}#40, + * does_not_exist2{r}#83, $$does_not_exist2$converted_to$long{r}#89, 
does_not_exist3{r}#94]] + * | \_Eval[[TOLONG(does_not_exist2{r}#83) AS $$does_not_exist2$converted_to$long#89]] + * | \_Eval[[null[KEYWORD] AS emp_no#79]] + * | \_Eval[[null[LONG] AS c#38, null[NULL] AS does_not_exist1#39, null[DOUBLE] AS a#40, + * null[NULL] AS does_not_exist2#82, null[NULL] AS does_not_exist3#93]] + * | \_EsRelation[test][_meta_field{f}#21, emp_no{f}#15, first_name{f}#16, ..] + * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[_meta_field{r}#41, emp_no{r}#80, first_name{r}#42, gender{r}#43, hire_date{r}#44, job{r}#45, job.raw{r}#46, + * languages{r}#47, last_name{r}#48, long_noidx{r}#49, salary{r}#50, c{r}#6, does_not_exist1{r}#31, a{r}#51, + * does_not_exist2{r}#84, $$does_not_exist2$converted_to$long{r}#90, does_not_exist3{r}#95]] + * | \_Eval[[null[NULL] AS does_not_exist3#96]] + * | \_Eval[[TOLONG(does_not_exist2{r}#84) AS $$does_not_exist2$converted_to$long#90]] + * | \_Eval[[null[NULL] AS does_not_exist2#85]] + * | \_Eval[[null[KEYWORD] AS emp_no#80]] + * | \_Eval[[null[KEYWORD] AS _meta_field#41, null[KEYWORD] AS first_name#42, null[TEXT] AS gender#43, + * null[DATETIME] AS hire_date#44, null[TEXT] AS job#45, null[KEYWORD] AS job.raw#46, + * null[INTEGER] AS languages#47, null[KEYWORD] AS last_name#48, null[LONG] AS long_noidx#49, + * null[INTEGER] AS salary#50, null[DOUBLE] AS a#51]] + * | \_Subquery[] + * | \_Aggregate[[emp_no{r}#30, does_not_exist1{r}#31],[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) + * AS c#6, emp_no{r}#30, does_not_exist1{r}#31]] + * | \_Eval[[null[NULL] AS emp_no#30, null[NULL] AS does_not_exist1#31, null[NULL] AS does_not_exist2#82, + * null[NULL] AS does_not_exist3#93]] + * | \_EsRelation[languages][language_code{f}#26, language_name{f}#27] + * \_Limit[1000[INTEGER],false,false] + * \_EsqlProject[[_meta_field{r}#52, emp_no{r}#81, first_name{r}#54, gender{r}#55, hire_date{r}#56, job{r}#57, job.raw{r}#58, + * languages{r}#59, last_name{r}#60, long_noidx{r}#61, salary{r}#62, c{r}#63, 
does_not_exist1{r}#64, a{r}#9, + * does_not_exist2{r}#86, $$does_not_exist2$converted_to$long{r}#91, does_not_exist3{r}#97]] + * \_Eval[[null[NULL] AS does_not_exist3#98]] + * \_Eval[[TOLONG(does_not_exist2{r}#86) AS $$does_not_exist2$converted_to$long#91]] + * \_Eval[[null[NULL] AS does_not_exist2#87]] + * \_Eval[[null[KEYWORD] AS emp_no#81]] + * \_Eval[[null[KEYWORD] AS _meta_field#52, null[INTEGER] AS emp_no#53, null[KEYWORD] AS first_name#54, + * null[TEXT] AS gender#55, null[DATETIME] AS hire_date#56, null[TEXT] AS job#57, + * null[KEYWORD] AS job.raw#58, null[INTEGER] AS languages#59, null[KEYWORD] AS last_name#60, + * null[LONG] AS long_noidx#61, null[INTEGER] AS salary#62, null[LONG] AS c#63, + * null[NULL] AS does_not_exist1#64]] + * \_Subquery[] + * \_Aggregate[[],[AVG(salary{r}#36,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS a#9]] + * \_Eval[[null[NULL] AS salary#36, null[NULL] AS does_not_exist2#82, null[NULL] AS does_not_exist3#93]] + * \_EsRelation[languages][language_code{f}#28, language_name{f}#29] + */ + public void testSubquerysWithMainAndStatsOnly() { + assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled()); + + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test, // adding a "main" index/pattern makes does_not_exist2 & 3 resolved + (FROM languages + | STATS c = COUNT(*) BY emp_no, does_not_exist1), + (FROM languages + | STATS a = AVG(salary)) + | WHERE does_not_exist2::LONG < 10 + | EVAL x = does_not_exist3 + """)); + + // TODO: golden testing + assertThat( + Expressions.names(plan.output()), + is( + List.of( + "_meta_field", + "emp_no", + "first_name", + "gender", + "hire_date", + "job", + "job.raw", + "languages", + "last_name", + "long_noidx", + "salary", + "c", + "does_not_exist1", + "a", + "does_not_exist2", + "does_not_exist3", + "x" + ) + ) + ); + } + + /* + * Limit[10000[INTEGER],false,false] + * \_Fork[[_meta_field{r}#103, emp_no{r}#104, 
first_name{r}#105, gender{r}#106, hire_date{r}#107, job{r}#108, job.raw{r}#109, + * languages{r}#110, last_name{r}#111, long_noidx{r}#112, salary{r}#113, does_not_exist1{r}#114, does_not_exist2{r}#115, + * does_not_exist3{r}#116, does_not_exist2 IS NULL{r}#117, _fork{r}#118, does_not_exist4{r}#119, xyz{r}#120, x{r}#121, + * y{r}#122]] + * |_Limit[10000[INTEGER],false,false] + * | \_EsqlProject[[_meta_field{f}#35, emp_no{f}#29, first_name{f}#30, gender{f}#31, hire_date{f}#36, job{f}#37, job.raw{f}#38, + * languages{f}#32, last_name{f}#33, long_noidx{f}#39, salary{f}#34, does_not_exist1{r}#62, does_not_exist2{r}#68, + * does_not_exist3{r}#74, does_not_exist2 IS NULL{r}#6, _fork{r}#9, does_not_exist4{r}#80, xyz{r}#81, x{r}#82, y{r}#83]] + * | \_Eval[[null[NULL] AS does_not_exist4#80, null[KEYWORD] AS xyz#81, null[DOUBLE] AS x#82, null[DOUBLE] AS y#83]] + * | \_Eval[[fork1[KEYWORD] AS _fork#9]] + * | \_Limit[7[INTEGER],false,false] + * | \_OrderBy[[Order[does_not_exist3{r}#74,ASC,LAST]]] + * | \_Filter[emp_no{f}#29 > 3[INTEGER]] + * | \_Eval[[ISNULL(does_not_exist2{r}#68) AS does_not_exist2 IS NULL#6]] + * | \_Filter[first_name{f}#30 == Chris[KEYWORD] AND TOLONG(does_not_exist1{r}#62) > 5[INTEGER]] + * | \_Eval[[null[NULL] AS does_not_exist1#62, null[NULL] AS does_not_exist2#68, null[NULL] AS does_not_exist3#74]] + * | \_EsRelation[test][_meta_field{f}#35, emp_no{f}#29, first_name{f}#30, ..] 
+ * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[_meta_field{f}#46, emp_no{f}#40, first_name{f}#41, gender{f}#42, hire_date{f}#47, job{f}#48, job.raw{f}#49, + * languages{f}#43, last_name{f}#44, long_noidx{f}#50, salary{f}#45, does_not_exist1{r}#64, does_not_exist2{r}#70, + * does_not_exist3{r}#84, does_not_exist2 IS NULL{r}#6, _fork{r}#9, does_not_exist4{r}#76, xyz{r}#21, x{r}#85, y{r}#86]] + * | \_Eval[[null[NULL] AS does_not_exist3#84, null[DOUBLE] AS x#85, null[DOUBLE] AS y#86]] + * | \_Eval[[fork2[KEYWORD] AS _fork#9]] + * | \_Eval[[TOSTRING(does_not_exist4{r}#76) AS xyz#21]] + * | \_Filter[emp_no{f}#40 > 2[INTEGER]] + * | \_Eval[[ISNULL(does_not_exist2{r}#70) AS does_not_exist2 IS NULL#6]] + * | \_Filter[first_name{f}#41 == Chris[KEYWORD] AND TOLONG(does_not_exist1{r}#64) > 5[INTEGER]] + * | \_Eval[[null[NULL] AS does_not_exist1#64, null[NULL] AS does_not_exist2#70, null[NULL] AS does_not_exist4#76]] + * | \_EsRelation[test][_meta_field{f}#46, emp_no{f}#40, first_name{f}#41, ..] 
+ * \_Limit[1000[INTEGER],false,false] + * \_EsqlProject[[_meta_field{r}#87, emp_no{r}#88, first_name{r}#89, gender{r}#90, hire_date{r}#91, job{r}#92, job.raw{r}#93, + * languages{r}#94, last_name{r}#95, long_noidx{r}#96, salary{r}#97, does_not_exist1{r}#98, does_not_exist2{r}#99, + * does_not_exist3{r}#100, does_not_exist2 IS NULL{r}#101, _fork{r}#9, does_not_exist4{r}#102, xyz{r}#27, x{r}#13, + * y{r}#16]] + * \_Eval[[null[KEYWORD] AS _meta_field#87, null[INTEGER] AS emp_no#88, null[KEYWORD] AS first_name#89, null[TEXT] AS gender#90, + * null[DATETIME] AS hire_date#91, null[TEXT] AS job#92, null[KEYWORD] AS job.raw#93, null[INTEGER] AS languages#94, + * null[KEYWORD] AS last_name#95, null[LONG] AS long_noidx#96, null[INTEGER] AS salary#97, + * null[NULL] AS does_not_exist1#98, null[NULL] AS does_not_exist2#99, null[NULL] AS does_not_exist3#100, + * null[BOOLEAN] AS does_not_exist2 IS NULL#101, null[NULL] AS does_not_exist4#102]] + * \_Eval[[fork3[KEYWORD] AS _fork#9]] + * \_Eval[[abc[KEYWORD] AS xyz#27]] + * \_Aggregate[[],[MIN(TODOUBLE(d{r}#22),true[BOOLEAN],PT0S[TIME_DURATION]) AS x#13, + * FilteredExpression[MAX(TODOUBLE(e{r}#23), true[BOOLEAN],PT0S[TIME_DURATION]), + * TODOUBLE(d{r}#22) > 1000[INTEGER] + TODOUBLE(does_not_exist5{r}#78)] AS y#16]] + * \_Dissect[first_name{f}#52,Parser[pattern=%{d} %{e} %{f}, appendSeparator=, + * parser=org.elasticsearch.dissect.DissectParser@4b06062b],[d{r}#22, e{r}#23, f{r}#24]] + * \_Eval[[ISNULL(does_not_exist2{r}#72) AS does_not_exist2 IS NULL#6]] + * \_Filter[first_name{f}#52 == Chris[KEYWORD] AND TOLONG(does_not_exist1{r}#66) > 5[INTEGER]] + * \_Eval[[null[NULL] AS does_not_exist1#66, null[NULL] AS does_not_exist2#72, null[NULL] AS does_not_exist5#78]] + * \_EsRelation[test][_meta_field{f}#57, emp_no{f}#51, first_name{f}#52, ..] 
+ */ + public void testForkBranchesWithDifferentSchemas() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | WHERE first_name == "Chris" AND does_not_exist1::LONG > 5 + | EVAL does_not_exist2 IS NULL + | FORK (WHERE emp_no > 3 | SORT does_not_exist3 | LIMIT 7 ) + (WHERE emp_no > 2 | EVAL xyz = does_not_exist4::KEYWORD ) + (DISSECT first_name "%{d} %{e} %{f}" + | STATS x = MIN(d::DOUBLE), y = MAX(e::DOUBLE) WHERE d::DOUBLE > 1000 + does_not_exist5::DOUBLE + | EVAL xyz = "abc") + """)); + + // Top implicit limit + var topLimit = as(plan, Limit.class); + assertThat(topLimit.limit().fold(FoldContext.small()), is(10000)); + + // Fork node + var fork = as(topLimit.child(), Fork.class); + assertThat(fork.children(), hasSize(3)); + + // Branch 0 + var b0Limit = as(fork.children().get(0), Limit.class); + assertThat(b0Limit.limit().fold(FoldContext.small()), is(10000)); + var b0Proj = as(b0Limit.child(), Project.class); + + // Adds dne4/xyz/x/y nulls, verify does_not_exist4 NULL + var b0Eval4 = as(b0Proj.child(), Eval.class); + assertThat(b0Eval4.fields(), hasSize(4)); + assertThat(as(as(b0Eval4.fields().get(0), Alias.class).child(), Literal.class).dataType(), is(DataType.NULL)); // does_not_exist4 + + // Fork label + var b0EvalFork = as(b0Eval4.child(), Eval.class); + var b0ForkAlias = as(b0EvalFork.fields().getFirst(), Alias.class); + assertThat(b0ForkAlias.name(), is("_fork")); + + // Inner limit -> orderBy -> filter chain + var b0InnerLimit = as(b0EvalFork.child(), Limit.class); + assertThat(b0InnerLimit.limit().fold(FoldContext.small()), is(7)); + var b0OrderBy = as(b0InnerLimit.child(), OrderBy.class); + var b0FilterEmp = b0OrderBy.child(); + + // EVAL does_not_exist2 IS NULL (boolean alias present) + var b0IsNull = as(b0FilterEmp, Filter.class); + var b0IsNullEval = as(b0IsNull.child(), Eval.class); + var b0IsNullAlias = as(b0IsNullEval.fields().getFirst(), Alias.class); + assertThat(b0IsNullAlias.name(), is("does_not_exist2 IS NULL")); + + // 
WHERE first_name == Chris AND ToLong(does_not_exist1) > 5 + var b0Filter = as(b0IsNullEval.child(), Filter.class); + var b0And = as(b0Filter.condition(), And.class); + var b0RightGt = as(b0And.right(), GreaterThan.class); + var b0RightToLong = as(b0RightGt.left(), ToLong.class); + assertThat(Expressions.name(b0RightToLong.field()), is("does_not_exist1")); + assertThat(as(b0RightGt.right(), Literal.class).value(), is(5)); + + // Chain of Evals adding dne1/dne2/dne3 NULLs + var b0EvalDne1 = as(b0Filter.child(), Eval.class); + var b0EvalDne1Alias = as(b0EvalDne1.fields().get(0), Alias.class); + assertThat(b0EvalDne1Alias.name(), is("does_not_exist1")); + assertThat(as(b0EvalDne1Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + var b0EvalDne2Alias = as(b0EvalDne1.fields().get(1), Alias.class); + assertThat(b0EvalDne2Alias.name(), is("does_not_exist2")); + assertThat(as(b0EvalDne2Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + var b0EvalDne3Alias = as(b0EvalDne1.fields().get(2), Alias.class); + assertThat(b0EvalDne3Alias.name(), is("does_not_exist3")); + assertThat(as(b0EvalDne3Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + var b0Rel = as(b0EvalDne1.child(), EsRelation.class); + assertThat(b0Rel.indexPattern(), is("test")); + + // Branch 1 + var b1Limit = as(fork.children().get(1), Limit.class); + assertThat(b1Limit.limit().fold(FoldContext.small()), is(1000)); + var b1Proj = as(b1Limit.child(), Project.class); + + // Adds dne3,x,y NULLs at top + var b1Eval3 = as(b1Proj.child(), Eval.class); + assertThat(b1Eval3.fields(), hasSize(3)); + assertThat(as(as(b1Eval3.fields().get(0), Alias.class).child(), Literal.class).dataType(), is(DataType.NULL)); // does_not_exist3 + + // Fork label + var b1EvalFork = as(b1Eval3.child(), Eval.class); + var b1ForkAlias = as(b1EvalFork.fields().getFirst(), Alias.class); + assertThat(b1ForkAlias.name(), is("_fork")); + + // xyz = ToString(does_not_exist4) + var b1EvalXyz = 
as(b1EvalFork.child(), Eval.class); + var b1XyzAlias = as(b1EvalXyz.fields().getFirst(), Alias.class); + assertThat(b1XyzAlias.name(), is("xyz")); + as(b1XyzAlias.child(), ToString.class); + + // WHERE emp_no > 2 + var b1FilterEmp = as(b1EvalXyz.child(), Filter.class); + + // EVAL does_not_exist2 IS NULL (boolean alias present) + var b1IsNullEval = as(b1FilterEmp.child(), Eval.class); + var b1IsNullAlias = as(b1IsNullEval.fields().getFirst(), Alias.class); + assertThat(b1IsNullAlias.name(), is("does_not_exist2 IS NULL")); + + // WHERE first_name == Chris AND ToLong(does_not_exist1) > 5 + var b1Filter = as(b1IsNullEval.child(), Filter.class); + var b1And = as(b1Filter.condition(), And.class); + var b1RightGt = as(b1And.right(), GreaterThan.class); + var b1RightToLong = as(b1RightGt.left(), ToLong.class); + assertThat(Expressions.name(b1RightToLong.field()), is("does_not_exist1")); + assertThat(as(b1RightGt.right(), Literal.class).value(), is(5)); + + // Chain of Evals adding dne1/dne2/dne4 NULLs + var b1EvalDne1 = as(b1Filter.child(), Eval.class); + var b1EvalDne1Alias = as(b1EvalDne1.fields().get(0), Alias.class); + assertThat(b1EvalDne1Alias.name(), is("does_not_exist1")); + assertThat(as(b1EvalDne1Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + var b1EvalDne2Alias = as(b1EvalDne1.fields().get(1), Alias.class); + assertThat(b1EvalDne2Alias.name(), is("does_not_exist2")); + assertThat(as(b1EvalDne2Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + var b1EvalDne3Alias = as(b1EvalDne1.fields().get(2), Alias.class); + assertThat(b1EvalDne3Alias.name(), is("does_not_exist4")); + assertThat(as(b1EvalDne3Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + + var b1Rel = as(b1EvalDne1.child(), EsRelation.class); + assertThat(b1Rel.indexPattern(), is("test")); + + // Branch 2 + var b2Limit = as(fork.children().get(2), Limit.class); + assertThat(b2Limit.limit().fold(FoldContext.small()), is(1000)); + var b2Proj = as(b2Limit.child(), 
Project.class); + + // Many nulls including does_not_exist1/2/3/4 + var b2EvalNulls = as(b2Proj.child(), Eval.class); + assertThat(b2EvalNulls.fields(), hasSize(16)); + // Spot-check presence and NULL types for does_not_exist1..4 + var b2NullNames = Expressions.names(b2EvalNulls.fields()); + assertThat(b2NullNames.contains("does_not_exist1"), is(true)); + assertThat(b2NullNames.contains("does_not_exist2"), is(true)); + assertThat(b2NullNames.contains("does_not_exist3"), is(true)); + assertThat(b2NullNames.contains("does_not_exist4"), is(true)); + // Verify their datatypes are NULL + for (var alias : b2EvalNulls.fields()) { + var a = as(alias, Alias.class); + if (a.name().startsWith("does_not_exist2 IS NULL")) { + assertThat(as(a.child(), Literal.class).dataType(), is(DataType.BOOLEAN)); + } else if (a.name().startsWith("does_not_exist")) { + assertThat(as(a.child(), Literal.class).dataType(), is(DataType.NULL)); + } + } + + // Fork label + var b2EvalFork = as(b2EvalNulls.child(), Eval.class); + var b2ForkAlias = as(b2EvalFork.fields().getFirst(), Alias.class); + assertThat(b2ForkAlias.name(), is("_fork")); + + // xyz constant then Aggregate with FilteredExpression using does_not_exist5 + var b2EvalXyz = as(b2EvalFork.child(), Eval.class); + var b2Agg = as(b2EvalXyz.child(), Aggregate.class); + assertThat(b2Agg.groupings(), hasSize(0)); + assertThat(b2Agg.aggregates(), hasSize(2)); + var filteredAlias = as(b2Agg.aggregates().get(1), Alias.class); + var filtered = as(filteredAlias.child(), FilteredExpression.class); + as(filtered.delegate(), Max.class); + var feCondGT = as(filtered.filter(), GreaterThan.class); + var feCondGTAdd = as(feCondGT.right(), Add.class); + // Right side of Add must be ToDouble(does_not_exist5) + var dne5Convert = as(feCondGTAdd.right(), ConvertFunction.class); + var dne5Ref = as(dne5Convert.field(), ReferenceAttribute.class); + assertThat(dne5Ref.name(), is("does_not_exist5")); + + var dissect = as(b2Agg.child(), Dissect.class); + var 
evalDne2IsNull = as(dissect.child(), Eval.class); + var dne2IsNullAlias = as(evalDne2IsNull.fields().getFirst(), Alias.class); + assertThat(dne2IsNullAlias.name(), is("does_not_exist2 IS NULL")); + var filter = as(evalDne2IsNull.child(), Filter.class); + var and = as(filter.condition(), And.class); + var rightGt = as(and.right(), GreaterThan.class); + var rightToLong = as(rightGt.left(), ToLong.class); + assertThat(Expressions.name(rightToLong.field()), is("does_not_exist1")); + assertThat(as(rightGt.right(), Literal.class).value(), is(5)); + + var evalDne1 = as(filter.child(), Eval.class); + var dne1Alias = as(evalDne1.fields().get(0), Alias.class); + assertThat(dne1Alias.name(), is("does_not_exist1")); + assertThat(as(dne1Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + var dne2Alias = as(evalDne1.fields().get(1), Alias.class); + assertThat(dne2Alias.name(), is("does_not_exist2")); + assertThat(as(dne2Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + var dne3Alias = as(evalDne1.fields().get(2), Alias.class); + assertThat(dne3Alias.name(), is("does_not_exist5")); + assertThat(as(dne3Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + + var rel = as(evalDne1.child(), EsRelation.class); + assertThat(rel.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_OrderBy[[Order[does_not_exist2{r}#46,ASC,LAST]]] + * \_Fork[[c{r}#45, does_not_exist2{r}#46, _fork{r}#47, d{r}#48]] + * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[c{r}#6, does_not_exist2{r}#39, _fork{r}#7, d{r}#42]] + * | \_Eval[[null[DOUBLE] AS d#42]] + * | \_Eval[[fork1[KEYWORD] AS _fork#7]] + * | \_Aggregate[[does_not_exist2{r}#39],[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS c#6, does_not_exist2{r}#39]] + * | \_Filter[ISNULL(does_not_exist1{r}#35)] + * | \_Eval[[null[NULL] AS does_not_exist1#35, null[NULL] AS does_not_exist2#39]] + * | \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..] 
+ * \_Limit[1000[INTEGER],false,false] + * \_EsqlProject[[c{r}#43, does_not_exist2{r}#44, _fork{r}#7, d{r}#10]] + * \_Eval[[null[LONG] AS c#43, null[NULL] AS does_not_exist2#44]] + * \_Eval[[fork2[KEYWORD] AS _fork#7]] + * \_Aggregate[[],[AVG(salary{f}#29,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS d#10]] + * \_Filter[ISNULL(does_not_exist1{r}#37)] + * \_Eval[[null[NULL] AS does_not_exist1#37]] + * \_EsRelation[test][_meta_field{f}#30, emp_no{f}#24, first_name{f}#25, ..] + */ + public void testForkBranchesAfterStats1stBranch() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | WHERE does_not_exist1 IS NULL + | FORK (STATS c = COUNT(*) BY does_not_exist2) + (STATS d = AVG(salary)) + | SORT does_not_exist2 + """)); + + // TODO: golden testing + assertThat(Expressions.names(plan.output()), is(List.of("c", "does_not_exist2", "_fork", "d"))); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_OrderBy[[Order[does_not_exist2{r}#48,ASC,LAST]]] + * \_Fork[[c{r}#45, _fork{r}#46, d{r}#47, does_not_exist2{r}#48]] + * |_Limit[1000[INTEGER],false,false] + * | \_EsqlProject[[c{r}#5, _fork{r}#6, d{r}#42, does_not_exist2{r}#43]] + * | \_Eval[[null[DOUBLE] AS d#42, null[NULL] AS does_not_exist2#43]] + * | \_Eval[[fork1[KEYWORD] AS _fork#6]] + * | \_Aggregate[[],[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS c#5]] + * | \_Filter[ISNULL(does_not_exist1{r}#35)] + * | \_Eval[[null[NULL] AS does_not_exist1#35]] + * | \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..] 
+ * \_Limit[1000[INTEGER],false,false] + * \_EsqlProject[[c{r}#44, _fork{r}#6, d{r}#10, does_not_exist2{r}#39]] + * \_Eval[[null[LONG] AS c#44]] + * \_Eval[[fork2[KEYWORD] AS _fork#6]] + * \_Aggregate[[does_not_exist2{r}#39],[AVG(salary{f}#29,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS d#10, + * does_not_exist2{r}#39]] + * \_Filter[ISNULL(does_not_exist1{r}#37)] + * \_Eval[[null[NULL] AS does_not_exist1#37, null[NULL] AS does_not_exist2#39]] + * \_EsRelation[test][_meta_field{f}#30, emp_no{f}#24, first_name{f}#25, ..] + */ + public void testForkBranchesAfterStats2ndBranch() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | WHERE does_not_exist1 IS NULL + | FORK (STATS c = COUNT(*)) + (STATS d = AVG(salary) BY does_not_exist2) + | SORT does_not_exist2 + """)); + + // TODO: golden testing + assertThat(Expressions.names(plan.output()), is(List.of("c", "_fork", "d", "does_not_exist2"))); + } + + public void testFailAfterForkOfStats() { + var query = """ + FROM test + | WHERE does_not_exist1 IS NULL + | FORK (STATS c = COUNT(*)) + (STATS d = AVG(salary)) + (DISSECT hire_date::KEYWORD "%{year}-%{month}-%{day}T" + | STATS x = MIN(year::LONG), y = MAX(month::LONG) WHERE year::LONG > 1000 + does_not_exist2::DOUBLE) + | EVAL e = does_not_exist3 + 1 + """; + var failure = "line 7:12: Unknown column [does_not_exist3]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_InlineStats[] + * \_Aggregate[[does_not_exist2{r}#19],[SUM(does_not_exist1{r}#20,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS c#5, + * does_not_exist2{r}#19]] + * \_Eval[[null[NULL] AS does_not_exist2#19, null[NULL] AS does_not_exist1#20]] + * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] 
+ */ + public void testInlineStats() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | INLINE STATS c = SUM(does_not_exist1) BY does_not_exist2 + """)); + + // Top implicit limit 1000 + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + // InlineStats wrapping Aggregate + var inlineStats = as(limit.child(), InlineStats.class); + var agg = as(inlineStats.child(), Aggregate.class); + + // Grouping by does_not_exist2 and SUM over does_not_exist1 + assertThat(agg.groupings(), hasSize(1)); + var groupRef = as(agg.groupings().getFirst(), ReferenceAttribute.class); + assertThat(groupRef.name(), is("does_not_exist2")); + + assertThat(agg.aggregates(), hasSize(2)); + var cAlias = as(agg.aggregates().getFirst(), Alias.class); + assertThat(cAlias.name(), is("c")); + as(cAlias.child(), Sum.class); + + // Upstream Eval introduces does_not_exist2 and does_not_exist1 as NULL + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + + var dne2Alias = as(eval.fields().get(0), Alias.class); + assertThat(dne2Alias.name(), is("does_not_exist2")); + assertThat(as(dne2Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + + var dne1Alias = as(eval.fields().get(1), Alias.class); + assertThat(dne1Alias.name(), is("does_not_exist1")); + assertThat(as(dne1Alias.child(), Literal.class).dataType(), is(DataType.NULL)); + + // Underlying relation + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_LookupJoin[LEFT,[language_code{r}#5],[language_code{f}#19],false,null] + * |_Eval[[TOINTEGER(does_not_exist{r}#21) AS language_code#5]] + * | \_Eval[[null[NULL] AS does_not_exist#21]] + * | \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] 
+ * \_EsRelation[languages_lookup][LOOKUP][language_code{f}#19, language_name{f}#20] + */ + public void testLookupJoin() { + String query = """ + FROM test + | EVAL language_code = does_not_exist::INTEGER + | LOOKUP JOIN languages_lookup ON language_code + """; + var plan = analyzeStatement(setUnmappedNullify(query)); + + // Top implicit limit 1000 + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + // LookupJoin over alias `language_code` + var lj = as(limit.child(), LookupJoin.class); + assertThat(lj.config().type(), is(JoinTypes.LEFT)); + + // Left child: EVAL language_code = TOINTEGER(does_not_exist), with upstream NULL alias for does_not_exist + var leftEval = as(lj.left(), Eval.class); + assertThat(leftEval.fields(), hasSize(1)); + var langCodeAlias = as(leftEval.fields().getFirst(), Alias.class); + assertThat(langCodeAlias.name(), is("language_code")); + as(langCodeAlias.child(), ToInteger.class); + + var upstreamEval = as(leftEval.child(), Eval.class); + assertThat(upstreamEval.fields(), hasSize(1)); + var dneAlias = as(upstreamEval.fields().getFirst(), Alias.class); + assertThat(dneAlias.name(), is("does_not_exist")); + assertThat(as(dneAlias.child(), Literal.class).dataType(), is(DataType.NULL)); + + var leftRel = as(upstreamEval.child(), EsRelation.class); + assertThat(leftRel.indexPattern(), is("test")); + + // Right lookup table + var rightRel = as(lj.right(), EsRelation.class); + assertThat(rightRel.indexPattern(), is("languages_lookup")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Enrich[ANY,languages[KEYWORD],x{r}#5,{"match":{"indices":[],"match_field":"language_code", + * "enrich_fields":["language_name"]}},{=languages_idx},[language_name{r}#21]] + * \_Eval[[TOSTRING(does_not_exist{r}#22) AS x#5]] + * \_Eval[[null[NULL] AS does_not_exist#22]] + * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] 
+ */ + public void testEnrich() { + String query = """ + FROM test + | EVAL x = does_not_exist::KEYWORD + | ENRICH languages ON x + """; + var plan = analyzeStatement(setUnmappedNullify(query)); + + // Top implicit limit 1000 + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + // Enrich over alias `x` produced by TOSTRING(does_not_exist) + var enrich = as(limit.child(), Enrich.class); + assertThat(enrich.matchField().name(), is("x")); + assertThat(Expressions.names(enrich.enrichFields()), contains("language_name")); + + // Left child: EVAL x = TOSTRING(does_not_exist), with upstream NULL alias for does_not_exist + var leftEval = as(enrich.child(), Eval.class); + assertThat(leftEval.fields(), hasSize(1)); + var xAlias = as(leftEval.fields().getFirst(), Alias.class); + assertThat(xAlias.name(), is("x")); + as(xAlias.child(), ToString.class); + + var upstreamEval = as(leftEval.child(), Eval.class); + assertThat(upstreamEval.fields(), hasSize(1)); + var dneAlias = as(upstreamEval.fields().getFirst(), Alias.class); + assertThat(dneAlias.name(), is("does_not_exist")); + assertThat(as(dneAlias.child(), Literal.class).dataType(), is(DataType.NULL)); + + var leftRel = as(upstreamEval.child(), EsRelation.class); + assertThat(leftRel.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Filter[KNN(does_not_exist{r}#16,TODENSEVECTOR([0, 1, 2][INTEGER]))] + * \_Eval[[null[NULL] AS does_not_exist#16]] + * \_EsRelation[test][_meta_field{f}#11, emp_no{f}#5, first_name{f}#6, ge..] 
+ */ + public void testSemanticText() { + String query = """ + FROM test + | WHERE KNN(does_not_exist, [0, 1, 2]) + """; + var plan = analyzeStatement(setUnmappedNullify(query)); + + // Top implicit limit 1000 + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + // Filter node + var filter = as(limit.child(), Filter.class); + assertNotNull(filter.condition()); // KNN(does_not_exist, TODENSEVECTOR([...])) + + // Upstream Eval introduces does_not_exist as NULL + var eval = as(filter.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var dneAlias = as(eval.fields().getFirst(), Alias.class); + assertThat(dneAlias.name(), is("does_not_exist")); + assertThat(as(dneAlias.child(), Literal.class).dataType(), is(DataType.NULL)); + + // Underlying relation + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_EsqlProject[[x{r}#4, does_not_exist_field1{r}#12, y{r}#8, does_not_exist_field2{r}#14]] + * \_Eval[[TOINTEGER(does_not_exist_field1{r}#12) + x{r}#4 AS y#8]] + * \_Eval[[null[NULL] AS does_not_exist_field1#12, null[NULL] AS does_not_exist_field2#14]] + * \_Row[[1[INTEGER] AS x#4]] + */ + public void testRow() { + var plan = analyzeStatement(setUnmappedNullify(""" + ROW x = 1 + | EVAL y = does_not_exist_field1::INTEGER + x + | KEEP *, does_not_exist_field2 + """)); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var project = as(limit.child(), Project.class); + assertThat(project.projections(), hasSize(4)); + assertThat(Expressions.names(project.projections()), is(List.of("x", "does_not_exist_field1", "y", "does_not_exist_field2"))); + + var evalY = as(project.child(), Eval.class); + assertThat(evalY.fields(), hasSize(1)); + var aliasY = as(evalY.fields().getFirst(), Alias.class); + assertThat(aliasY.name(), is("y")); + 
assertThat(Expressions.name(aliasY.child()), is("does_not_exist_field1::INTEGER + x")); + + var evalDne1 = as(evalY.child(), Eval.class); + assertThat(Expressions.names(evalDne1.fields()), is(List.of("does_not_exist_field1", "does_not_exist_field2"))); + + var row = as(evalDne1.child(), Row.class); + assertThat(row.fields(), hasSize(1)); + assertThat(Expressions.name(row.fields().getFirst()), is("x")); + } + + public void testChangedTimestampFieldWithRate() { + verificationFailure(setUnmappedNullify(""" + TS k8s + | RENAME @timestamp AS newTs + | STATS max(rate(network.total_cost)) BY tbucket = bucket(newTs, 1hour) + """), "3:13: [rate(network.total_cost)] " + UnresolvedTimestamp.UNRESOLVED_SUFFIX); + + verificationFailure(setUnmappedNullify(""" + TS k8s + | DROP @timestamp + | STATS max(rate(network.total_cost)) + """), "3:13: [rate(network.total_cost)] " + UnresolvedTimestamp.UNRESOLVED_SUFFIX); + } + + private void verificationFailure(String statement, String expectedFailure) { + var e = expectThrows(VerificationException.class, () -> analyzeStatement(statement)); + assertThat(e.getMessage(), containsString(expectedFailure)); + } + + private static String setUnmappedNullify(String query) { + assumeTrue("Requires OPTIONAL_FIELDS", EsqlCapabilities.Cap.OPTIONAL_FIELDS.isEnabled()); + return "SET unmapped_fields=\"nullify\"; " + query; + } + + private static String setUnmappedLoad(String query) { + assumeTrue("Requires OPTIONAL_FIELDS", EsqlCapabilities.Cap.OPTIONAL_FIELDS.isEnabled()); + return "SET unmapped_fields=\"load\"; " + query; + } + + @Override + protected List filteredWarnings() { + return withInlinestatsWarning(withDefaultLimitWarning(super.filteredWarnings())); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 4f3cb4f464b93..2492b53ebb200 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -749,6 +749,10 @@ public void testDropAfterRenaming() { assertEquals("1:40: Unknown column [emp_no]", error("from test | rename emp_no as r1 | drop emp_no")); } + public void testDropUnknownPattern() { + assertEquals("1:18: No matches found for pattern [foobar*]", error("from test | drop foobar*")); + } + public void testNonStringFieldsInDissect() { assertEquals( "1:21: Dissect only supports KEYWORD or TEXT values, found expression [emp_no] type [INTEGER]", diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/promql/PromqlLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/promql/PromqlLogicalPlanOptimizerTests.java index ab7aa6b2c295a..eb3a7ea4d53c6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/promql/PromqlLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/promql/PromqlLogicalPlanOptimizerTests.java @@ -64,6 +64,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.emptyInferenceResolution; import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.analysis.VerifierTests.error; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; @@ -91,7 +92,8 @@ public static void initTest() { emptyMap(), enrichResolution, emptyInferenceResolution(), - TransportVersion.current() + TransportVersion.current(), + UNMAPPED_FIELDS.defaultValue() ), TEST_VERIFIER ); diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/TimeSeriesBareAggregationsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/TimeSeriesBareAggregationsTests.java index 9a7962b0d5167..b86554ee093f4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/TimeSeriesBareAggregationsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/TimeSeriesBareAggregationsTests.java @@ -37,6 +37,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.emptyInferenceResolution; import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultLookupResolution; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; @@ -64,7 +65,8 @@ public static void initK8s() { defaultLookupResolution(), enrichResolution, emptyInferenceResolution(), - TransportVersion.minimumCompatible() + TransportVersion.minimumCompatible(), + UNMAPPED_FIELDS.defaultValue() ), TEST_VERIFIER ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java index 6d810d36eed13..456eae9b7566a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java @@ -65,10 +65,18 @@ LogicalPlan query(String e, QueryParams params) { return parser.parseQuery(e, params); } + EsqlStatement statement(String e) { + return statement(e, new QueryParams()); + } + EsqlStatement statement(String e, 
QueryParams params) { return parser.createStatement(e, params); } + EsqlStatement unvalidatedStatement(String e, QueryParams params) { + return parser.unvalidatedStatement(e, params); + } + LogicalPlan processingCommand(String e) { return parser.parseQuery("row a = 1 | " + e); } @@ -192,6 +200,15 @@ void expectVerificationError(String query, String errorMessage) { ); } + void expectValidationError(String statement, String errorMessage) { + expectThrows( + "Statement [" + statement + "] is expected to throw " + ParsingException.class + " with message [" + errorMessage + "]", + ParsingException.class, + containsString(errorMessage), + () -> parser.createStatement(statement) + ); + } + void expectInvalidIndexNameErrorWithLineNumber(String query, String indexString, String lineNumber) { if ((indexString.contains("|") || indexString.contains(" ")) == false) { expectInvalidIndexNameErrorWithLineNumber(query, indexString, lineNumber, indexString); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/SetParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/SetParserTests.java index ce7b63def5a3e..9bb06bd2a397b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/SetParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/SetParserTests.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.analysis.UnmappedResolution; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.expression.MapExpression; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -17,9 +18,12 @@ import org.elasticsearch.xpack.esql.plan.logical.Limit; import org.elasticsearch.xpack.esql.plan.logical.Row; +import java.util.Arrays; import java.util.List; import java.util.Map; +import static 
org.elasticsearch.xpack.esql.EsqlTestUtils.randomizeCase; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; @@ -28,17 +32,17 @@ public class SetParserTests extends AbstractStatementParserTests { public void testSet() { assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.SET_COMMAND.isEnabled()); - EsqlStatement query = statement("SET foo = \"bar\"; row a = 1", new QueryParams()); + EsqlStatement query = unvalidatedStatement("SET foo = \"bar\"; row a = 1", new QueryParams()); assertThat(query.plan(), is(instanceOf(Row.class))); assertThat(query.settings().size(), is(1)); checkSetting(query, 0, "foo", BytesRefs.toBytesRef("bar")); - query = statement("SET bar = 2; row a = 1 | eval x = 12", new QueryParams()); + query = unvalidatedStatement("SET bar = 2; row a = 1 | eval x = 12", new QueryParams()); assertThat(query.plan(), is(instanceOf(Eval.class))); assertThat(query.settings().size(), is(1)); checkSetting(query, 0, "bar", 2); - query = statement("SET bar = true; row a = 1 | eval x = 12", new QueryParams()); + query = unvalidatedStatement("SET bar = true; row a = 1 | eval x = 12", new QueryParams()); assertThat(query.plan(), is(instanceOf(Eval.class))); assertThat(query.settings().size(), is(1)); checkSetting(query, 0, "bar", true); @@ -48,17 +52,17 @@ public void testSet() { public void testSetWithTripleQuotes() { assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.SET_COMMAND.isEnabled()); - EsqlStatement query = statement("SET foo = \"\"\"bar\"baz\"\"\"; row a = 1", new QueryParams()); + EsqlStatement query = unvalidatedStatement("SET foo = \"\"\"bar\"baz\"\"\"; row a = 1", new QueryParams()); assertThat(query.plan(), is(instanceOf(Row.class))); assertThat(query.settings().size(), is(1)); checkSetting(query, 0, "foo", BytesRefs.toBytesRef("bar\"baz")); 
- query = statement("SET foo = \"\"\"bar\"\"\"\"; row a = 1", new QueryParams()); + query = unvalidatedStatement("SET foo = \"\"\"bar\"\"\"\"; row a = 1", new QueryParams()); assertThat(query.plan(), is(instanceOf(Row.class))); assertThat(query.settings().size(), is(1)); checkSetting(query, 0, "foo", BytesRefs.toBytesRef("bar\"")); - query = statement("SET foo = \"\"\"\"bar\"\"\"; row a = 1 | LIMIT 3", new QueryParams()); + query = unvalidatedStatement("SET foo = \"\"\"\"bar\"\"\"; row a = 1 | LIMIT 3", new QueryParams()); assertThat(query.plan(), is(instanceOf(Limit.class))); assertThat(query.settings().size(), is(1)); checkSetting(query, 0, "foo", BytesRefs.toBytesRef("\"bar")); @@ -66,7 +70,7 @@ public void testSetWithTripleQuotes() { public void testMultipleSet() { assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.SET_COMMAND.isEnabled()); - EsqlStatement query = statement( + EsqlStatement query = unvalidatedStatement( "SET foo = \"bar\"; SET bar = 2; SET foo = \"baz\"; SET x = 3.5; SET y = false; SET z = null; row a = 1", new QueryParams() ); @@ -83,7 +87,7 @@ public void testMultipleSet() { public void testSetArrays() { assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.SET_COMMAND.isEnabled()); - EsqlStatement query = statement("SET foo = [\"bar\", \"baz\"]; SET bar = [1, 2, 3]; row a = 1", new QueryParams()); + EsqlStatement query = unvalidatedStatement("SET foo = [\"bar\", \"baz\"]; SET bar = [1, 2, 3]; row a = 1", new QueryParams()); assertThat(query.plan(), is(instanceOf(Row.class))); assertThat(query.settings().size(), is(2)); @@ -93,7 +97,7 @@ public void testSetArrays() { public void testSetWithNamedParams() { assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.SET_COMMAND.isEnabled()); - EsqlStatement query = statement( + EsqlStatement query = unvalidatedStatement( "SET foo = \"bar\"; SET bar = ?a; SET foo = \"baz\"; SET x = ?x; row a = 1", new QueryParams( List.of( @@ -113,7 +117,7 
@@ public void testSetWithNamedParams() { public void testSetWithPositionalParams() { assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.SET_COMMAND.isEnabled()); - EsqlStatement query = statement( + EsqlStatement query = unvalidatedStatement( "SET foo = \"bar\"; SET bar = ?; SET foo = \"baz\"; SET x = ?; row a = ?", new QueryParams( List.of( @@ -139,15 +143,16 @@ public void testSetWithMap() { // non-constant map try { - statement(""" + unvalidatedStatement(""" SET my_map = {"foo": bar}; ROW a = 1 """, new QueryParams()); + fail("ParsingException expected"); } catch (ParsingException e) { assertThat(e.getMessage(), containsString("mismatched input 'bar' expecting")); } - EsqlStatement query = statement(""" + EsqlStatement query = unvalidatedStatement(""" SET my_map = {"foo": {"bar": 2, "baz": "bb"}, "x": false}; ROW a = 1 """, new QueryParams()); @@ -200,4 +205,29 @@ private Object settingValue(EsqlStatement query, int position) { return query.settings().get(position).value().fold(FoldContext.small()); } + public void testSetUnmappedFields() { + assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.SET_COMMAND.isEnabled()); + assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.OPTIONAL_FIELDS.isEnabled()); + var modes = List.of("FAIL", "NULLIFY", "LOAD"); + assertThat(modes.size(), is(UnmappedResolution.values().length)); + for (var mode : modes) { + EsqlStatement statement = statement("SET unmapped_fields=\"" + randomizeCase(mode) + "\"; row a = 1"); + assertThat(statement.setting(UNMAPPED_FIELDS), is(UnmappedResolution.valueOf(mode))); + assertThat(statement.plan(), is(instanceOf(Row.class))); + } + } + + public void testSetUnmappedFieldsWrongValue() { + assumeTrue("SET command available in snapshot only", EsqlCapabilities.Cap.SET_COMMAND.isEnabled()); + var mode = randomValueOtherThanMany( + v -> Arrays.stream(UnmappedResolution.values()).anyMatch(x -> x.name().equalsIgnoreCase(v)), + () -> 
randomAlphaOfLengthBetween(0, 10) + ); + expectValidationError( + "SET unmapped_fields=\"" + mode + "\"; row a = 1", + "Error validating setting [unmapped_fields]: Invalid unmapped_fields resolution [" + + mode + + "], must be one of [FAIL, NULLIFY, LOAD]" + ); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/QuerySettingsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/QuerySettingsTests.java index a29dee0ec24b5..e7326c3ca11a6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/QuerySettingsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/QuerySettingsTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.plan; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.analysis.UnmappedResolution; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; @@ -24,6 +25,8 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.of; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.randomizeCase; import static org.hamcrest.Matchers.both; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -41,14 +44,14 @@ public class QuerySettingsTests extends ESTestCase { public void testValidate_NonExistingSetting() { String settingName = "non_existing"; - assertInvalid(settingName, Literal.keyword(Source.EMPTY, "12"), "Unknown setting [" + settingName + "]"); + assertInvalid(settingName, of("12"), "Unknown setting [" + settingName + "]"); } public void testValidate_ProjectRouting() { var setting = QuerySettings.PROJECT_ROUTING; assertDefault(setting, nullValue()); - assertValid(setting, Literal.keyword(Source.EMPTY, "my-project"), equalTo("my-project")); + assertValid(setting, of("my-project"), 
equalTo("my-project")); assertInvalid( setting.name(), @@ -63,7 +66,7 @@ public void testValidate_ProjectRouting_noCps() { assertInvalid( setting.name(), SNAPSHOT_CTX_WITH_CPS_DISABLED, - Literal.keyword(Source.EMPTY, "my-project"), + of("my-project"), "Error validating setting [project_routing]: cross-project search not enabled" ); } @@ -73,27 +76,54 @@ public void testValidate_TimeZone() { assertDefault(setting, both(equalTo(ZoneId.of("Z"))).and(equalTo(ZoneOffset.UTC))); - assertValid(setting, Literal.keyword(Source.EMPTY, "UTC"), equalTo(ZoneId.of("UTC"))); - assertValid(setting, Literal.keyword(Source.EMPTY, "Z"), both(equalTo(ZoneId.of("Z"))).and(equalTo(ZoneOffset.UTC))); - assertValid(setting, Literal.keyword(Source.EMPTY, "Europe/Madrid"), equalTo(ZoneId.of("Europe/Madrid"))); - assertValid(setting, Literal.keyword(Source.EMPTY, "+05:00"), equalTo(ZoneId.of("+05:00"))); - assertValid(setting, Literal.keyword(Source.EMPTY, "+05"), equalTo(ZoneId.of("+05"))); - assertValid(setting, Literal.keyword(Source.EMPTY, "+07:15"), equalTo(ZoneId.of("+07:15"))); + assertValid(setting, of("UTC"), equalTo(ZoneId.of("UTC"))); + assertValid(setting, of("Z"), both(equalTo(ZoneId.of("Z"))).and(equalTo(ZoneOffset.UTC))); + assertValid(setting, of("Europe/Madrid"), equalTo(ZoneId.of("Europe/Madrid"))); + assertValid(setting, of("+05:00"), equalTo(ZoneId.of("+05:00"))); + assertValid(setting, of("+05"), equalTo(ZoneId.of("+05"))); + assertValid(setting, of("+07:15"), equalTo(ZoneId.of("+07:15"))); assertInvalid(setting.name(), Literal.integer(Source.EMPTY, 12), "Setting [" + setting.name() + "] must be of type KEYWORD"); assertInvalid( setting.name(), - Literal.keyword(Source.EMPTY, "Europe/New York"), + of("Europe/New York"), "Error validating setting [" + setting.name() + "]: Invalid time zone [Europe/New York]" ); } + public void testValidate_UnmappedFields() { + var setting = QuerySettings.UNMAPPED_FIELDS; + + assertDefault(setting, equalTo(UnmappedResolution.FAIL)); + + 
assertValid(setting, of(randomizeCase("fail")), equalTo(UnmappedResolution.FAIL)); + assertValid(setting, of(randomizeCase("nullify")), equalTo(UnmappedResolution.NULLIFY)); + assertValid(setting, of(randomizeCase("load")), equalTo(UnmappedResolution.LOAD)); + + assertInvalid(setting.name(), of(12), "Setting [" + setting.name() + "] must be of type KEYWORD"); + assertInvalid( + setting.name(), + of("UNKNOWN"), + "Error validating setting [unmapped_fields]: Invalid unmapped_fields resolution [UNKNOWN], must be one of [FAIL, NULLIFY, LOAD]" + ); + } + public void testValidate_TimeZone_nonSnapshot() { var setting = QuerySettings.TIME_ZONE; assertInvalid( setting.name(), NON_SNAPSHOT_CTX_WITH_CPS_ENABLED, - Literal.keyword(Source.EMPTY, "UTC"), + of("UTC"), + "Setting [" + setting.name() + "] is only available in snapshot builds" + ); + } + + public void testValidate_UnmappedFields_nonSnapshot() { + var setting = QuerySettings.UNMAPPED_FIELDS; + assertInvalid( + setting.name(), + NON_SNAPSHOT_CTX_WITH_CPS_ENABLED, + of("LOAD"), "Setting [" + setting.name() + "] is only available in snapshot builds" ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/FilterTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/FilterTests.java index 71140503901da..afcabb04cb5b1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/FilterTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/FilterTests.java @@ -66,6 +66,7 @@ import static org.elasticsearch.xpack.esql.core.util.Queries.Clause.FILTER; import static org.elasticsearch.xpack.esql.core.util.Queries.Clause.MUST; import static org.elasticsearch.xpack.esql.core.util.Queries.Clause.SHOULD; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; import static org.hamcrest.Matchers.nullValue; public class FilterTests extends ESTestCase { @@ -97,7 +98,8 @@ public static void init() { 
Map.of(), EsqlTestUtils.emptyPolicyResolution(), emptyInferenceResolution(), - minimumVersion + minimumVersion, + UNMAPPED_FIELDS.defaultValue() ), TEST_VERIFIER ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java index 25f5af91273bf..898c6ba2d8f40 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java @@ -46,6 +46,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.unboundLogicalOptimizerContext; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.indexResolutions; +import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; public class ClusterRequestTests extends AbstractWireSerializingTestCase { @@ -183,7 +184,8 @@ static Versioned parse(String query) { Map.of(), emptyPolicyResolution(), emptyInferenceResolution(), - minimumVersion + minimumVersion, + UNMAPPED_FIELDS.defaultValue() ), TEST_VERIFIER ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java index b48cf959fb016..9ff7c60b199fd 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java @@ -54,6 +54,7 @@ import org.elasticsearch.xpack.esql.plan.logical.join.JoinConfig; import org.elasticsearch.xpack.esql.plan.logical.join.JoinType; import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes; +import org.elasticsearch.xpack.esql.plan.logical.local.ResolvingProject; import 
org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.Stat; @@ -383,6 +384,9 @@ public void accept(Page page) { } }; } + if (toBuildClass == ResolvingProject.class && pt.getRawType() == java.util.function.Function.class) { + return java.util.function.Function.identity(); + } throw new IllegalArgumentException("Unsupported parameterized type [" + pt + "], for " + toBuildClass.getSimpleName()); } @@ -644,9 +648,23 @@ private void assertTransformedOrReplacedChildren( */ Type[] argTypes = ctor.getGenericParameterTypes(); Object[] args = new Object[argTypes.length]; - for (int i = 0; i < argTypes.length; i++) { - args[i] = nodeCtorArgs[i] == nodeCtorArgs[changedArgOffset] ? changedArgValue : nodeCtorArgs[i]; + + if (transformed instanceof ResolvingProject transformedProject && changedArgValue instanceof LogicalPlan newChild) { + for (int i = 0; i < argTypes.length; i++) { + if (i == changedArgOffset) { + args[i] = changedArgValue; + } else if (i == changedArgOffset + 2) { + args[i] = transformedProject.resolver().apply(newChild.output()); + } else { + args[i] = nodeCtorArgs[i]; + } + } + } else { + for (int i = 0; i < argTypes.length; i++) { + args[i] = nodeCtorArgs[i] == nodeCtorArgs[changedArgOffset] ? changedArgValue : nodeCtorArgs[i]; + } } + T reflectionTransformed = ctor.newInstance(args); assertEquals(reflectionTransformed, transformed); }