diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index aa6cf68324755..bd7a7e2b6e7fd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -23,6 +23,7 @@ import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.analysis.AnalyzerRules.ParameterizedAnalyzerRule; +import org.elasticsearch.xpack.esql.analysis.rules.DisallowLoadWithPartiallyMappedNonKeyword; import org.elasticsearch.xpack.esql.analysis.rules.ResolveUnmapped; import org.elasticsearch.xpack.esql.analysis.rules.ResolvedProjects; import org.elasticsearch.xpack.esql.capabilities.ConfigurationAware; @@ -249,6 +250,7 @@ public class Analyzer extends ParameterizedRuleExecutor( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index d16dc56abbc5f..ce72d1ae51104 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -17,14 +17,20 @@ import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.capabilities.Unresolvable; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp; import org.elasticsearch.xpack.esql.core.expression.function.Function; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.esql.core.tree.Node; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; +import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import org.elasticsearch.xpack.esql.core.util.Holder; +import org.elasticsearch.xpack.esql.expression.function.TimestampAware; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg; @@ -44,6 +50,7 @@ import org.elasticsearch.xpack.esql.plan.logical.Subquery; import org.elasticsearch.xpack.esql.plan.logical.UnionAll; import org.elasticsearch.xpack.esql.plan.logical.promql.PromqlCommand; +import org.elasticsearch.xpack.esql.session.FieldNameUtils; import org.elasticsearch.xpack.esql.telemetry.FeatureMetric; import org.elasticsearch.xpack.esql.telemetry.Metrics; @@ -69,6 +76,8 @@ */ public class Verifier { + static final String UNMAPPED_TIMESTAMP_SUFFIX = "; the [unmapped_fields] setting does not apply to the implicit @timestamp reference"; + /** * Extra plan verification checks defined in plugins. */ @@ -104,9 +113,10 @@ Collection verify(LogicalPlan plan, BitSet partialMetrics) { Collection verify(LogicalPlan plan, BitSet partialMetrics, UnmappedResolution unmappedResolution) { assert partialMetrics != null; Failures failures = new Failures(); + boolean unmappedTimestampHandled = unmappedResolution == UnmappedResolution.FAIL || isTimestampUnmappedInAllIndices(plan, failures); // quick verification for unresolved attributes - checkUnresolvedAttributes(plan, failures); + checkUnresolvedAttributes(plan, failures, unmappedTimestampHandled); ConfigurationAware.verifyNoMarkerConfiguration(plan, failures); @@ -117,6 +127,7 @@ Collection verify(LogicalPlan plan, BitSet partialMetrics, UnmappedReso if (unmappedResolution == UnmappedResolution.LOAD) { checkLoadModeDisallowedCommands(plan, failures); + checkFlattenedSubFieldLoad(plan, failures); checkLoadModeDisallowedFunctions(plan, failures); } @@ -158,7 +169,7 @@ Collection verify(LogicalPlan plan, BitSet partialMetrics, UnmappedReso return failures.failures(); } - private static void checkUnresolvedAttributes(LogicalPlan plan, Failures failures) { + private static void checkUnresolvedAttributes(LogicalPlan plan, Failures failures, boolean skipUnresolvedTimestamp) { plan.forEachUp(p -> { // if the children are unresolved, so will this node; counting it will only add noise if (p.childrenResolved() == false) { @@ -191,6 +202,9 @@ else if (p.resolved()) { return; } + if (skipUnresolvedTimestamp && ae instanceof UnresolvedTimestamp) { + return; + } if (ae instanceof Unresolvable u) { failures.add(fail(ae, u.unresolvedMessage())); } @@ -363,6 +377,38 @@ private static void checkLimitBy(LogicalPlan plan, Failures failures) { } } + /** + * The {@code unmapped_fields} setting does not apply to the implicit {@code @timestamp} reference ({@link TimestampAware} functions). + * Only emits the specific message when {@code @timestamp} is truly absent from all source index mappings; + * if the field was present but dropped/renamed by the query, the generic unresolved-attribute message is more appropriate. + * See https://github.com/elastic/elasticsearch/issues/142127 + */ + private static boolean isTimestampUnmappedInAllIndices(LogicalPlan plan, Failures failures) { + if (plan.anyMatch(p -> p instanceof EsRelation r && r.indexMode() != IndexMode.LOOKUP && hasTimestamp(r))) { + return false; + } + plan.forEachDown(p -> { + if (p instanceof TimestampAware ta && ta.timestamp() instanceof UnresolvedTimestamp) { + failures.add(fail(p, "[{}] " + UnresolvedTimestamp.UNRESOLVED_SUFFIX + UNMAPPED_TIMESTAMP_SUFFIX, p.sourceText())); + } + p.forEachExpression(Expression.class, e -> { + if (e instanceof TimestampAware ta && ta.timestamp() instanceof UnresolvedTimestamp) { + failures.add(fail(e, "[{}] " + UnresolvedTimestamp.UNRESOLVED_SUFFIX + UNMAPPED_TIMESTAMP_SUFFIX, e.sourceText())); + } + }); + }); + return true; + } + + private static boolean hasTimestamp(EsRelation relation) { + for (Attribute attr : relation.output()) { + if (MetadataAttribute.TIMESTAMP_FIELD.equals(attr.name())) { + return true; + } + } + return false; + } + /** * {@code unmapped_fields="load"} does not yet support branching commands (FORK, LOOKUP JOIN, subqueries/views). * See https://github.com/elastic/elasticsearch/issues/142033 @@ -381,6 +427,62 @@ private static void checkLoadModeDisallowedCommands(LogicalPlan plan, Failures f }); } + /** + * Reject loading sub-fields of flattened fields when {@code unmapped_fields="load"}, by checking if any + * {@link PotentiallyUnmappedKeywordEsField} is a sub-field of a parent field whose original type is flattened. The reason is that + * flattened subfields resolution may eventually differ from what happens when {@code unmapped_fields="load"}. + */ + private static void checkFlattenedSubFieldLoad(LogicalPlan plan, Failures failures) { + plan.forEachDown(p -> { + if (p instanceof EsRelation == false) { + return; + } + + EsRelation esRelation = (EsRelation) p; + Set flattenedFieldNames = flattenedFieldNames(esRelation.output()); + + if (flattenedFieldNames.isEmpty()) { + return; + } + + for (Attribute attr : esRelation.output()) { + if (!(attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField)) { + continue; + } + + String name = fa.name(); + List prefixes = FieldNameUtils.parentPrefixes(name); + for (String parent : prefixes) { + if (flattenedFieldNames.contains(parent)) { + Failure failure = fail( + fa, + "Loading subfield [{}] when parent [{}] is of flattened field type is not supported with " + + "unmapped_fields=\"load\"", + name, + parent + ); + failures.add(failure); + break; + } + } + } + }); + } + + private static Set flattenedFieldNames(List attributes) { + Set names = new HashSet<>(); + + for (Attribute attribute : attributes) { + if (attribute instanceof FieldAttribute fa + && fa.field() instanceof UnsupportedEsField uef + && uef.getOriginalTypes().contains("flattened")) { + names.add(fa.name()); + } + } + + return names; + } + /** * Disallow full-text search when unmapped_fields=load. We do not restrict to "only when the FTF * is applied to an unmapped field" because FTFs like KQL can reference unmapped fields inside the diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/DisallowLoadWithPartiallyMappedNonKeyword.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/DisallowLoadWithPartiallyMappedNonKeyword.java new file mode 100644 index 0000000000000..a897b58fa8d31 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/DisallowLoadWithPartiallyMappedNonKeyword.java @@ -0,0 +1,126 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.analysis.rules; + +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.analysis.AnalyzerRules; +import org.elasticsearch.xpack.esql.analysis.UnmappedResolution; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.plan.IndexPattern; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; + +/** + * Fails the query when {@code SET unmapped_fields="load"} is used and any index has a field that is + * partially mapped (present in some indices, unmapped in others) and whose type where it is mapped + * is not KEYWORD. This is a temporary restriction until semantics are finalized. + *

+ * Implemented as an analyzer rule (rather than in {@link org.elasticsearch.xpack.esql.analysis.Verifier}) + * because this check requires index resolution context (e.g. {@link EsIndex#partiallyUnmappedFields()}). + * All offending field names are collected and reported in a single exception. + */ +public class DisallowLoadWithPartiallyMappedNonKeyword extends AnalyzerRules.ParameterizedAnalyzerRule { + + @Override + protected boolean skipResolved() { + return false; // run on all nodes so we always check indices in the plan + } + + @Override + protected LogicalPlan rule(LogicalPlan plan, AnalyzerContext context) { + if (context.unmappedResolution() != UnmappedResolution.LOAD) { + return plan; + } + // Only check indices that appear in the plan (EsRelation nodes) + List resolutionsInPlan = new ArrayList<>(); + plan.forEachDown(EsRelation.class, relation -> { + if (relation.indexMode() == IndexMode.LOOKUP) { + IndexResolution r = context.lookupResolution().get(relation.indexPattern()); + if (r != null) { + resolutionsInPlan.add(r); + } + } else { + IndexResolution r = context.indexResolution().get(new IndexPattern(relation.source(), relation.indexPattern())); + if (r != null) { + resolutionsInPlan.add(r); + } + } + }); + Set offending = new TreeSet<>(); + collectOffendingFromResolutions(resolutionsInPlan, offending); + if (offending.isEmpty() == false) { + throw new VerificationException( + "unmapped_fields=\"load\" is not supported when the query involves partially mapped fields that are not KEYWORD: {}. " + + "Use unmapped_fields=\"nullify\" or unmapped_fields=\"fail\", or ensure the field is mapped as KEYWORD in all indices.", + String.join(", ", offending) + ); + } + return plan; + } + + private static void collectOffendingFromResolutions(Collection resolutions, Set offending) { + for (IndexResolution resolution : resolutions) { + if (resolution.isValid() == false) { + continue; + } + EsIndex index = resolution.get(); + for (String fieldName : index.partiallyUnmappedFields()) { + EsField field = resolveFieldByPath(index.mapping(), fieldName); + if (field == null) { + continue; + } + if (field instanceof InvalidMappedField) { + offending.add(fieldName); + } else if (field.getDataType() != KEYWORD) { + offending.add(fieldName); + } + } + } + } + + /** + * Resolve a dotted field path (e.g. "a.b.c") in the given root mapping. + * Returns the leaf EsField or null if any segment is missing. + */ + private static EsField resolveFieldByPath(Map root, String path) { + if (path == null || path.isEmpty()) { + return null; + } + String[] segments = path.split("\\."); + Map current = root; + EsField field = null; + for (int i = 0; i < segments.length; i++) { + field = current.get(segments[i]); + if (field == null) { + return null; + } + if (i < segments.length - 1) { + Map props = field.getProperties(); + if (props == null || props.isEmpty()) { + return null; + } + current = props; + } + } + return field; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java index 5996b2fb681a7..4a2b4067ef8fd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/FieldNameUtils.java @@ -48,14 +48,13 @@ import org.elasticsearch.xpack.esql.plan.logical.join.LookupJoin; import org.elasticsearch.xpack.esql.session.EsqlSession.PreAnalysisResult; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Set; import java.util.function.BiConsumer; -import java.util.stream.Stream; -import static java.util.stream.Collectors.toSet; import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD; public class FieldNameUtils { @@ -297,14 +296,48 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha // there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index return new PreAnalysisResult(IndexResolver.INDEX_METADATA_FIELD, wildcardJoinIndices); } else { - HashSet allFields = new HashSet<>(fieldNames.stream().flatMap(FieldNameUtils::withSubfields).collect(toSet())); + Set allFields = new HashSet<>(); + for (String name : fieldNames) { + addSubfields(name, allFields); + } allFields.add(MetadataAttribute.INDEX); return new PreAnalysisResult(allFields, wildcardJoinIndices); } } - private static Stream withSubfields(String name) { - return name.endsWith(WILDCARD) ? Stream.of(name) : Stream.of(name, name + ".*"); + /** + * Expands a field name into a set of names to request from field caps. For example, "a.b.c" will be expanded to: + *

    + *
  • The field itself: "a.b.c"
  • + *
  • Its multi-fields: "a.b.c.*"
  • + *
  • All dot-delimited parent prefixes: ["a", "a.b"]. This is needed to get back flattened parents, so the verifier can + * detect subfields of flattened.
  • + *
+ */ + private static void addSubfields(String name, Set allFields) { + allFields.add(name); + + if (name.endsWith(WILDCARD)) { + return; + } + + allFields.add(name + ".*"); + allFields.addAll(parentPrefixes(name)); + } + + /** + * Returns the dot-delimited parent prefixes of a field name. For example, "a.b.c" will return ["a", "a.b"] + */ + public static List parentPrefixes(String name) { + List prefixes = new ArrayList<>(); + int pos = name.indexOf('.'); + + while (pos > 0) { + prefixes.add(name.substring(0, pos)); + pos = name.indexOf('.', pos + 1); + } + + return prefixes; } /** diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java index c7b75f7719f15..159bc5429c99e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.analysis; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; @@ -15,14 +16,26 @@ import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.plan.EsqlStatement; import org.elasticsearch.xpack.esql.plan.IndexPattern; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.Filter; import org.elasticsearch.xpack.esql.plan.logical.Limit; import org.elasticsearch.xpack.esql.plan.logical.Project; import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Set; +import static java.util.Collections.emptyMap; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_CFG; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_PARSER; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; @@ -32,6 +45,7 @@ import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyzer; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultEnrichResolution; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultLookupResolution; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.indexResolutions; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTests.withInlinestatsWarning; import static org.elasticsearch.xpack.esql.plan.QuerySettings.UNMAPPED_FIELDS; @@ -40,6 +54,7 @@ import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; // @TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug") public class AnalyzerUnmappedTests extends ESTestCase { @@ -528,6 +543,364 @@ public void testLoadModeDisallowsSubqueryAndFork() { verificationFailure(query, "FORK is not supported with unmapped_fields=\"load\""); } + private static final String UNMAPPED_TIMESTAMP_SUFFIX = UnresolvedTimestamp.UNRESOLVED_SUFFIX + Verifier.UNMAPPED_TIMESTAMP_SUFFIX; + + public void testTbucketWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS c = COUNT(*) BY tbucket(1 hour)", "[tbucket(1 hour)] "); + } + + public void testTrangeWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | WHERE trange(1 hour)", "[trange(1 hour)] "); + } + + public void testTbucketAndTrangeWithUnmappedTimestamp() { + unmappedTimestampFailure( + "FROM test | WHERE trange(1 hour) | STATS c = COUNT(*) BY tbucket(1 hour)", + "[tbucket(1 hour)] ", + "[trange(1 hour)] " + ); + } + + public void testRateWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS rate(salary)", "[rate(salary)] "); + } + + public void testIrateWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS irate(salary)", "[irate(salary)] "); + } + + public void testDeltaWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS delta(salary)", "[delta(salary)] "); + } + + public void testIdeltaWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS idelta(salary)", "[idelta(salary)] "); + } + + public void testIncreaseWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS increase(salary)", "[increase(salary)] "); + } + + public void testDerivWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS deriv(salary)", "[deriv(salary)] "); + } + + public void testFirstOverTimeWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS first_over_time(salary)", "[first_over_time(salary)] "); + } + + public void testLastOverTimeWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS last_over_time(salary)", "[last_over_time(salary)] "); + } + + public void testRateAndTbucketWithUnmappedTimestamp() { + unmappedTimestampFailure("FROM test | STATS rate(salary) BY tbucket(1 hour)", "[rate(salary)] ", "[tbucket(1 hour)] "); + } + + public void testTbucketWithUnmappedTimestampAfterWhere() { + unmappedTimestampFailure("FROM test | WHERE emp_no > 10 | STATS c = COUNT(*) BY tbucket(1 hour)", "[tbucket(1 hour)] "); + } + + public void testTbucketWithUnmappedTimestampAfterEval() { + unmappedTimestampFailure("FROM test | EVAL x = salary + 1 | STATS c = COUNT(*) BY tbucket(1 hour)", "[tbucket(1 hour)] "); + } + + public void testTbucketWithUnmappedTimestampMultipleGroupings() { + unmappedTimestampFailure("FROM test | STATS c = COUNT(*) BY tbucket(1 hour), emp_no", "[tbucket(1 hour)] "); + } + + public void testTbucketWithUnmappedTimestampAfterRename() { + unmappedTimestampFailure("FROM test | RENAME emp_no AS e | STATS c = COUNT(*) BY tbucket(1 hour)", "[tbucket(1 hour)] "); + } + + public void testTbucketWithUnmappedTimestampAfterDrop() { + unmappedTimestampFailure("FROM test | DROP emp_no | STATS c = COUNT(*) BY tbucket(1 hour)", "[tbucket(1 hour)] "); + } + + public void testTrangeWithUnmappedTimestampCompoundWhere() { + unmappedTimestampFailure("FROM test | WHERE trange(1 hour) AND emp_no > 10", "[trange(1 hour)] "); + } + + public void testTrangeWithUnmappedTimestampAfterEval() { + unmappedTimestampFailure("FROM test | EVAL x = salary + 1 | WHERE trange(1 hour)", "[trange(1 hour)] "); + } + + public void testTbucketWithUnmappedTimestampInInlineStats() { + unmappedTimestampFailure("FROM test | INLINE STATS c = COUNT(*) BY tbucket(1 hour)", "[tbucket(1 hour)] "); + } + + public void testTbucketWithUnmappedTimestampWithFork() { + var query = "FROM test | FORK (STATS c = COUNT(*) BY tbucket(1 hour)) (STATS d = COUNT(*) BY emp_no)"; + for (var statement : List.of(setUnmappedNullify(query), setUnmappedLoad(query))) { + var e = expectThrows(VerificationException.class, () -> analyzeStatement(statement)); + assertThat(e.getMessage(), containsString("[tbucket(1 hour)] ")); + assertThat(e.getMessage(), not(containsString("FORK is not supported"))); + } + } + + public void testTbucketWithUnmappedTimestampWithLookupJoin() { + var query = """ + FROM test + | EVAL language_code = languages + | LOOKUP JOIN languages_lookup ON language_code + | STATS c = COUNT(*) BY tbucket(1 hour) + """; + for (var statement : List.of(setUnmappedNullify(query), setUnmappedLoad(query))) { + var e = expectThrows(VerificationException.class, () -> analyzeStatement(statement)); + assertThat(e.getMessage(), containsString("[tbucket(1 hour)] ")); + assertThat(e.getMessage(), not(containsString("LOOKUP JOIN is not supported"))); + } + } + + public void testTbucketWithTimestampPresent() { + var query = "FROM sample_data | STATS c = COUNT(*) BY tbucket(1 hour)"; + for (var statement : List.of(setUnmappedNullify(query), setUnmappedLoad(query))) { + var plan = analyzeStatement(statement); + var limit = as(plan, Limit.class); + var aggregate = as(limit.child(), Aggregate.class); + var relation = as(aggregate.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("sample_data")); + assertTimestampInOutput(relation); + } + } + + public void testTrangeWithTimestampPresent() { + var query = "FROM sample_data | WHERE trange(1 hour)"; + for (var statement : List.of(setUnmappedNullify(query), setUnmappedLoad(query))) { + var plan = analyzeStatement(statement); + var limit = as(plan, Limit.class); + var filter = as(limit.child(), Filter.class); + var relation = as(filter.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("sample_data")); + assertTimestampInOutput(relation); + } + } + + public void testTbucketTimestampPresentButDroppedNullify() { + var e = expectThrows( + VerificationException.class, + () -> analyzeStatement(setUnmappedNullify("FROM sample_data | DROP @timestamp | STATS c = COUNT(*) BY tbucket(1 hour)")) + ); + assertThat(e.getMessage(), containsString(UnresolvedTimestamp.UNRESOLVED_SUFFIX)); + assertThat(e.getMessage(), not(containsString(Verifier.UNMAPPED_TIMESTAMP_SUFFIX))); + } + + public void testTbucketTimestampPresentButRenamedNullify() { + var e = expectThrows( + VerificationException.class, + () -> analyzeStatement(setUnmappedNullify("FROM sample_data | RENAME @timestamp AS ts | STATS c = COUNT(*) BY tbucket(1 hour)")) + ); + assertThat(e.getMessage(), containsString(UnresolvedTimestamp.UNRESOLVED_SUFFIX)); + assertThat(e.getMessage(), not(containsString(Verifier.UNMAPPED_TIMESTAMP_SUFFIX))); + } + + private static void assertTimestampInOutput(EsRelation relation) { + assertTrue( + "@timestamp field should be present in the EsRelation output", + relation.output().stream().anyMatch(a -> MetadataAttribute.TIMESTAMP_FIELD.equals(a.name())) + ); + } + + private void unmappedTimestampFailure(String query, String... expectedFailures) { + for (var statement : List.of(setUnmappedNullify(query), setUnmappedLoad(query))) { + var e = expectThrows(VerificationException.class, () -> analyzeStatement(statement)); + for (String expected : expectedFailures) { + assertThat(e.getMessage(), containsString(expected + UNMAPPED_TIMESTAMP_SUFFIX)); + } + } + } + + /** + * Verify that referencing a sub-field of a flattened field (e.g. "foo.bar" when "foo" is flattened) is rejected + */ + public void testFlattenedSubFieldRejection() { + Map mapping = Map.of("field", new UnsupportedEsField("field", List.of("flattened"))); + EsIndex index = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD), Map.of(), Map.of(), Set.of()); + String errorMessage = + "Loading subfield [%s] when parent [%s] is of flattened field type is not supported with unmapped_fields=\"load\""; + + verificationFailure( + setUnmappedLoad("FROM test | KEEP field.a"), + index, + String.format(Locale.ROOT, errorMessage, "field.a", "field") + ); + verificationFailure( + setUnmappedLoad("FROM test | STATS x = SAMPLE(field.a, 1)"), + index, + String.format(Locale.ROOT, errorMessage, "field.a", "field") + ); + verificationFailure( + setUnmappedLoad("FROM test | EVAL x = TO_STRING(field.a)"), + index, + String.format(Locale.ROOT, errorMessage, "field.a", "field") + ); + verificationFailure( + setUnmappedLoad("FROM test | KEEP field.a.b"), + index, + String.format(Locale.ROOT, errorMessage, "field.a.b", "field") + ); + verificationFailure( + setUnmappedLoad("FROM test | KEEP field.a.b.c"), + index, + String.format(Locale.ROOT, errorMessage, "field.a.b.c", "field") + ); + verificationFailure( + setUnmappedLoad("FROM test | SORT field.x, field.z"), + index, + String.format(Locale.ROOT, errorMessage, "field.x", "field"), + String.format(Locale.ROOT, errorMessage, "field.z", "field") + ); + verificationFailure( + setUnmappedLoad("FROM test | SORT field.x | KEEP field.z"), + index, + String.format(Locale.ROOT, errorMessage, "field.x", "field"), + String.format(Locale.ROOT, errorMessage, "field.z", "field") + ); + } + + private void verificationFailure(String query, EsIndex index, String... expectedFailures) { + EsqlStatement statement = TEST_PARSER.createStatement(query); + Map indexResolutions = Map.of( + new IndexPattern(Source.EMPTY, index.name()), + IndexResolution.valid(index) + ); + Analyzer analyzer = AnalyzerTestUtils.analyzer(indexResolutions, TEST_VERIFIER, configuration(query), statement); + var e = expectThrows(VerificationException.class, () -> analyzer.analyze(statement.plan())); + for (String expectedFailure : expectedFailures) { + assertThat(e.getMessage(), containsString(expectedFailure)); + } + } + + /** + * When unmapped_fields=load and an index has a partially mapped field that is not KEYWORD (e.g. LONG), + * the query must fail with VerificationException. + * Covers one offending field; see {@link #testDisallowLoadWithPartiallyMappedNonKeywordReportsAllFields} for multiple. + */ + public void testDisallowLoadWithPartiallyMappedNonKeyword() { + assumeTrue("Requires OPTIONAL_FIELDS_V2", EsqlCapabilities.Cap.OPTIONAL_FIELDS_V2.isEnabled()); + + var esIndex = new EsIndex( + "partial_idx", + Map.of("partial_long", new EsField("partial_long", DataType.LONG, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)), + Map.of("partial_idx", IndexMode.STANDARD), + Map.of(), + Map.of(), + Set.of("partial_long") + ); + var analyzer = analyzer( + indexResolutions(esIndex), + defaultLookupResolution(), + defaultEnrichResolution(), + TEST_VERIFIER, + TEST_CFG, + UnmappedResolution.LOAD + ); + var statement = TEST_PARSER.createStatement("FROM partial_idx | KEEP partial_long"); + + var e = expectThrows(VerificationException.class, () -> analyzer.analyze(statement.plan())); + assertThat(e.getMessage(), containsString("unmapped_fields=\"load\"")); + assertThat(e.getMessage(), containsString("partial_long")); + } + + /** + * When multiple partially mapped non-KEYWORD fields exist, all must be reported in the error message. + * Guards against regressions where only the first offending field is reported. + */ + public void testDisallowLoadWithPartiallyMappedNonKeywordReportsAllFields() { + assumeTrue("Requires OPTIONAL_FIELDS_V2", EsqlCapabilities.Cap.OPTIONAL_FIELDS_V2.isEnabled()); + + var esIndex = new EsIndex( + "partial_idx", + Map.of( + "partial_long", + new EsField("partial_long", DataType.LONG, emptyMap(), true, EsField.TimeSeriesFieldType.NONE), + "partial_double", + new EsField("partial_double", DataType.DOUBLE, emptyMap(), true, EsField.TimeSeriesFieldType.NONE) + ), + Map.of("partial_idx", IndexMode.STANDARD), + Map.of(), + Map.of(), + Set.of("partial_long", "partial_double") + ); + var analyzer = analyzer( + indexResolutions(esIndex), + defaultLookupResolution(), + defaultEnrichResolution(), + TEST_VERIFIER, + TEST_CFG, + UnmappedResolution.LOAD + ); + var statement = TEST_PARSER.createStatement("FROM partial_idx | KEEP partial_long, partial_double"); + + var e = expectThrows(VerificationException.class, () -> analyzer.analyze(statement.plan())); + assertThat(e.getMessage(), containsString("unmapped_fields=\"load\"")); + assertThat(e.getMessage(), containsString("partial_long")); + assertThat(e.getMessage(), containsString("partial_double")); + } + + /** + * When unmapped_fields=load and the only partially mapped field is KEYWORD, the query is allowed. + * Contrast with {@link #testDisallowLoadWithPartiallyMappedNonKeyword} for non-KEYWORD types. + */ + public void testAllowLoadWithPartiallyMappedKeyword() { + assumeTrue("Requires OPTIONAL_FIELDS_V2", EsqlCapabilities.Cap.OPTIONAL_FIELDS_V2.isEnabled()); + + var esIndex = new EsIndex( + "partial_idx", + Map.of( + "partial_type_keyword", + new EsField("partial_type_keyword", DataType.KEYWORD, emptyMap(), true, EsField.TimeSeriesFieldType.NONE) + ), + Map.of("partial_idx", IndexMode.STANDARD), + Map.of(), + Map.of(), + Set.of("partial_type_keyword") + ); + var analyzer = analyzer( + indexResolutions(esIndex), + defaultLookupResolution(), + defaultEnrichResolution(), + TEST_VERIFIER, + TEST_CFG, + UnmappedResolution.LOAD + ); + var statement = TEST_PARSER.createStatement("FROM partial_idx | KEEP partial_type_keyword"); + + // Should not throw + var plan = analyzer.analyze(statement.plan()); + assertThat(plan, not(nullValue())); + } + + /** + * When unmapped_fields=nullify, partially mapped non-KEYWORD fields do not cause failure. + * The restriction applies only to unmapped_fields=load; nullify and fail remain allowed. + */ + public void testNullifyWithPartiallyMappedNonKeywordDoesNotFail() { + assumeTrue("Requires OPTIONAL_FIELDS_NULLIFY_TECH_PREVIEW", EsqlCapabilities.Cap.OPTIONAL_FIELDS_NULLIFY_TECH_PREVIEW.isEnabled()); + + var esIndex = new EsIndex( + "partial_idx", + Map.of("partial_long", new EsField("partial_long", DataType.LONG, emptyMap(), true, EsField.TimeSeriesFieldType.NONE)), + Map.of("partial_idx", IndexMode.STANDARD), + Map.of(), + Map.of(), + Set.of("partial_long") + ); + var analyzer = analyzer( + indexResolutions(esIndex), + defaultLookupResolution(), + defaultEnrichResolution(), + TEST_VERIFIER, + TEST_CFG, + UnmappedResolution.NULLIFY + ); + var statement = TEST_PARSER.createStatement("FROM partial_idx | KEEP partial_long"); + + // Should not throw + var plan = analyzer.analyze(statement.plan()); + assertThat(plan, not(nullValue())); + } + private void verificationFailure(String statement, String expectedFailure) { var e = expectThrows(VerificationException.class, () -> analyzeStatement(statement)); assertThat(e.getMessage(), containsString(expectedFailure)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java index 5cf6bc58acfeb..3f4a87c7bc0c7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/FieldNameUtilsTests.java @@ -141,7 +141,8 @@ public void testConvertFromIntAndLong() { "salary_change.int.*", "salary_change.int", "salary_change.long.*", - "salary_change.long" + "salary_change.long", + "salary_change" ) ); } @@ -160,7 +161,16 @@ public void testLongToLong() { | where languages.long < avg_worked_seconds | limit 1 | keep emp_no""", - Set.of("_index", "emp_no", "emp_no.*", "languages.long", "languages.long.*", "avg_worked_seconds", "avg_worked_seconds.*") + Set.of( + "_index", + "emp_no", + "emp_no.*", + "languages.long", + "languages.long.*", + "avg_worked_seconds", + "avg_worked_seconds.*", + "languages" + ) ); } @@ -908,7 +918,7 @@ public void testMvSum() { | eval salary_change = mv_sum(salary_change.int) | sort emp_no | keep emp_no, salary_change.int, salary_change - | limit 7""", Set.of("_index", "emp_no", "emp_no.*", "salary_change.int", "salary_change.int.*")); + | limit 7""", Set.of("_index", "emp_no", "emp_no.*", "salary_change.int", "salary_change.int.*", "salary_change")); } public void testMetaIndexAliasedInAggs() { @@ -977,7 +987,10 @@ public void testRenameDrop() { } public void testMaxOfLong() { - assertFieldNames("from employees | stats l = max(languages.long)", Set.of("_index", "languages.long", "languages.long.*")); + assertFieldNames( + "from employees | stats l = max(languages.long)", + Set.of("_index", "languages.long", "languages.long.*", "languages") + ); } public void testGroupByAlias() { @@ -1015,11 +1028,14 @@ public void testByStringAndString() { } public void testByLongAndLong() { - assertFieldNames(""" - from employees - | eval trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000 - | stats c = count(languages.long) by languages.long, trunk_worked_seconds - | sort c desc""", Set.of("_index", "avg_worked_seconds", "avg_worked_seconds.*", "languages.long", "languages.long.*")); + assertFieldNames( + """ + from employees + | eval trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000 + | stats c = count(languages.long) by languages.long, trunk_worked_seconds + | sort c desc""", + Set.of("_index", "avg_worked_seconds", "avg_worked_seconds.*", "languages.long", "languages.long.*", "languages") + ); } public void testByDateAndKeywordAndIntWithAlias() { @@ -1057,7 +1073,7 @@ public void testPercentileOfLong() { """ from employees | stats p0 = percentile(salary_change.long, 0), p50 = percentile(salary_change.long, 50)""", - Set.of("_index", "salary_change.long", "salary_change.long.*") + Set.of("_index", "salary_change", "salary_change.long", "salary_change.long.*") ); } @@ -1125,7 +1141,16 @@ public void testByUnmentionedLongAndLong() { | eval trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000 | stats c = count(gender) by languages.long, trunk_worked_seconds | sort c desc""", - Set.of("_index", "avg_worked_seconds", "avg_worked_seconds.*", "languages.long", "languages.long.*", "gender", "gender.*") + Set.of( + "_index", + "avg_worked_seconds", + "avg_worked_seconds.*", + "languages.long", + "languages.long.*", + "gender", + "gender.*", + "languages" + ) ); } @@ -1635,7 +1660,8 @@ public void testMetrics() { "network.total_cost", "network.total_cost.*", "cluster", - "cluster.*" + "cluster.*", + "network" ) ); @@ -1972,20 +1998,24 @@ public void testJoinMaskingKeep() { "language_name.*", "message.*", "type.*", - "language.name.*" + "language.name.*", + "language" ) ); } public void testJoinMaskingKeep2() { - assertFieldNames(""" - from languag* - | eval type = "foo" - | rename type as message - | lookup join message_types_lookup on message - | rename type as message - | lookup join message_types_lookup on message - | keep `language.name`""", Set.of("_index", "language.name", "type", "message", "message.*", "type.*", "language.name.*")); + assertFieldNames( + """ + from languag* + | eval type = "foo" + | rename type as message + | lookup join message_types_lookup on message + | rename type as message + | lookup join message_types_lookup on message + | keep `language.name`""", + Set.of("_index", "language.name", "type", "message", "message.*", "type.*", "language.name.*", "language") + ); } public void testEnrichMaskingEvalOn() { @@ -2846,7 +2876,11 @@ public void testForkAfterEnrich() { "city.country.continent.planet.name.*", "city.name.*", "city.country.name.*", - "airport.*" + "airport.*", + "city", + "city.country", + "city.country.continent", + "city.country.continent.planet" ) ); } @@ -2869,7 +2903,11 @@ public void testForkBranchWithEnrich() { "city.country.continent.planet.name.*", "city.name.*", "city.country.name.*", - "airport.*" + "airport.*", + "city", + "city.country", + "city.country.continent", + "city.country.continent.planet" ) ); } @@ -2893,7 +2931,11 @@ public void testForkBeforeEnrich() { "city.country.continent.planet.name.*", "city.name.*", "city.country.name.*", - "airport.*" + "airport.*", + "city", + "city.country", + "city.country.continent", + "city.country.continent.planet" ) ); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/260_flattened_subfield.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/260_flattened_subfield.yml new file mode 100644 index 0000000000000..4006bfec36725 --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/260_flattened_subfield.yml @@ -0,0 +1,109 @@ +--- +setup: + - requires: + test_runner_features: [ capabilities, contains ] + capabilities: + - method: POST + path: /_query + parameters: [] + capabilities: [ optional_fields_v2, load_flattened_field ] + reason: "Loading a subfield a flattened field must fail when unmapped_fields=load" + - do: + indices.create: + index: index1 + body: + mappings: + dynamic: false + properties: + foo: + type: flattened + - do: + indices.create: + index: index2 + body: + mappings: + dynamic: false + properties: + foo.bar: + type: flattened + - do: + indices.create: + index: index3 + body: + mappings: + dynamic: false + properties: + foo: + type: keyword + - do: + indices.create: + index: index4 + body: + mappings: + dynamic: false + properties: + foo.bar: + type: keyword + +--- +'loading subfield of flattened 1': + - do: + catch: bad_request + esql.query: + body: + # [foo] is flattened in index1 + query: 'SET unmapped_fields="load"; FROM index1 | KEEP foo.a' + + - match: { error.type: verification_exception } + - contains: { error.reason: 'Loading subfield [foo.a] when parent [foo] is of flattened field type is not supported with unmapped_fields="load"' } + +--- +'loading subfield of flattened 2': + - do: + catch: bad_request + esql.query: + body: + # [foo] is flattened in index1 + query: 'SET unmapped_fields="load"; FROM index1 | KEEP foo.bar.baz' + + - match: { error.type: verification_exception } + - contains: { error.reason: 'Loading subfield [foo.bar.baz] when parent [foo] is of flattened field type is not supported with unmapped_fields="load"' } + +--- +'loading subfield of flattened 3': + - do: + catch: bad_request + esql.query: + body: + # [foo.bar] is flattened in index2 + query: 'SET unmapped_fields="load"; FROM index2 | WHERE foo.bar.subfield == "something"' + + - match: { error.type: verification_exception } + - contains: { error.reason: 'Loading subfield [foo.bar.subfield] when parent [foo.bar] is of flattened field type is not supported with unmapped_fields="load"' } + +--- +'loading from two indices with different mappings 1': + - do: + catch: bad_request + esql.query: + body: + # [foo] is flattened in index1 + # [foo] is keyword in index3 + query: 'SET unmapped_fields="load"; FROM index1,index3 | EVAL x = TO_STRING(foo.a), x = TO_STRING(foo.b.c.d)' + + - match: { error.type: verification_exception } + - contains: { error.reason: 'Loading subfield [foo.a] when parent [foo] is of flattened field type is not supported with unmapped_fields="load"' } + - contains: { error.reason: 'Loading subfield [foo.b.c.d] when parent [foo] is of flattened field type is not supported with unmapped_fields="load"' } + +--- +'loading from two indices with different mappings 2': + - do: + catch: bad_request + esql.query: + body: + # [foo] is flattened in index1 + # [foo.bar] is keyword in index4 + query: 'SET unmapped_fields="load"; FROM index1,index4 | WHERE foo.bar == "something"' + + - match: { error.type: verification_exception } + - contains: { error.reason: 'Cannot use field [foo.bar] due to ambiguities being mapped as [2] incompatible types: [keyword] enforced by INSIST command, and [unsupported] in index mappings' }