Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
import org.elasticsearch.xpack.esql.VerificationException;
import org.elasticsearch.xpack.esql.analysis.AnalyzerRules.ParameterizedAnalyzerRule;
import org.elasticsearch.xpack.esql.analysis.rules.DisallowLoadWithPartiallyMappedNonKeyword;
import org.elasticsearch.xpack.esql.analysis.rules.ResolveUnmapped;
import org.elasticsearch.xpack.esql.analysis.rules.ResolvedProjects;
import org.elasticsearch.xpack.esql.capabilities.ConfigurationAware;
Expand Down Expand Up @@ -249,6 +250,7 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
new InsertFromAggregateMetricDouble(),
new TimeSeriesGroupByAll(),
new ResolveUnionTypesInUnionAll(),
new DisallowLoadWithPartiallyMappedNonKeyword(),
new ResolveUnmapped()
),
new Batch<>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,20 @@
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.capabilities.Unresolvable;
import org.elasticsearch.xpack.esql.core.expression.Alias;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute;
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp;
import org.elasticsearch.xpack.esql.core.expression.function.Function;
import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison;
import org.elasticsearch.xpack.esql.core.tree.Node;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField;
import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField;
import org.elasticsearch.xpack.esql.core.util.Holder;
import org.elasticsearch.xpack.esql.expression.function.TimestampAware;
import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg;
Expand All @@ -44,6 +50,7 @@
import org.elasticsearch.xpack.esql.plan.logical.Subquery;
import org.elasticsearch.xpack.esql.plan.logical.UnionAll;
import org.elasticsearch.xpack.esql.plan.logical.promql.PromqlCommand;
import org.elasticsearch.xpack.esql.session.FieldNameUtils;
import org.elasticsearch.xpack.esql.telemetry.FeatureMetric;
import org.elasticsearch.xpack.esql.telemetry.Metrics;

Expand All @@ -69,6 +76,8 @@
*/
public class Verifier {

static final String UNMAPPED_TIMESTAMP_SUFFIX = "; the [unmapped_fields] setting does not apply to the implicit @timestamp reference";

/**
* Extra plan verification checks defined in plugins.
*/
Expand Down Expand Up @@ -104,9 +113,10 @@ Collection<Failure> verify(LogicalPlan plan, BitSet partialMetrics) {
Collection<Failure> verify(LogicalPlan plan, BitSet partialMetrics, UnmappedResolution unmappedResolution) {
assert partialMetrics != null;
Failures failures = new Failures();
boolean unmappedTimestampHandled = unmappedResolution == UnmappedResolution.FAIL || isTimestampUnmappedInAllIndices(plan, failures);

// quick verification for unresolved attributes
checkUnresolvedAttributes(plan, failures);
checkUnresolvedAttributes(plan, failures, unmappedTimestampHandled);

ConfigurationAware.verifyNoMarkerConfiguration(plan, failures);

Expand All @@ -117,6 +127,7 @@ Collection<Failure> verify(LogicalPlan plan, BitSet partialMetrics, UnmappedReso

if (unmappedResolution == UnmappedResolution.LOAD) {
checkLoadModeDisallowedCommands(plan, failures);
checkFlattenedSubFieldLoad(plan, failures);
checkLoadModeDisallowedFunctions(plan, failures);
}

Expand Down Expand Up @@ -158,7 +169,7 @@ Collection<Failure> verify(LogicalPlan plan, BitSet partialMetrics, UnmappedReso
return failures.failures();
}

private static void checkUnresolvedAttributes(LogicalPlan plan, Failures failures) {
private static void checkUnresolvedAttributes(LogicalPlan plan, Failures failures, boolean skipUnresolvedTimestamp) {
plan.forEachUp(p -> {
// if the children are unresolved, so will this node; counting it will only add noise
if (p.childrenResolved() == false) {
Expand Down Expand Up @@ -191,6 +202,9 @@ else if (p.resolved()) {
return;
}

if (skipUnresolvedTimestamp && ae instanceof UnresolvedTimestamp) {
return;
}
if (ae instanceof Unresolvable u) {
failures.add(fail(ae, u.unresolvedMessage()));
}
Expand Down Expand Up @@ -363,6 +377,38 @@ private static void checkLimitBy(LogicalPlan plan, Failures failures) {
}
}

/**
 * The {@code unmapped_fields} setting does not apply to the implicit {@code @timestamp} reference ({@link TimestampAware} functions).
 * Only emits the specific message when {@code @timestamp} is truly absent from all source index mappings;
 * if the field was present but dropped/renamed by the query, the generic unresolved-attribute message is more appropriate.
 * See https://github.com/elastic/elasticsearch/issues/142127
 *
 * @return {@code true} when {@code @timestamp} is unmapped in every non-LOOKUP source relation (and the specific
 *         failures were added), {@code false} when at least one source maps it and the generic message should apply.
 */
private static boolean isTimestampUnmappedInAllIndices(LogicalPlan plan, Failures failures) {
    // If any non-LOOKUP source relation exposes @timestamp, the field was mapped somewhere and the
    // generic unresolved-attribute message is the right one; do not add the setting-specific failures.
    if (plan.anyMatch(p -> p instanceof EsRelation r && r.indexMode() != IndexMode.LOOKUP && hasTimestamp(r))) {
        return false;
    }
    plan.forEachDown(p -> {
        addUnmappedTimestampFailure(p, failures);
        p.forEachExpression(Expression.class, e -> addUnmappedTimestampFailure(e, failures));
    });
    return true;
}

/**
 * Adds the {@code @timestamp}-specific failure when {@code node} (a plan node or an expression) relies on an
 * implicit {@code @timestamp} that stayed unresolved. Shared by both traversals above to avoid duplicating
 * the message construction.
 */
private static void addUnmappedTimestampFailure(Node<?> node, Failures failures) {
    if (node instanceof TimestampAware ta && ta.timestamp() instanceof UnresolvedTimestamp) {
        failures.add(fail(node, "[{}] " + UnresolvedTimestamp.UNRESOLVED_SUFFIX + UNMAPPED_TIMESTAMP_SUFFIX, node.sourceText()));
    }
}

/** Whether the relation's output contains the {@code @timestamp} field by name. */
private static boolean hasTimestamp(EsRelation relation) {
    return relation.output().stream().anyMatch(attr -> MetadataAttribute.TIMESTAMP_FIELD.equals(attr.name()));
}

/**
* {@code unmapped_fields="load"} does not yet support branching commands (FORK, LOOKUP JOIN, subqueries/views).
* See https://github.com/elastic/elasticsearch/issues/142033
Expand All @@ -381,6 +427,62 @@ private static void checkLoadModeDisallowedCommands(LogicalPlan plan, Failures f
});
}

/**
 * Reject loading sub-fields of flattened fields when {@code unmapped_fields="load"}, by checking if any
 * {@link PotentiallyUnmappedKeywordEsField} is a sub-field of a parent field whose original type is flattened. The reason is that
 * flattened subfields resolution may eventually differ from what happens when {@code unmapped_fields="load"}.
 */
private static void checkFlattenedSubFieldLoad(LogicalPlan plan, Failures failures) {
    plan.forEachDown(p -> {
        if (p instanceof EsRelation == false) {
            return;
        }

        EsRelation esRelation = (EsRelation) p;
        Set<String> flattenedFieldNames = flattenedFieldNames(esRelation.output());

        if (flattenedFieldNames.isEmpty()) {
            return;
        }

        for (Attribute attr : esRelation.output()) {
            // Only fields resolved via the load-unmapped mechanism are of interest; the positive form keeps
            // the pattern variable in scope and follows this codebase's no-`!` negation convention.
            if (attr instanceof FieldAttribute fa && fa.field() instanceof PotentiallyUnmappedKeywordEsField) {
                String name = fa.name();
                // A subfield is offending if any dot-delimited ancestor is a flattened field.
                for (String parent : FieldNameUtils.parentPrefixes(name)) {
                    if (flattenedFieldNames.contains(parent)) {
                        Failure failure = fail(
                            fa,
                            "Loading subfield [{}] when parent [{}] is of flattened field type is not supported with "
                                + "unmapped_fields=\"load\"",
                            name,
                            parent
                        );
                        failures.add(failure);
                        break;
                    }
                }
            }
        }
    });
}

/** Collects the names of attributes whose field was originally mapped as {@code flattened} in at least one index. */
private static Set<String> flattenedFieldNames(List<Attribute> attributes) {
    Set<String> flattened = new HashSet<>();

    for (Attribute attribute : attributes) {
        boolean originallyFlattened = attribute instanceof FieldAttribute fa
            && fa.field() instanceof UnsupportedEsField unsupported
            && unsupported.getOriginalTypes().contains("flattened");
        if (originallyFlattened) {
            flattened.add(attribute.name());
        }
    }

    return flattened;
}

/**
* Disallow full-text search when unmapped_fields=load. We do not restrict to "only when the FTF
* is applied to an unmapped field" because FTFs like KQL can reference unmapped fields inside the
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.analysis.rules;

import org.elasticsearch.index.IndexMode;
import org.elasticsearch.xpack.esql.VerificationException;
import org.elasticsearch.xpack.esql.analysis.AnalyzerContext;
import org.elasticsearch.xpack.esql.analysis.AnalyzerRules;
import org.elasticsearch.xpack.esql.analysis.UnmappedResolution;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.core.type.InvalidMappedField;
import org.elasticsearch.xpack.esql.index.EsIndex;
import org.elasticsearch.xpack.esql.index.IndexResolution;
import org.elasticsearch.xpack.esql.plan.IndexPattern;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;

/**
* Fails the query when {@code SET unmapped_fields="load"} is used and any index has a field that is
* partially mapped (present in some indices, unmapped in others) and whose type where it is mapped
* is not KEYWORD. This is a temporary restriction until semantics are finalized.
* <p>
* Implemented as an analyzer rule (rather than in {@link org.elasticsearch.xpack.esql.analysis.Verifier})
* because this check requires index resolution context (e.g. {@link EsIndex#partiallyUnmappedFields()}).
* All offending field names are collected and reported in a single exception.
*/
public class DisallowLoadWithPartiallyMappedNonKeyword extends AnalyzerRules.ParameterizedAnalyzerRule<LogicalPlan, AnalyzerContext> {

    @Override
    protected boolean skipResolved() {
        // Run on all nodes so we always check indices in the plan.
        return false;
    }

    @Override
    protected LogicalPlan rule(LogicalPlan plan, AnalyzerContext context) {
        if (context.unmappedResolution() != UnmappedResolution.LOAD) {
            return plan;
        }
        // Restrict the check to indices actually referenced by the plan (EsRelation nodes).
        List<IndexResolution> referencedResolutions = new ArrayList<>();
        plan.forEachDown(EsRelation.class, relation -> {
            IndexResolution resolution = relation.indexMode() == IndexMode.LOOKUP
                ? context.lookupResolution().get(relation.indexPattern())
                : context.indexResolution().get(new IndexPattern(relation.source(), relation.indexPattern()));
            if (resolution != null) {
                referencedResolutions.add(resolution);
            }
        });
        Set<String> offending = new TreeSet<>();
        collectOffendingFromResolutions(referencedResolutions, offending);
        if (offending.isEmpty() == false) {
            throw new VerificationException(
                "unmapped_fields=\"load\" is not supported when the query involves partially mapped fields that are not KEYWORD: {}. "
                    + "Use unmapped_fields=\"nullify\" or unmapped_fields=\"fail\", or ensure the field is mapped as KEYWORD in all indices.",
                String.join(", ", offending)
            );
        }
        return plan;
    }

    /**
     * Adds to {@code offending} every partially-unmapped field of the given (valid) resolutions whose mapped type is
     * either conflicting ({@link InvalidMappedField}) or anything other than KEYWORD.
     */
    private static void collectOffendingFromResolutions(Collection<IndexResolution> resolutions, Set<String> offending) {
        for (IndexResolution resolution : resolutions) {
            if (resolution.isValid() == false) {
                continue;
            }
            EsIndex index = resolution.get();
            for (String fieldName : index.partiallyUnmappedFields()) {
                EsField field = resolveFieldByPath(index.mapping(), fieldName);
                // Short-circuit keeps the InvalidMappedField check first, exactly as before.
                if (field != null && (field instanceof InvalidMappedField || field.getDataType() != KEYWORD)) {
                    offending.add(fieldName);
                }
            }
        }
    }

    /**
     * Resolve a dotted field path (e.g. "a.b.c") in the given root mapping.
     * Returns the leaf EsField or null if any segment is missing.
     */
    private static EsField resolveFieldByPath(Map<String, EsField> root, String path) {
        if (path == null || path.isEmpty()) {
            return null;
        }
        String[] segments = path.split("\\.");
        Map<String, EsField> level = root;
        EsField resolved = null;
        for (int i = 0; i < segments.length; i++) {
            resolved = level.get(segments[i]);
            if (resolved == null) {
                return null;
            }
            boolean leaf = i == segments.length - 1;
            if (leaf == false) {
                // Descend into the sub-field properties for the next segment.
                Map<String, EsField> children = resolved.getProperties();
                if (children == null || children.isEmpty()) {
                    return null;
                }
                level = children;
            }
        }
        return resolved;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,13 @@
import org.elasticsearch.xpack.esql.plan.logical.join.LookupJoin;
import org.elasticsearch.xpack.esql.session.EsqlSession.PreAnalysisResult;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.stream.Stream;

import static java.util.stream.Collectors.toSet;
import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;

public class FieldNameUtils {
Expand Down Expand Up @@ -297,14 +296,48 @@ public static PreAnalysisResult resolveFieldNames(LogicalPlan parsed, boolean ha
// there cannot be an empty list of fields, we'll ask the simplest and lightest one instead: _index
return new PreAnalysisResult(IndexResolver.INDEX_METADATA_FIELD, wildcardJoinIndices);
} else {
HashSet<String> allFields = new HashSet<>(fieldNames.stream().flatMap(FieldNameUtils::withSubfields).collect(toSet()));
Set<String> allFields = new HashSet<>();
for (String name : fieldNames) {
addSubfields(name, allFields);
}
allFields.add(MetadataAttribute.INDEX);
return new PreAnalysisResult(allFields, wildcardJoinIndices);
}
}

private static Stream<String> withSubfields(String name) {
return name.endsWith(WILDCARD) ? Stream.of(name) : Stream.of(name, name + ".*");
/**
 * Expands a field name into a set of names to request from field caps. For example, "a.b.c" will be expanded to:
 * <ul>
 * <li>The field itself: "a.b.c"</li>
 * <li>Its multi-fields: "a.b.c.*"</li>
 * <li>All dot-delimited parent prefixes: ["a", "a.b"]. This is needed to get back flattened parents, so the verifier can
 * detect subfields of flattened.</li>
 * </ul>
 */
private static void addSubfields(String name, Set<String> allFields) {
    allFields.add(name);

    // Wildcard patterns are requested as-is; expansion applies only to concrete names.
    if (name.endsWith(WILDCARD) == false) {
        allFields.add(name + ".*");
        allFields.addAll(parentPrefixes(name));
    }
}

/**
 * Returns the dot-delimited parent prefixes of a field name. For example, "a.b.c" will return ["a", "a.b"]
 */
public static List<String> parentPrefixes(String name) {
    List<String> prefixes = new ArrayList<>();

    // Starting at > 0 skips a leading dot, so no empty prefix is ever produced.
    for (int dot = name.indexOf('.'); dot > 0; dot = name.indexOf('.', dot + 1)) {
        prefixes.add(name.substring(0, dot));
    }

    return prefixes;
}

/**
Expand Down
Loading
Loading