Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/141340.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
area: ES|QL
issues: []
pr: 141340
summary: Skip nullifying aliases for Aggregate groups.
type: bug
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ public static FieldAttribute fieldAttribute() {
return fieldAttribute(randomAlphaOfLength(10), randomFrom(DataType.types()));
}

// TODO: deduplicate some of the `FieldAttribute field(String name, DataType type)` methods in the ESQL tests (currently 6)
public static FieldAttribute fieldAttribute(String name, DataType type) {
return new FieldAttribute(EMPTY, name, new EsField(name, type, emptyMap(), randomBoolean(), EsField.TimeSeriesFieldType.NONE));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,45 @@ s:long | bar:null
0 | null
;

statsGroupAliasShadowingSourceColumnNoFilter
required_capability: optional_fields_nullify_skip_group_aliases

SET unmapped_fields="nullify"\;
FROM languages
| STATS c = COUNT(*) BY language_code = does_not_exist
;

c:long |language_code:null
4 |null
;

statsGroupAliasShadowingSourceColumnWithFilter
required_capability: optional_fields_nullify_skip_group_aliases

SET unmapped_fields="nullify"\;
FROM languages
| WHERE language_code == 1
| STATS c = COUNT(*) BY language_code = does_not_exist, language_name
;

c:long |language_code:null |language_name :keyword
1 |null |English
;

statsGroupAliasShadowingSourceColumnWithFilterAndAggExpression
required_capability: optional_fields_nullify_skip_group_aliases

SET unmapped_fields="nullify"\;
FROM languages
| WHERE language_code == 1
| STATS c = COUNT(*) + COALESCE(language_code, 0)
BY language_code = does_not_exist1::INTEGER + does_not_exist2::INTEGER + language_code, language_name
;

c:long |language_code:integer |language_name :keyword
1 |null |English
;

inlinestatsSum
required_capability: optional_fields_nullify_tech_preview
SET unmapped_fields="nullify"\;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ public enum Cap {
*/
OPTIONAL_FIELDS_NULLIFY_TECH_PREVIEW,

/**
* Don't nullify aliases for Aggregate groupings.
*/
OPTIONAL_FIELDS_NULLIFY_SKIP_GROUP_ALIASES,

/**
* Support specifically for *just* the _index METADATA field. Used by CsvTests, since that is the only metadata field currently
* supported.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,17 +659,25 @@ private List<? extends NamedExpression> maybeResolveAggregates(
List<? extends NamedExpression> aggregates = aggregate.aggregates();

ArrayList<Attribute> resolvedGroupings = new ArrayList<>(newGroupings.size());
Set<String> unresolvedGroupingNames = new HashSet<>(newGroupings.size());
for (Expression e : newGroupings) {
Attribute attr = Expressions.attribute(e);
if (attr != null && attr.resolved()) {
resolvedGroupings.add(attr);
if (attr != null) {
if (attr.resolved()) {
resolvedGroupings.add(attr);
} else {
unresolvedGroupingNames.add(attr.name());
}
}
}

boolean allGroupingsResolved = groupings.size() == resolvedGroupings.size();
if (allGroupingsResolved == false || Resolvables.resolved(aggregates) == false) {
Holder<Boolean> changed = new Holder<>(false);
List<Attribute> resolvedList = NamedExpressions.mergeOutputAttributes(resolvedGroupings, childrenOutput);
var inputAttributes = new ArrayList<>(childrenOutput);
// remove input attributes with the same name as unresolved groupings: could be shadowed by not yet resolved renamed groups
inputAttributes.removeIf(a -> unresolvedGroupingNames.contains(a.name()));
List<Attribute> resolvedList = NamedExpressions.mergeOutputAttributes(resolvedGroupings, inputAttributes);

List<NamedExpression> newAggregates = new ArrayList<>(aggregates.size());
// If no groupings are resolved, skip the resolution of the references to groupings in the aggregates, resolve the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import org.elasticsearch.xpack.esql.core.expression.UnresolvedPattern;
import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp;
import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField;
import org.elasticsearch.xpack.esql.core.util.Holder;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Fork;
Expand All @@ -33,9 +33,11 @@
import org.elasticsearch.xpack.esql.plan.logical.Project;
import org.elasticsearch.xpack.esql.plan.logical.Row;
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
import org.elasticsearch.xpack.esql.plan.logical.join.Join;
import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
Expand Down Expand Up @@ -86,24 +88,25 @@ private static LogicalPlan resolve(LogicalPlan plan, boolean load) {

var transformed = load ? load(plan, unresolvedLinkedSet) : nullify(plan, unresolvedLinkedSet);

return transformed.equals(plan) ? plan : refreshPlan(transformed, unresolved);
return transformed == plan ? plan : refreshPlan(transformed, unresolved);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to this change, I found refreshUnresolved(LogicalPlan plan, List<UnresolvedAttribute> unresolved) method to be unnecessary, imho. I guess it boils down to one's style/preference, for me the code is too fragmented in few places with methods that are called only once. refreshPlan is imho better describing the logic if all the code is in that method.

}

/**
* The method introduces {@code EVAL missing_field = NULL}-equivalent into the plan, on top of the source, for every attribute in
* {@code unresolved}. It also "patches" the introduced attributes through the plan, where needed (like through Fork/UntionAll).
* {@code unresolved}.
*/
private static LogicalPlan nullify(LogicalPlan plan, Set<UnresolvedAttribute> unresolved) {
// insert an Eval on top of every LeafPlan, if there's a UnaryPlan atop it
var transformed = plan.transformUp(
n -> n instanceof UnaryPlan unary && unary.child() instanceof LeafPlan,
p -> evalUnresolvedAtopUnary((UnaryPlan) p, nullAliases(unresolved))
);
// insert an Eval on top of those LeafPlan that are children of n-ary plans (could happen with UnionAll)
return transformed.transformUp(
n -> n instanceof UnaryPlan == false && n instanceof LeafPlan == false,
nAry -> evalUnresolvedAtopNary(nAry, nullAliases(unresolved))
);
private static LogicalPlan nullify(LogicalPlan plan, LinkedHashSet<UnresolvedAttribute> unresolved) {
return plan.transformUp(n -> {
// insert an Eval on top of every LeafPlan, if there's a UnaryPlan atop it
if (n instanceof UnaryPlan unary && unary.child() instanceof LeafPlan) {
return evalUnresolvedAtopUnary(unary, nullAliases(unresolved));
}
// insert an Eval on top of those LeafPlan that are children of n-ary plans (LookupJoin, UnionAll)
if ((n instanceof UnaryPlan || n instanceof LeafPlan) == false) {
return evalUnresolvedBelowNary(n, unresolved);
}
return n;
});
}

/**
Expand Down Expand Up @@ -147,15 +150,13 @@ private static Fork patchFork(Fork fork) {
List<LogicalPlan> newChildren = new ArrayList<>(fork.children().size());
boolean childrenChanged = false;
for (var child : fork.children()) {
Holder<Boolean> patched = new Holder<>(false);
var transformed = child.transformDown(
// TODO add a suitable forEachDownMayReturnEarly equivalent
n -> patched.get() == false && n instanceof Project, // process top Project only (Fork-injected)
n -> {
patched.set(true);
return patchForkProject((Project) n);
var transformed = child.transformDownSkipBranch((n, skip) -> {
if (n instanceof Project project) {
n = patchForkProject(project);
skip.set(true); // process top Project only (Fork-injected)
}
);
return n;
});
childrenChanged |= transformed != child;
newChildren.add(transformed);
}
Expand All @@ -167,12 +168,14 @@ private static Fork patchFork(Fork fork) {
* by the evalUnresolvedAtopXXX methods and need to be "let through" the Project.
*/
private static Project patchForkProject(Project project) {
var projectOutput = project.output();
var childOutput = project.child().output();
List<Attribute> projectOutput = project.output();
List<Attribute> childOutput = project.child().output();
if (projectOutput.equals(childOutput) == false) {
List<Attribute> delta = new ArrayList<>(childOutput);
delta.removeAll(projectOutput);
project = project.withProjections(mergeOutputAttributes(delta, projectOutput));
if (delta.isEmpty() == false) {
project = project.withProjections(mergeOutputAttributes(delta, projectOutput));
}
}
return project;
}
Expand Down Expand Up @@ -201,12 +204,15 @@ private static LogicalPlan refreshUnresolved(LogicalPlan plan, List<UnresolvedAt
/**
* Inserts an Eval atop each child of the given {@code nAry}, if the child is a LeafPlan.
*/
private static LogicalPlan evalUnresolvedAtopNary(LogicalPlan nAry, List<Alias> nullAliases) {
private static LogicalPlan evalUnresolvedBelowNary(LogicalPlan nAry, LinkedHashSet<UnresolvedAttribute> unresolved) {
List<LogicalPlan> newChildren = new ArrayList<>(nAry.children().size());
boolean changed = false;
for (var child : nAry.children()) {
if (child instanceof LeafPlan source) {
if (child instanceof LeafPlan source
// skip right-sides of the Joins
&& (nAry instanceof Join == false || child == ((Join) nAry).left())) {
assertSourceType(source);
var nullAliases = removeShadowing(nullAliases(unresolved), source.output());
child = new Eval(source.source(), source, nullAliases);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to break the logic here because there doesn't seem to be a way to protect the Eval constructor in case nullAliases is empty (removeShadowing could change its content) I encountered an issue with the following query:

from employees | eval does_not_exist = does_not_exist2 | mv_expand does_not_exist | keep does_not_exist*

which results in

                "type": "illegal_state_exception",
                "reason": "Found 1 problem\nline 4:85: Plan [ProjectExec[[<no-fields>{r$}#81]]] optimized incorrectly due to missing references [<no-fields>{r$}#81]",

My initial attempt was with

from employees | eval does_not_exist = does_not_exist2 | mv_expand does_not_exist | keep does_not_exist* | stats count(*)

which returned 0 which is not correct.

changed = true;
}
Expand Down Expand Up @@ -236,13 +242,26 @@ private static LogicalPlan evalUnresolvedAtopUnary(UnaryPlan unaryAtopSource, Li
}
return new Eval(eval.source(), eval.child(), combine(pre, eval.fields(), post));
} else {
return unaryAtopSource.replaceChild(new Eval(unaryAtopSource.source(), unaryAtopSource.child(), nullAliases));
List<Alias> filteredNullAliases = removeShadowing(nullAliases, unaryAtopSource.child().output());
return unaryAtopSource.replaceChild(new Eval(unaryAtopSource.source(), unaryAtopSource.child(), filteredNullAliases));
}
}

private static List<Alias> removeShadowing(List<Alias> aliases, List<Attribute> exclude) {
Set<String> excludeNames = new HashSet<>(Expressions.names(exclude));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering, a wild thought, if the exclude list should skip synthetic attributes...

aliases.removeIf(a -> excludeNames.contains(a.name()));
return aliases;
}

private static void assertSourceType(LogicalPlan source) {
switch (source) {
case EsRelation unused -> {
case EsRelation esRelation -> {
if (esRelation.indexMode() != IndexMode.STANDARD) {
throw new EsqlIllegalArgumentException(
"invalid source type [{}] for unmapped field resolution",
esRelation.indexMode()
);
}
}
case Row unused -> {
}
Expand All @@ -252,7 +271,7 @@ private static void assertSourceType(LogicalPlan source) {
}
}

private static List<Alias> nullAliases(Set<UnresolvedAttribute> unresolved) {
private static List<Alias> nullAliases(LinkedHashSet<UnresolvedAttribute> unresolved) {
List<Alias> aliases = new ArrayList<>(unresolved.size());
unresolved.forEach(u -> aliases.add(nullAlias(u)));
return aliases;
Expand All @@ -274,12 +293,31 @@ private static LinkedHashSet<UnresolvedAttribute> unresolvedLinkedSet(List<Unres
* {@link UnresolvedTimestamp} subtypes.
*/
private static List<UnresolvedAttribute> collectUnresolved(LogicalPlan plan) {
var aliasedGroupings = aliasNamesInAggregateGroupings(plan);
List<UnresolvedAttribute> unresolved = new ArrayList<>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here you could also build a LinkedHashSet directly and not call unresolvedLinkedSet (which is used only once).

plan.forEachExpression(UnresolvedAttribute.class, ua -> {
if ((ua instanceof UnresolvedPattern || ua instanceof UnresolvedTimestamp) == false) {
if ((ua instanceof UnresolvedPattern || ua instanceof UnresolvedTimestamp) == false
// The aggs will "export" the aliases as UnresolvedAttributes part of their .aggregates(); we don't need to consider those
// as they'll be resolved as refs once the aliased expression is resolved.
&& aliasedGroupings.contains(ua.name()) == false) {
unresolved.add(ua);
}
});
return unresolved;
}

/**
* @return the names of the aliases used in the grouping expressions of any Aggregate found in the plan.
*/
private static Set<String> aliasNamesInAggregateGroupings(LogicalPlan plan) {
Set<String> aliasNames = new LinkedHashSet<>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need a LinkedHashSet here, unless I'm missing something. A simple HashSet should be enough.

plan.forEachUp(Aggregate.class, agg -> {
for (var grouping : agg.groupings()) {
if (grouping instanceof Alias alias) {
aliasNames.add(alias.name());
}
}
});
return aliasNames;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.enrich.EnrichPolicy;
import org.elasticsearch.xpack.esql.EsqlTestUtils;
import org.elasticsearch.xpack.esql.VerificationException;
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.core.type.InvalidMappedField;
Expand All @@ -23,6 +25,7 @@
import org.elasticsearch.xpack.esql.index.IndexResolution;
import org.elasticsearch.xpack.esql.inference.InferenceResolution;
import org.elasticsearch.xpack.esql.inference.ResolvedInference;
import org.elasticsearch.xpack.esql.optimizer.rules.PlanConsistencyChecker;
import org.elasticsearch.xpack.esql.parser.EsqlParser;
import org.elasticsearch.xpack.esql.parser.QueryParams;
import org.elasticsearch.xpack.esql.plan.EsqlStatement;
Expand Down Expand Up @@ -172,12 +175,24 @@ public static LogicalPlan analyze(String query) {
}

public static LogicalPlan analyzeStatement(String query) {
return analyzeStatement(query, true);
}

public static LogicalPlan analyzeStatement(String query, boolean checkPlan) {
var statement = EsqlParser.INSTANCE.createStatement(query);
var relations = statement.plan().collectFirstChildren(UnresolvedRelation.class::isInstance);
var indexName = relations.isEmpty() ? null : ((UnresolvedRelation) relations.getFirst()).indexPattern().indexPattern();
var indexResolutions = indexResolutions(indexName);
var analyzer = analyzer(indexResolutions, TEST_VERIFIER, configuration(query), statement);
return analyzer.analyze(statement.plan());
var analyzed = analyzer.analyze(statement.plan());
if (checkPlan) {
var failures = new Failures();
PlanConsistencyChecker.checkPlan(analyzed, failures);
if (failures.hasFailures()) {
throw new VerificationException(failures);
}
}
return analyzed;
}

public static LogicalPlan analyze(String query, String mapping) {
Expand Down
Loading