-
Notifications
You must be signed in to change notification settings - Fork 25.8k
ESQL: Skip nullifying aliases for Aggregate groups #141340
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c435d5d
fd346a1
6453bfc
c213602
0aae586
6b19262
28c3bee
1288039
6794694
a720868
179b807
1028271
f0d20c8
feee104
65198a2
53fe729
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| area: ES|QL | ||
| issues: [] | ||
| pr: 141340 | ||
| summary: Skip nullifying aliases for Aggregate groups. | ||
| type: bug |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,7 +24,7 @@ | |
| import org.elasticsearch.xpack.esql.core.expression.UnresolvedPattern; | ||
| import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp; | ||
| import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; | ||
| import org.elasticsearch.xpack.esql.core.util.Holder; | ||
| import org.elasticsearch.xpack.esql.plan.logical.Aggregate; | ||
| import org.elasticsearch.xpack.esql.plan.logical.EsRelation; | ||
| import org.elasticsearch.xpack.esql.plan.logical.Eval; | ||
| import org.elasticsearch.xpack.esql.plan.logical.Fork; | ||
|
|
@@ -33,9 +33,11 @@ | |
| import org.elasticsearch.xpack.esql.plan.logical.Project; | ||
| import org.elasticsearch.xpack.esql.plan.logical.Row; | ||
| import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; | ||
| import org.elasticsearch.xpack.esql.plan.logical.join.Join; | ||
| import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.HashSet; | ||
| import java.util.LinkedHashMap; | ||
| import java.util.LinkedHashSet; | ||
| import java.util.List; | ||
|
|
@@ -86,24 +88,25 @@ private static LogicalPlan resolve(LogicalPlan plan, boolean load) { | |
|
|
||
| var transformed = load ? load(plan, unresolvedLinkedSet) : nullify(plan, unresolvedLinkedSet); | ||
|
|
||
| return transformed.equals(plan) ? plan : refreshPlan(transformed, unresolved); | ||
| return transformed == plan ? plan : refreshPlan(transformed, unresolved); | ||
| } | ||
|
|
||
| /** | ||
| * The method introduces {@code EVAL missing_field = NULL}-equivalent into the plan, on top of the source, for every attribute in | ||
| * {@code unresolved}. It also "patches" the introduced attributes through the plan, where needed (like through Fork/UntionAll). | ||
| * {@code unresolved}. | ||
| */ | ||
| private static LogicalPlan nullify(LogicalPlan plan, Set<UnresolvedAttribute> unresolved) { | ||
| // insert an Eval on top of every LeafPlan, if there's a UnaryPlan atop it | ||
| var transformed = plan.transformUp( | ||
| n -> n instanceof UnaryPlan unary && unary.child() instanceof LeafPlan, | ||
| p -> evalUnresolvedAtopUnary((UnaryPlan) p, nullAliases(unresolved)) | ||
| ); | ||
| // insert an Eval on top of those LeafPlan that are children of n-ary plans (could happen with UnionAll) | ||
| return transformed.transformUp( | ||
| n -> n instanceof UnaryPlan == false && n instanceof LeafPlan == false, | ||
| nAry -> evalUnresolvedAtopNary(nAry, nullAliases(unresolved)) | ||
| ); | ||
| private static LogicalPlan nullify(LogicalPlan plan, LinkedHashSet<UnresolvedAttribute> unresolved) { | ||
| return plan.transformUp(n -> { | ||
| // insert an Eval on top of every LeafPlan, if there's a UnaryPlan atop it | ||
| if (n instanceof UnaryPlan unary && unary.child() instanceof LeafPlan) { | ||
| return evalUnresolvedAtopUnary(unary, nullAliases(unresolved)); | ||
| } | ||
| // insert an Eval on top of those LeafPlan that are children of n-ary plans (LookupJoin, UnionAll) | ||
| if ((n instanceof UnaryPlan || n instanceof LeafPlan) == false) { | ||
| return evalUnresolvedBelowNary(n, unresolved); | ||
| } | ||
| return n; | ||
| }); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -147,15 +150,13 @@ private static Fork patchFork(Fork fork) { | |
| List<LogicalPlan> newChildren = new ArrayList<>(fork.children().size()); | ||
| boolean childrenChanged = false; | ||
| for (var child : fork.children()) { | ||
| Holder<Boolean> patched = new Holder<>(false); | ||
| var transformed = child.transformDown( | ||
| // TODO add a suitable forEachDownMayReturnEarly equivalent | ||
| n -> patched.get() == false && n instanceof Project, // process top Project only (Fork-injected) | ||
| n -> { | ||
| patched.set(true); | ||
| return patchForkProject((Project) n); | ||
| var transformed = child.transformDownSkipBranch((n, skip) -> { | ||
| if (n instanceof Project project) { | ||
| n = patchForkProject(project); | ||
| skip.set(true); // process top Project only (Fork-injected) | ||
| } | ||
| ); | ||
| return n; | ||
| }); | ||
| childrenChanged |= transformed != child; | ||
| newChildren.add(transformed); | ||
| } | ||
|
|
@@ -167,12 +168,14 @@ private static Fork patchFork(Fork fork) { | |
| * by the evalUnresolvedAtopXXX methods and need to be "let through" the Project. | ||
| */ | ||
| private static Project patchForkProject(Project project) { | ||
| var projectOutput = project.output(); | ||
| var childOutput = project.child().output(); | ||
| List<Attribute> projectOutput = project.output(); | ||
| List<Attribute> childOutput = project.child().output(); | ||
| if (projectOutput.equals(childOutput) == false) { | ||
| List<Attribute> delta = new ArrayList<>(childOutput); | ||
| delta.removeAll(projectOutput); | ||
| project = project.withProjections(mergeOutputAttributes(delta, projectOutput)); | ||
| if (delta.isEmpty() == false) { | ||
| project = project.withProjections(mergeOutputAttributes(delta, projectOutput)); | ||
| } | ||
| } | ||
| return project; | ||
| } | ||
|
|
@@ -201,12 +204,15 @@ private static LogicalPlan refreshUnresolved(LogicalPlan plan, List<UnresolvedAt | |
| /** | ||
| * Inserts an Eval atop each child of the given {@code nAry}, if the child is a LeafPlan. | ||
| */ | ||
| private static LogicalPlan evalUnresolvedAtopNary(LogicalPlan nAry, List<Alias> nullAliases) { | ||
| private static LogicalPlan evalUnresolvedBelowNary(LogicalPlan nAry, LinkedHashSet<UnresolvedAttribute> unresolved) { | ||
| List<LogicalPlan> newChildren = new ArrayList<>(nAry.children().size()); | ||
| boolean changed = false; | ||
| for (var child : nAry.children()) { | ||
| if (child instanceof LeafPlan source) { | ||
| if (child instanceof LeafPlan source | ||
| // skip right-sides of the Joins | ||
| && (nAry instanceof Join == false || child == ((Join) nAry).left())) { | ||
| assertSourceType(source); | ||
| var nullAliases = removeShadowing(nullAliases(unresolved), source.output()); | ||
| child = new Eval(source.source(), source, nullAliases); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trying to break the logic here because there doesn't seem to be a way to protect the which results in My initial attempt was with which returned |
||
| changed = true; | ||
| } | ||
|
|
@@ -236,13 +242,26 @@ private static LogicalPlan evalUnresolvedAtopUnary(UnaryPlan unaryAtopSource, Li | |
| } | ||
| return new Eval(eval.source(), eval.child(), combine(pre, eval.fields(), post)); | ||
| } else { | ||
| return unaryAtopSource.replaceChild(new Eval(unaryAtopSource.source(), unaryAtopSource.child(), nullAliases)); | ||
| List<Alias> filteredNullAliases = removeShadowing(nullAliases, unaryAtopSource.child().output()); | ||
| return unaryAtopSource.replaceChild(new Eval(unaryAtopSource.source(), unaryAtopSource.child(), filteredNullAliases)); | ||
| } | ||
| } | ||
|
|
||
| private static List<Alias> removeShadowing(List<Alias> aliases, List<Attribute> exclude) { | ||
| Set<String> excludeNames = new HashSet<>(Expressions.names(exclude)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am wondering, a wild thought, if the |
||
| aliases.removeIf(a -> excludeNames.contains(a.name())); | ||
| return aliases; | ||
| } | ||
|
|
||
| private static void assertSourceType(LogicalPlan source) { | ||
| switch (source) { | ||
| case EsRelation unused -> { | ||
| case EsRelation esRelation -> { | ||
| if (esRelation.indexMode() != IndexMode.STANDARD) { | ||
| throw new EsqlIllegalArgumentException( | ||
| "invalid source type [{}] for unmapped field resolution", | ||
| esRelation.indexMode() | ||
| ); | ||
| } | ||
| } | ||
| case Row unused -> { | ||
| } | ||
|
|
@@ -252,7 +271,7 @@ private static void assertSourceType(LogicalPlan source) { | |
| } | ||
| } | ||
|
|
||
| private static List<Alias> nullAliases(Set<UnresolvedAttribute> unresolved) { | ||
| private static List<Alias> nullAliases(LinkedHashSet<UnresolvedAttribute> unresolved) { | ||
| List<Alias> aliases = new ArrayList<>(unresolved.size()); | ||
| unresolved.forEach(u -> aliases.add(nullAlias(u))); | ||
| return aliases; | ||
|
|
@@ -274,12 +293,31 @@ private static LinkedHashSet<UnresolvedAttribute> unresolvedLinkedSet(List<Unres | |
| * {@link UnresolvedTimestamp} subtypes. | ||
| */ | ||
| private static List<UnresolvedAttribute> collectUnresolved(LogicalPlan plan) { | ||
| var aliasedGroupings = aliasNamesInAggregateGroupings(plan); | ||
| List<UnresolvedAttribute> unresolved = new ArrayList<>(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here you could also build a |
||
| plan.forEachExpression(UnresolvedAttribute.class, ua -> { | ||
| if ((ua instanceof UnresolvedPattern || ua instanceof UnresolvedTimestamp) == false) { | ||
| if ((ua instanceof UnresolvedPattern || ua instanceof UnresolvedTimestamp) == false | ||
| // The aggs will "export" the aliases as UnresolvedAttributes part of their .aggregates(); we don't need to consider those | ||
| // as they'll be resolved as refs once the aliased expression is resolved. | ||
| && aliasedGroupings.contains(ua.name()) == false) { | ||
| unresolved.add(ua); | ||
| } | ||
| }); | ||
| return unresolved; | ||
| } | ||
|
|
||
| /** | ||
| * @return the names of the aliases used in the grouping expressions of any Aggregate found in the plan. | ||
| */ | ||
| private static Set<String> aliasNamesInAggregateGroupings(LogicalPlan plan) { | ||
| Set<String> aliasNames = new LinkedHashSet<>(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think you need a |
||
| plan.forEachUp(Aggregate.class, agg -> { | ||
| for (var grouping : agg.groupings()) { | ||
| if (grouping instanceof Alias alias) { | ||
| aliasNames.add(alias.name()); | ||
| } | ||
| } | ||
| }); | ||
| return aliasNames; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unrelated to this change, I found
refreshUnresolved(LogicalPlan plan, List<UnresolvedAttribute> unresolved)method to be unnecessary, imho. I guess it boils down to one's style/preference, for me the code is too fragmented in few places with methods that are called only once.refreshPlanis imho better describing the logic if all the code is in that method.