Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/138270.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 138270
summary: Drop `PropagateInlineEvals` optimizer rule
area: ES|QL
type: enhancement
issues:
- 124754
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are cosmetic. Stumbled upon them during development, as tests were failing and realised they're hard to read.

Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,7 @@ required_capability: inline_stats
FROM employees
| KEEP emp_no, languages, gender, last_name
| WHERE gender IS NOT NULL
| INLINE STATS max_lang = MAX(languages), min_lang = MIN(languages) BY f = left(last_name, 1), gender
| INLINE STATS max_lang = MAX(languages), min_lang = MIN(languages) BY f = LEFT(last_name, 1), gender
| SORT last_name DESC
| LIMIT 8
;
Expand Down Expand Up @@ -2673,14 +2673,14 @@ FROM employees
stdDevFilter
required_capability: inline_stats
FROM employees
| inline stats greater_than = STD_DEV(salary_change) WHERE languages > 3
, less_than = STD_DEV(salary_change) WHERE languages <= 3
, salary = STD_DEV(salary * 2)
, count = COUNT(*) BY gender
| INLINE STATS greater_than = STD_DEV(salary_change) WHERE languages > 3
, less_than = STD_DEV(salary_change) WHERE languages <= 3
, salary = STD_DEV(salary * 2)
, count = COUNT(*) BY gender
| EVAL greater_than = ROUND(greater_than, 5)
, less_than = ROUND(less_than, 5)
, salary = ROUND(salary, 5)
| keep emp_no, gender, languages, *than, salary, count
, less_than = ROUND(less_than, 5)
, salary = ROUND(salary, 5)
| KEEP emp_no, gender, languages, *than, salary, count
| SORT emp_no asc
| limit 10
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEmptyRelation;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEquals;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateEvalFoldables;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateInlineEvals;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropagateNullable;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PropgateUnmappedFields;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.PruneColumns;
Expand Down Expand Up @@ -159,9 +158,6 @@ protected static Batch<LogicalPlan> substitutions() {
// after translating metric aggregates, we need to replace surrogate substitutions and nested expressions again.
new SubstituteSurrogateAggregations(),
new ReplaceAggregateNestedExpressionWithEval(),
// this one needs to be placed before ReplaceAliasingEvalWithProject, so that any potential aliasing eval (eval x = y)
// is not replaced with a Project before the eval to be copied on the left hand side of an InlineJoin
new PropagateInlineEvals(),
new ReplaceRegexMatch(),
new ReplaceTrivialTypeConversions(),
new ReplaceAliasingEvalWithProject(),
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

package org.elasticsearch.xpack.esql.optimizer.rules.logical;

import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.xpack.esql.core.expression.Alias;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
Expand All @@ -18,6 +20,10 @@
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
import org.elasticsearch.xpack.esql.plan.logical.join.StubRelation;
import org.elasticsearch.xpack.esql.rule.Rule;

import java.util.ArrayList;
import java.util.HashMap;
Expand All @@ -34,14 +40,57 @@
* becomes
* {@code EVAL `a + 1` = a + 1, `x % 2` = x % 2 | INLINE STATS SUM(`a+1`_ref) BY `x % 2`_ref}
*/
public final class ReplaceAggregateNestedExpressionWithEval extends OptimizerRules.OptimizerRule<Aggregate> {
public final class ReplaceAggregateNestedExpressionWithEval extends Rule<LogicalPlan, LogicalPlan> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd add an example to the javadoc, so it's easier to expect what this does with inline stats.


@Override
protected LogicalPlan rule(Aggregate aggregate) {
List<Alias> evals = new ArrayList<>();
public LogicalPlan apply(LogicalPlan plan) {
return plan.transformDown(p -> switch (p) {
case InlineJoin inlineJoin -> rule(inlineJoin);
// aggs having a StubRelation child are handled by the InlineJoin case above, only deal with the "stand-alone" Aggregate here.
case Aggregate agg -> isInlineStats(agg) ? agg : rule(agg, null);
default -> p;
});
}

/**
* Returns {@code true} if the Aggregate has a {@code StubRelation} as (grand)child, meaning it is under a {@code InlineJoin}, i.e.,
* part of an {@code INLINE STATS}.
*/
private static boolean isInlineStats(Aggregate aggregate) {
var child = aggregate.child();
while (child instanceof UnaryPlan unary) {
child = unary.child();
}
return child instanceof StubRelation;
}

/**
* The InlineJoin will perform the join on the groupings, so any expressions used within the group part of the Aggregate should be
* executed on the left side of the join: they'll be part of LHS's output, and through the StubRelation, RHS's too.
* The expressions used within the aggregates part of the Aggregate will remain on the right: they'll only be used for computing the
* joined values (corresponding to the groups values).
*/
private static LogicalPlan rule(InlineJoin inlineJoin) {
Holder<Eval> evalHolder = new Holder<>(null);
LogicalPlan newRight = inlineJoin.right().transformDown(Aggregate.class, agg -> rule(agg, evalHolder));
Eval eval = evalHolder.get();
if (eval != null) {
// update the StubRelation to include the refs that'll come from the LHS Eval (added next)
newRight = newRight.transformDown(StubRelation.class, sr -> sr.extendWith(eval));
inlineJoin = new InlineJoin(inlineJoin.source(), eval.replaceChild(inlineJoin.left()), newRight, inlineJoin.config());
} else {
inlineJoin = (InlineJoin) inlineJoin.replaceRight(newRight);
}
return inlineJoin;
}

private static LogicalPlan rule(Aggregate aggregate, @Nullable Holder<Eval> evalForIJHolder) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe evalForIJHolder -> evalForInlineJoin? I was puzzled what IJ was supposed to be for a bit.

Map<String, Attribute> evalNames = new HashMap<>();
Map<GroupingFunction, Attribute> groupingAttributes = new HashMap<>();
List<Expression> newGroupings = new ArrayList<>(aggregate.groupings());
// Evaluations needed for expressions within the groupings
// "| STATS c = COUNT(*) BY a + 1" --> "| EVAL `a + 1` = a + 1 | STATS s = COUNT(*) BY `a + 1`_ref"
List<Alias> groupsEvals = new ArrayList<>(newGroupings.size());
boolean groupingChanged = false;

// start with the groupings since the aggs might reuse/reference them
Expand All @@ -52,7 +101,7 @@ protected LogicalPlan rule(Aggregate aggregate) {
// for non-evaluable grouping functions, replace their nested expressions with attributes and extract the expression out
// into an eval (added later below)
if (asChild instanceof GroupingFunction.NonEvaluatableGroupingFunction gf) {
Expression newGroupingFunction = transformNonEvaluatableGroupingFunction(gf, evals);
Expression newGroupingFunction = transformNonEvaluatableGroupingFunction(gf, groupsEvals);
if (newGroupingFunction != gf) {
groupingChanged = true;
newGroupings.set(i, as.replaceChild(newGroupingFunction));
Expand All @@ -61,7 +110,7 @@ protected LogicalPlan rule(Aggregate aggregate) {
// Move the alias into an eval and replace it with its attribute.
groupingChanged = true;
var attr = as.toAttribute();
evals.add(as);
groupsEvals.add(as);
evalNames.put(as.name(), attr);
newGroupings.set(i, attr);
if (asChild instanceof GroupingFunction.EvaluatableGroupingFunction gf) {
Expand All @@ -74,10 +123,14 @@ protected LogicalPlan rule(Aggregate aggregate) {
Holder<Boolean> aggsChanged = new Holder<>(false);
List<? extends NamedExpression> aggs = aggregate.aggregates();
List<NamedExpression> newAggs = new ArrayList<>(aggs.size());
// Evaluations needed for expressions within the aggs
// "| STATS s = SUM(a + 1)" --> "| EVAL `a + 1` = a + 1 | STATS s = SUM(`a + 1`_ref)"
// (i.e. not outside, like `| STATS s = SUM(a) + 1`; those are handled by ReplaceAggregateAggExpressionWithEval)
List<Alias> aggsEvals = new ArrayList<>(aggs.size());

// map to track common expressions
Map<Expression, Attribute> expToAttribute = new HashMap<>();
for (Alias a : evals) {
for (Alias a : groupsEvals) {
expToAttribute.put(a.child().canonical(), a.toAttribute());
}

Expand All @@ -102,7 +155,7 @@ protected LogicalPlan rule(Aggregate aggregate) {
// look for the aggregate function
var replaced = child.transformUp(
AggregateFunction.class,
af -> transformAggregateFunction(af, expToAttribute, evals, counter, aggsChanged)
af -> transformAggregateFunction(af, expToAttribute, aggsEvals, counter, aggsChanged)
);
// replace any evaluatable grouping functions with their references pointing to the added synthetic eval
replaced = replaced.transformDown(GroupingFunction.EvaluatableGroupingFunction.class, gf -> {
Expand All @@ -118,17 +171,56 @@ protected LogicalPlan rule(Aggregate aggregate) {
newAggs.add(a);
}

if (evals.size() > 0) {
var groupings = groupingChanged ? newGroupings : aggregate.groupings();
var aggregates = aggsChanged.get() ? newAggs : aggregate.aggregates();

var newEval = new Eval(aggregate.source(), aggregate.child(), evals);
aggregate = aggregate.with(newEval, groupings, aggregates);
if (groupingChanged || aggsChanged.get()) {
var evals = evals(aggregate, groupsEvals, aggsEvals, evalForIJHolder != null);
if (evalForIJHolder != null) {
evalForIJHolder.set(evals.v1());
}
aggregate = updateAggregate(aggregate, evals.v2(), groupingChanged ? newGroupings : null, aggsChanged.get() ? newAggs : null);
}

return aggregate;
}

/**
* The evals that will go under the Aggregate: either all the evals collected, for "stand-alone" Aggregate,
* or only those needed for the aggregates (nested) expressions, for the Aggregate under InlineJoin.
* @return a Tuple of {@code Eval}s (LHS, RHS), either of which can be null if no evals are needed. In case the Aggregate is
* stand-alone, the RHS Eval will contain all evals, and the LHS will be null.
*/
private static Tuple<Eval, Eval> evals(Aggregate aggregate, List<Alias> groupsEvals, List<Alias> aggsEvals, boolean isInlineStats) {
Eval lhs = null, rhs;
List<Alias> subAggEvals;

if (isInlineStats) { // this is an INLINE STATS scenario, group evals go to the LHS, aggs evals remain on the RHS
if (groupsEvals.size() > 0) {
lhs = new Eval(aggregate.source(), aggregate.child(), groupsEvals); // LHS evals
}
subAggEvals = aggsEvals; // RHS evals
} else { // this is a regular STATS scenario, place all evals under the Aggregate
subAggEvals = groupsEvals;
subAggEvals.addAll(aggsEvals);
}

// add an Eval (if needed), going under the Aggregate
rhs = subAggEvals.size() > 0 ? new Eval(aggregate.source(), aggregate.child(), subAggEvals) : null;

return Tuple.tuple(lhs, rhs);
}

private static Aggregate updateAggregate(
Aggregate aggregate,
@Nullable LogicalPlan newChild,
@Nullable List<Expression> newGroupings,
@Nullable List<NamedExpression> newAggs
) {
var groupings = newGroupings != null ? newGroupings : aggregate.groupings();
var aggregates = newAggs != null ? newAggs : aggregate.aggregates();
var child = newChild != null ? newChild : aggregate.child();

return aggregate.with(child, groupings, aggregates);
}

private static Expression transformNonEvaluatableGroupingFunction(
GroupingFunction.NonEvaluatableGroupingFunction gf,
List<Alias> evals
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,13 +256,7 @@ private Expression replaceFieldsForFieldTransformations(Expression e, BlockLoade
new NameId(),
true
);
Attribute.IdIgnoringWrapper key = newFunctionAttr.ignoreId();
if (addedAttrs.containsKey(key)) {
return addedAttrs.get(key);
}

addedAttrs.put(key, newFunctionAttr);
return newFunctionAttr;
return addedAttrs.computeIfAbsent(newFunctionAttr.ignoreId(), k -> newFunctionAttr);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ public class InlineJoin extends Join implements SortPreserving {
);

/**
* Replaces the source of the target plan with a stub preserving the output of the source plan.
* Replaces the source of the {@code destination} plan with a stub, preserving the output from the {@code target} plan, which
* the stub substitutes (or theoretically points to).
*/
Comment on lines +59 to 61
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The added comments help, but I'm still struggling without an example. Can we add examples to the javadoc?

public static LogicalPlan stubSource(UnaryPlan sourcePlan, LogicalPlan target) {
return sourcePlan.replaceChild(new StubRelation(sourcePlan.source(), StubRelation.computeOutput(sourcePlan, target)));
public static LogicalPlan stubSource(UnaryPlan destination, LogicalPlan target) {
return destination.replaceChild(StubRelation.of(destination, target));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: it's a bit confusing that we renamed target to sourcePlan in SubRelation.java but not here.

}

/**
Expand Down
Loading