Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
cdf6059
Add reproducing csv tests
alex-spies Feb 15, 2024
71ccfc3
Fix shadowing when pushing down eval/regex/enrich
alex-spies Feb 15, 2024
0cea3f7
Add tests
alex-spies Feb 20, 2024
7f7a91c
Re-enable+fix dependency check
alex-spies Feb 20, 2024
726fee1
Cleanup
alex-spies Feb 20, 2024
e00db6f
Update skip labels for tests
alex-spies Feb 20, 2024
abdeee3
Generalize unit test to DISSECT, GROK and ENRICH
alex-spies Feb 21, 2024
1cbc32d
WIP csv tests
alex-spies Feb 21, 2024
65d05f3
Merge remote-tracking branch 'upstream/main' into fix-pushdownregexev…
alex-spies Feb 21, 2024
33e3d1b
Add more csv tests
alex-spies Feb 21, 2024
d61e0cd
Use nameId in temp name for renamed attributes
alex-spies Feb 21, 2024
b33f667
Update LogicalPlan in test javadoc
alex-spies Feb 21, 2024
7c2fbdb
Merge remote-tracking branch 'upstream/main' into fix-pushdownregexev…
alex-spies Feb 23, 2024
1d88d42
Add test case without KEEP at the end
alex-spies Feb 23, 2024
a948b88
Fix typo
alex-spies Feb 23, 2024
a616e9d
Remove NameId.toLong
alex-spies Feb 23, 2024
5f2fe3a
Factor 3 PushDown rules into single helper
alex-spies Feb 23, 2024
2e67337
Add csv test for shadowing by chained evaluations
alex-spies Feb 23, 2024
382bb57
Preserve Source, use computeIfAbsent
alex-spies Feb 23, 2024
6db2cf2
Some more cleanup
alex-spies Feb 23, 2024
ce864c3
Disable logical DEPENDENCY_CHECK again
alex-spies Feb 23, 2024
278c0d4
Spotless
alex-spies Feb 23, 2024
04fb597
Update docs/changelog/105650.yaml
alex-spies Feb 26, 2024
5dc8163
Merge remote-tracking branch 'upstream/main' into fix-pushdownregexev…
alex-spies Feb 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/105650.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 105650
summary: "ESQL: Fix wrong attribute shadowing in pushdown rules"
area: ES|QL
type: bug
issues:
- 105434
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,14 @@ Bezalel Simmel | Bezalel | Simmel
;


overwriteNameAfterSort#[skip:-8.13.0]
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should backport this bugfix to 8.13, so I'm setting the skip labels accordingly.

from employees | sort emp_no ASC | dissect first_name "Ge%{emp_no}gi" | limit 1 | rename emp_no as first_name_fragment | keep first_name_fragment
;

first_name_fragment:keyword
or
;

# for now it calculates only based on the first value
multivalueInput
from employees | where emp_no <= 10006 | dissect job_positions "%{a} %{b} %{c}" | sort emp_no | keep emp_no, a, b, c;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ emp_no:integer | x:keyword | lang:keyword
;



withAliasSort
from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 3
| enrich languages_policy on x with lang = language_name;
Expand All @@ -63,6 +62,17 @@ emp_no:integer | x:keyword | lang:keyword
;


withAliasOverwriteName#[skip:-8.13.0]
from employees | sort emp_no
| eval x = to_string(languages) | enrich languages_policy on x with emp_no = language_name
| keep emp_no | limit 1
;

emp_no:keyword
French
;


withAliasAndPlain
from employees | sort emp_no desc | limit 3 | eval x = to_string(languages) | keep emp_no, x
| enrich languages_policy on x with lang = language_name, language_name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -368,4 +368,57 @@ avg_height_feet:double
// end::evalUnnamedColumnStats-result[]
;

overwriteName#[skip:-8.13.0]
FROM employees
| SORT emp_no asc
| EVAL full_name = concat(first_name, " ", last_name)
| EVAL emp_no = concat(full_name, " ", to_string(emp_no))
| KEEP full_name, emp_no
| LIMIT 3;

full_name:keyword | emp_no:keyword
Georgi Facello | Georgi Facello 10001
Bezalel Simmel | Bezalel Simmel 10002
Parto Bamford | Parto Bamford 10003
;

overwriteNameWhere#[skip:-8.13.0]
FROM employees
| SORT emp_no ASC
| EVAL full_name = concat(first_name, " ", last_name)
| EVAL emp_no = concat(full_name, " ", to_string(emp_no))
| WHERE emp_no == "Bezalel Simmel 10002"
| KEEP full_name, emp_no
| LIMIT 3;

full_name:keyword | emp_no:keyword
Bezalel Simmel | Bezalel Simmel 10002
;

overwriteNameAfterSort#[skip:-8.13.0]
FROM employees
| SORT emp_no ASC
| EVAL emp_no = -emp_no
| LIMIT 3
| KEEP emp_no
;

emp_no:i
-10001
-10002
-10003
;

overwriteNameAfterSortChained#[skip:-8.13.0]
FROM employees
| SORT emp_no ASC
| EVAL x = emp_no, y = -emp_no, emp_no = y
| LIMIT 3
| KEEP emp_no
;

emp_no:i
-10001
-10002
-10003
;
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ Bezalel Simmel | Bezalel | Simmel
;


overwriteNameAfterSort#[skip:-8.13.0]
from employees | sort emp_no ASC | grok first_name "Ge(?<emp_no>[a-z]{2})gi" | limit 1 | rename emp_no as first_name_fragment | keep first_name_fragment
;

first_name_fragment:keyword
or
;


multivalueOutput
row a = "foo bar" | grok a "%{WORD:b} %{WORD:b}";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -259,7 +260,11 @@ protected LogicalPlan rule(Aggregate aggregate) {
static String temporaryName(Expression inner, Expression outer, int suffix) {
String in = toString(inner);
String out = toString(outer);
return "$$" + in + "$" + out + "$" + suffix;
return rawTemporaryName(in, out, String.valueOf(suffix));
}

static String rawTemporaryName(String inner, String outer, String suffix) {
return "$$" + inner + "$" + outer + "$" + suffix;
}

static int TO_STRING_LIMIT = 16;
Expand Down Expand Up @@ -839,9 +844,31 @@ private static LogicalPlan maybePushDownPastUnary(Filter filter, UnaryPlan unary
}
}

protected static class PushDownEval extends OptimizerRules.OptimizerRule<Eval> {
@Override
protected LogicalPlan rule(Eval eval) {
return pushGeneratingPlanPastProjectAndOrderBy(eval, asAttributes(eval.fields()));
}
}

protected static class PushDownRegexExtract extends OptimizerRules.OptimizerRule<RegexExtract> {
@Override
protected LogicalPlan rule(RegexExtract re) {
return pushGeneratingPlanPastProjectAndOrderBy(re, re.extractedFields());
}
}

protected static class PushDownEnrich extends OptimizerRules.OptimizerRule<Enrich> {
@Override
protected LogicalPlan rule(Enrich en) {
return pushGeneratingPlanPastProjectAndOrderBy(en, asAttributes(en.enrichFields()));
}
}

/**
* Pushes Evals past OrderBys. Although it seems arbitrary whether the OrderBy or the Eval is executed first,
* this transformation ensures that OrderBys only separated by an eval can be combined by PushDownAndCombineOrderBy.
* Pushes LogicalPlans which generate new attributes (Eval, Grok/Dissect, Enrich), past OrderBys and Projections.
* Although it seems arbitrary whether the OrderBy or the Eval is executed first, this transformation ensures that OrderBys only
* separated by an eval can be combined by PushDownAndCombineOrderBy.
*
* E.g.:
*
Expand All @@ -851,59 +878,82 @@ private static LogicalPlan maybePushDownPastUnary(Filter filter, UnaryPlan unary
*
* ... | eval x = b + 1 | sort a | sort x
*
* Ordering the evals before the orderBys has the advantage that it's always possible to order the plans like this.
* Ordering the Evals before the OrderBys has the advantage that it's always possible to order the plans like this.
* E.g., in the example above it would not be possible to put the eval after the two orderBys.
*
* In case one of the Eval's fields would shadow the orderBy's attributes, we rename the attribute first.
*
* E.g.
*
* ... | sort a | eval a = b + 1 | ...
*
* becomes
*
* ... | eval $$a = a | eval a = b + 1 | sort $$a | drop $$a
*/
protected static class PushDownEval extends OptimizerRules.OptimizerRule<Eval> {
@Override
protected LogicalPlan rule(Eval eval) {
LogicalPlan child = eval.child();
private static LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(UnaryPlan generatingPlan, List<Attribute> generatedAttributes) {
LogicalPlan child = generatingPlan.child();

if (child instanceof OrderBy orderBy) {
return orderBy.replaceChild(eval.replaceChild(orderBy.child()));
} else if (child instanceof Project) {
var projectWithEvalChild = pushDownPastProject(eval);
var fieldProjections = asAttributes(eval.fields());
return projectWithEvalChild.withProjections(mergeOutputExpressions(fieldProjections, projectWithEvalChild.projections()));
}
if (child instanceof OrderBy orderBy) {
Set<String> evalFieldNames = new LinkedHashSet<>(Expressions.names(generatedAttributes));

return eval;
}
}
// Look for attributes in the OrderBy's expressions and create aliases with temporary names for them.
AttributeReplacement nonShadowedOrders = renameAttributesInExpressions(evalFieldNames, orderBy.order());

// same as for PushDownEval
protected static class PushDownRegexExtract extends OptimizerRules.OptimizerRule<RegexExtract> {
@Override
protected LogicalPlan rule(RegexExtract re) {
LogicalPlan child = re.child();
AttributeMap<Alias> aliasesForShadowedOrderByAttrs = nonShadowedOrders.replacedAttributes;
@SuppressWarnings("unchecked")
List<Order> newOrder = (List<Order>) (List<?>) nonShadowedOrders.rewrittenExpressions;

if (child instanceof OrderBy orderBy) {
return orderBy.replaceChild(re.replaceChild(orderBy.child()));
} else if (child instanceof Project) {
var projectWithChild = pushDownPastProject(re);
return projectWithChild.withProjections(mergeOutputExpressions(re.extractedFields(), projectWithChild.projections()));
if (aliasesForShadowedOrderByAttrs.isEmpty() == false) {
List<Alias> newAliases = new ArrayList<>(aliasesForShadowedOrderByAttrs.values());

LogicalPlan plan = new Eval(orderBy.source(), orderBy.child(), newAliases);
plan = generatingPlan.replaceChild(plan);
plan = new OrderBy(orderBy.source(), plan, newOrder);
plan = new Project(generatingPlan.source(), plan, generatingPlan.output());

return plan;
}

return re;
return orderBy.replaceChild(generatingPlan.replaceChild(orderBy.child()));
} else if (child instanceof Project) {
var projectWithEvalChild = pushDownPastProject(generatingPlan);
return projectWithEvalChild.withProjections(mergeOutputExpressions(generatedAttributes, projectWithEvalChild.projections()));
}

return generatingPlan;
}

// TODO double-check: this should be the same as EVAL and GROK/DISSECT, needed to avoid unbounded sort
protected static class PushDownEnrich extends OptimizerRules.OptimizerRule<Enrich> {
@Override
protected LogicalPlan rule(Enrich re) {
LogicalPlan child = re.child();
private record AttributeReplacement(List<Expression> rewrittenExpressions, AttributeMap<Alias> replacedAttributes) {};

if (child instanceof OrderBy orderBy) {
return orderBy.replaceChild(re.replaceChild(orderBy.child()));
} else if (child instanceof Project) {
var projectWithChild = pushDownPastProject(re);
var attrs = asAttributes(re.enrichFields());
return projectWithChild.withProjections(mergeOutputExpressions(attrs, projectWithChild.projections()));
}
/**
* Replace attributes in the given expressions by assigning them temporary names.
* Returns the rewritten expressions and a map with an alias for each replaced attribute; the rewritten expressions reference
* these aliases.
*/
private static AttributeReplacement renameAttributesInExpressions(
Set<String> attributeNamesToRename,
List<? extends Expression> expressions
) {
AttributeMap<Alias> aliasesForReplacedAttributes = new AttributeMap<>();
List<Expression> rewrittenExpressions = new ArrayList<>();

for (Expression expr : expressions) {
rewrittenExpressions.add(expr.transformUp(Attribute.class, attr -> {
if (attributeNamesToRename.contains(attr.name())) {
Alias renamedAttribute = aliasesForReplacedAttributes.computeIfAbsent(attr, a -> {
String tempName = SubstituteSurrogates.rawTemporaryName(a.name(), "temp_name", a.id().toString());
// TODO: this should be synthetic
return new Alias(a.source(), tempName, null, a, null, false);
});
return renamedAttribute.toAttribute();
}

return re;
return attr;
}));
}

return new AttributeReplacement(rewrittenExpressions, aliasesForReplacedAttributes);
}

protected static class PushDownAndCombineOrderBy extends OptimizerRules.OptimizerRule<OrderBy> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,14 @@
import org.elasticsearch.xpack.esql.plan.physical.RegexExtractExec;
import org.elasticsearch.xpack.esql.plan.physical.RowExec;
import org.elasticsearch.xpack.esql.plan.physical.ShowExec;
import org.elasticsearch.xpack.ql.common.Failure;
import org.elasticsearch.xpack.ql.common.Failures;
import org.elasticsearch.xpack.ql.expression.AttributeSet;
import org.elasticsearch.xpack.ql.expression.Expressions;
import org.elasticsearch.xpack.ql.plan.QueryPlan;
import org.elasticsearch.xpack.ql.plan.logical.Aggregate;
import org.elasticsearch.xpack.ql.plan.logical.EsRelation;
import org.elasticsearch.xpack.ql.plan.logical.LogicalPlan;

import java.util.Collection;

import static org.elasticsearch.xpack.ql.common.Failure.fail;

class OptimizerRules {
Expand All @@ -46,7 +44,7 @@ private OptimizerRules() {}

static class DependencyConsistency<P extends QueryPlan<P>> {

void checkPlan(P p, Collection<Failure> failures) {
void checkPlan(P p, Failures failures) {
AttributeSet refs = references(p);
AttributeSet input = p.inputSet();
AttributeSet generated = generates(p);
Expand Down
Loading