diff --git a/docs/changelog/105650.yaml b/docs/changelog/105650.yaml new file mode 100644 index 0000000000000..f43da5b315f4c --- /dev/null +++ b/docs/changelog/105650.yaml @@ -0,0 +1,6 @@ +pr: 105650 +summary: "ESQL: Fix wrong attribute shadowing in pushdown rules" +area: ES|QL +type: bug +issues: + - 105434 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 1133b24cd1cf3..225ea37688689 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -146,6 +146,14 @@ Bezalel Simmel | Bezalel | Simmel ; +overwriteNameAfterSort#[skip:-8.13.0] +from employees | sort emp_no ASC | dissect first_name "Ge%{emp_no}gi" | limit 1 | rename emp_no as first_name_fragment | keep first_name_fragment +; + +first_name_fragment:keyword +or +; + # for now it calculates only based on the first value multivalueInput from employees | where emp_no <= 10006 | dissect job_positions "%{a} %{b} %{c}" | sort emp_no | keep emp_no, a, b, c; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich-IT_tests_only.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich-IT_tests_only.csv-spec index e107fc2ffea63..2fa567996290d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich-IT_tests_only.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich-IT_tests_only.csv-spec @@ -51,7 +51,6 @@ emp_no:integer | x:keyword | lang:keyword ; - withAliasSort from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 3 | enrich languages_policy on x with lang = language_name; @@ -63,6 +62,17 @@ emp_no:integer | x:keyword | lang:keyword ; +withAliasOverwriteName#[skip:-8.13.0] +from employees | sort emp_no +| eval x = to_string(languages) | enrich languages_policy on x with emp_no = language_name +| keep emp_no | limit 1 +; + +emp_no:keyword +French +; + + withAliasAndPlain from employees | sort emp_no desc | limit 3 | eval x = to_string(languages) | keep emp_no, x | enrich languages_policy on x with lang = language_name, language_name; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 21ce5cf5c7fc2..a8e5a5930a06b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -368,4 +368,57 @@ avg_height_feet:double // end::evalUnnamedColumnStats-result[] ; +overwriteName#[skip:-8.13.0] +FROM employees +| SORT emp_no asc +| EVAL full_name = concat(first_name, " ", last_name) +| EVAL emp_no = concat(full_name, " ", to_string(emp_no)) +| KEEP full_name, emp_no +| LIMIT 3; + +full_name:keyword | emp_no:keyword +Georgi Facello | Georgi Facello 10001 +Bezalel Simmel | Bezalel Simmel 10002 +Parto Bamford | Parto Bamford 10003 +; + +overwriteNameWhere#[skip:-8.13.0] +FROM employees +| SORT emp_no ASC +| EVAL full_name = concat(first_name, " ", last_name) +| EVAL emp_no = concat(full_name, " ", to_string(emp_no)) +| WHERE emp_no == "Bezalel Simmel 10002" +| KEEP full_name, emp_no +| LIMIT 3; + +full_name:keyword | emp_no:keyword +Bezalel Simmel | Bezalel Simmel 10002 +; + +overwriteNameAfterSort#[skip:-8.13.0] +FROM employees +| SORT emp_no ASC +| EVAL emp_no = -emp_no +| LIMIT 3 +| KEEP emp_no +; + +emp_no:i +-10001 +-10002 +-10003 +; +overwriteNameAfterSortChained#[skip:-8.13.0] +FROM employees +| SORT emp_no ASC +| EVAL x = emp_no, y = -emp_no, emp_no = y +| LIMIT 3 +| KEEP emp_no +; + +emp_no:i +-10001 +-10002 +-10003 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index f71f51d42c45f..fbe31deeb0f97 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -122,6 +122,15 @@ Bezalel Simmel | Bezalel | Simmel ; +overwriteNameAfterSort#[skip:-8.13.0] +from employees | sort emp_no ASC | grok first_name "Ge(?[a-z]{2})gi" | limit 1 | rename emp_no as first_name_fragment | keep first_name_fragment +; + +first_name_fragment:keyword +or +; + + multivalueOutput row a = "foo bar" | grok a "%{WORD:b} %{WORD:b}"; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index ab413bd89f0a6..db5751245c40a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -72,6 +72,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -259,7 +260,11 @@ protected LogicalPlan rule(Aggregate aggregate) { static String temporaryName(Expression inner, Expression outer, int suffix) { String in = toString(inner); String out = toString(outer); - return "$$" + in + "$" + out + "$" + suffix; + return rawTemporaryName(in, out, String.valueOf(suffix)); + } + + static String rawTemporaryName(String inner, String outer, String suffix) { + return "$$" + inner + "$" + outer + "$" + suffix; } static int TO_STRING_LIMIT = 16; @@ -839,9 +844,31 @@ private static LogicalPlan maybePushDownPastUnary(Filter filter, UnaryPlan unary } } + protected static class PushDownEval extends OptimizerRules.OptimizerRule { + @Override + protected LogicalPlan rule(Eval eval) { + return pushGeneratingPlanPastProjectAndOrderBy(eval, asAttributes(eval.fields())); + } + } + + protected static class PushDownRegexExtract extends OptimizerRules.OptimizerRule { + @Override + protected LogicalPlan rule(RegexExtract re) { + return pushGeneratingPlanPastProjectAndOrderBy(re, re.extractedFields()); + } + } + + protected static class PushDownEnrich extends OptimizerRules.OptimizerRule { + @Override + protected LogicalPlan rule(Enrich en) { + return pushGeneratingPlanPastProjectAndOrderBy(en, asAttributes(en.enrichFields())); + } + } + /** - * Pushes Evals past OrderBys. Although it seems arbitrary whether the OrderBy or the Eval is executed first, - * this transformation ensures that OrderBys only separated by an eval can be combined by PushDownAndCombineOrderBy. + * Pushes LogicalPlans which generate new attributes (Eval, Grok/Dissect, Enrich), past OrderBys and Projections. + * Although it seems arbitrary whether the OrderBy or the Eval is executed first, this transformation ensures that OrderBys only + * separated by an eval can be combined by PushDownAndCombineOrderBy. * * E.g.: * @@ -851,59 +878,82 @@ private static LogicalPlan maybePushDownPastUnary(Filter filter, UnaryPlan unary * * ... | eval x = b + 1 | sort a | sort x * - * Ordering the evals before the orderBys has the advantage that it's always possible to order the plans like this. + * Ordering the Evals before the OrderBys has the advantage that it's always possible to order the plans like this. * E.g., in the example above it would not be possible to put the eval after the two orderBys. + * + * In case one of the Eval's fields would shadow the orderBy's attributes, we rename the attribute first. + * + * E.g. + * + * ... | sort a | eval a = b + 1 | ... + * + * becomes + * + * ... | eval $$a = a | eval a = b + 1 | sort $$a | drop $$a */ - protected static class PushDownEval extends OptimizerRules.OptimizerRule { - @Override - protected LogicalPlan rule(Eval eval) { - LogicalPlan child = eval.child(); + private static LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(UnaryPlan generatingPlan, List generatedAttributes) { + LogicalPlan child = generatingPlan.child(); - if (child instanceof OrderBy orderBy) { - return orderBy.replaceChild(eval.replaceChild(orderBy.child())); - } else if (child instanceof Project) { - var projectWithEvalChild = pushDownPastProject(eval); - var fieldProjections = asAttributes(eval.fields()); - return projectWithEvalChild.withProjections(mergeOutputExpressions(fieldProjections, projectWithEvalChild.projections())); - } + if (child instanceof OrderBy orderBy) { + Set evalFieldNames = new LinkedHashSet<>(Expressions.names(generatedAttributes)); - return eval; - } - } + // Look for attributes in the OrderBy's expressions and create aliases with temporary names for them. + AttributeReplacement nonShadowedOrders = renameAttributesInExpressions(evalFieldNames, orderBy.order()); - // same as for PushDownEval - protected static class PushDownRegexExtract extends OptimizerRules.OptimizerRule { - @Override - protected LogicalPlan rule(RegexExtract re) { - LogicalPlan child = re.child(); + AttributeMap aliasesForShadowedOrderByAttrs = nonShadowedOrders.replacedAttributes; + @SuppressWarnings("unchecked") + List newOrder = (List) (List) nonShadowedOrders.rewrittenExpressions; - if (child instanceof OrderBy orderBy) { - return orderBy.replaceChild(re.replaceChild(orderBy.child())); - } else if (child instanceof Project) { - var projectWithChild = pushDownPastProject(re); - return projectWithChild.withProjections(mergeOutputExpressions(re.extractedFields(), projectWithChild.projections())); + if (aliasesForShadowedOrderByAttrs.isEmpty() == false) { + List newAliases = new ArrayList<>(aliasesForShadowedOrderByAttrs.values()); + + LogicalPlan plan = new Eval(orderBy.source(), orderBy.child(), newAliases); + plan = generatingPlan.replaceChild(plan); + plan = new OrderBy(orderBy.source(), plan, newOrder); + plan = new Project(generatingPlan.source(), plan, generatingPlan.output()); + + return plan; } - return re; + return orderBy.replaceChild(generatingPlan.replaceChild(orderBy.child())); + } else if (child instanceof Project) { + var projectWithEvalChild = pushDownPastProject(generatingPlan); + return projectWithEvalChild.withProjections(mergeOutputExpressions(generatedAttributes, projectWithEvalChild.projections())); } + + return generatingPlan; } - // TODO double-check: this should be the same as EVAL and GROK/DISSECT, needed to avoid unbounded sort - protected static class PushDownEnrich extends OptimizerRules.OptimizerRule { - @Override - protected LogicalPlan rule(Enrich re) { - LogicalPlan child = re.child(); + private record AttributeReplacement(List rewrittenExpressions, AttributeMap replacedAttributes) {}; - if (child instanceof OrderBy orderBy) { - return orderBy.replaceChild(re.replaceChild(orderBy.child())); - } else if (child instanceof Project) { - var projectWithChild = pushDownPastProject(re); - var attrs = asAttributes(re.enrichFields()); - return projectWithChild.withProjections(mergeOutputExpressions(attrs, projectWithChild.projections())); - } + /** + * Replace attributes in the given expressions by assigning them temporary names. + * Returns the rewritten expressions and a map with an alias for each replaced attribute; the rewritten expressions reference + * these aliases. + */ + private static AttributeReplacement renameAttributesInExpressions( + Set attributeNamesToRename, + List expressions + ) { + AttributeMap aliasesForReplacedAttributes = new AttributeMap<>(); + List rewrittenExpressions = new ArrayList<>(); + + for (Expression expr : expressions) { + rewrittenExpressions.add(expr.transformUp(Attribute.class, attr -> { + if (attributeNamesToRename.contains(attr.name())) { + Alias renamedAttribute = aliasesForReplacedAttributes.computeIfAbsent(attr, a -> { + String tempName = SubstituteSurrogates.rawTemporaryName(a.name(), "temp_name", a.id().toString()); + // TODO: this should be synthetic + return new Alias(a.source(), tempName, null, a, null, false); + }); + return renamedAttribute.toAttribute(); + } - return re; + return attr; + })); } + + return new AttributeReplacement(rewrittenExpressions, aliasesForReplacedAttributes); } protected static class PushDownAndCombineOrderBy extends OptimizerRules.OptimizerRule { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java index a1064b5b7d6bc..b9018f56e60de 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java @@ -28,7 +28,7 @@ import org.elasticsearch.xpack.esql.plan.physical.RegexExtractExec; import org.elasticsearch.xpack.esql.plan.physical.RowExec; import org.elasticsearch.xpack.esql.plan.physical.ShowExec; -import org.elasticsearch.xpack.ql.common.Failure; +import org.elasticsearch.xpack.ql.common.Failures; import org.elasticsearch.xpack.ql.expression.AttributeSet; import org.elasticsearch.xpack.ql.expression.Expressions; import org.elasticsearch.xpack.ql.plan.QueryPlan; @@ -36,8 +36,6 @@ import org.elasticsearch.xpack.ql.plan.logical.EsRelation; import org.elasticsearch.xpack.ql.plan.logical.LogicalPlan; -import java.util.Collection; - import static org.elasticsearch.xpack.ql.common.Failure.fail; class OptimizerRules { @@ -46,7 +44,7 @@ private OptimizerRules() {} static class DependencyConsistency

> { - void checkPlan(P p, Collection failures) { + void checkPlan(P p, Failures failures) { AttributeSet refs = references(p); AttributeSet input = p.inputSet(); AttributeSet generated = generates(p); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 9dfcffbf48e6e..943d60a3882b7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.optimizer; +import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.aggregation.QuantileStates; import org.elasticsearch.test.ESTestCase; @@ -42,6 +43,7 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Div; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mod; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Mul; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Sub; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; import org.elasticsearch.xpack.esql.parser.EsqlParser; @@ -57,6 +59,7 @@ import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier; import org.elasticsearch.xpack.ql.expression.Alias; import org.elasticsearch.xpack.ql.expression.Attribute; +import org.elasticsearch.xpack.ql.expression.AttributeSet; import org.elasticsearch.xpack.ql.expression.Expression; import org.elasticsearch.xpack.ql.expression.Expressions; import org.elasticsearch.xpack.ql.expression.FieldAttribute; @@ -3266,6 +3269,99 @@ public void testPlanSanityCheck() throws Exception { assertThat(e.getMessage(), containsString(" optimized incorrectly due to missing references [salary")); } + /** + * Pushing down EVAL/GROK/DISSECT/ENRICH must not accidentally shadow attributes required by SORT. + * + * For DISSECT expects the following; the others are similar. + * + * EsqlProject[[first_name{f}#37, emp_no{r}#33, salary{r}#34]] + * \_TopN[[Order[$$emp_no$temp_name$36{r}#46 + $$salary$temp_name$41{r}#47 * 13[INTEGER],ASC,LAST], Order[NEG($$salary$t + * emp_name$41{r}#47),DESC,FIRST]],3[INTEGER]] + * \_Dissect[first_name{f}#37,Parser[pattern=%{emp_no} %{salary}, appendSeparator=, parser=org.elasticsearch.dissect.Dissect + * Parser@b6858b],[emp_no{r}#33, salary{r}#34]] + * \_Eval[[emp_no{f}#36 AS $$emp_no$temp_name$36, salary{f}#41 AS $$salary$temp_name$41]] + * \_EsRelation[test][_meta_field{f}#42, emp_no{f}#36, first_name{f}#37, ..] + */ + public void testPushdownWithOverwrittenName() { + List overwritingCommands = List.of( + "EVAL emp_no = 3*emp_no, salary = -2*emp_no-salary", + "DISSECT first_name \"%{emp_no} %{salary}\"", + "GROK first_name \"%{WORD:emp_no} %{WORD:salary}\"", + "ENRICH languages_idx ON first_name WITH emp_no = language_code, salary = language_code" + ); + + String queryTemplateKeepAfter = """ + FROM test + | SORT 13*(emp_no+salary) ASC, -salary DESC + | {} + | KEEP first_name, emp_no, salary + | LIMIT 3 + """; + // Equivalent but with KEEP first - ensures that attributes in the final projection are correct after pushdown rules were applied. + String queryTemplateKeepFirst = """ + FROM test + | KEEP emp_no, salary, first_name + | SORT 13*(emp_no+salary) ASC, -salary DESC + | {} + | LIMIT 3 + """; + + for (String overwritingCommand : overwritingCommands) { + String queryTemplate = randomBoolean() ? queryTemplateKeepFirst : queryTemplateKeepAfter; + var plan = optimizedPlan(LoggerMessageFormat.format(null, queryTemplate, overwritingCommand)); + + var project = as(plan, Project.class); + var projections = project.projections(); + assertThat(projections.size(), equalTo(3)); + assertThat(projections.get(0).name(), equalTo("first_name")); + assertThat(projections.get(1).name(), equalTo("emp_no")); + assertThat(projections.get(2).name(), equalTo("salary")); + + var topN = as(project.child(), TopN.class); + assertThat(topN.order().size(), is(2)); + + var firstOrderExpr = as(topN.order().get(0), Order.class); + var mul = as(firstOrderExpr.child(), Mul.class); + var add = as(mul.left(), Add.class); + var renamed_emp_no = as(add.left(), ReferenceAttribute.class); + var renamed_salary = as(add.right(), ReferenceAttribute.class); + assertThat(renamed_emp_no.toString(), startsWith("$$emp_no$temp_name")); + assertThat(renamed_salary.toString(), startsWith("$$salary$temp_name")); + + var secondOrderExpr = as(topN.order().get(1), Order.class); + var neg = as(secondOrderExpr.child(), Neg.class); + var renamed_salary2 = as(neg.field(), ReferenceAttribute.class); + assert (renamed_salary2.semanticEquals(renamed_salary) && renamed_salary2.equals(renamed_salary)); + + Eval renamingEval = null; + if (overwritingCommand.startsWith("EVAL")) { + // Multiple EVALs should be merged, so there's only one. + renamingEval = as(topN.child(), Eval.class); + } + if (overwritingCommand.startsWith("DISSECT")) { + var dissect = as(topN.child(), Dissect.class); + renamingEval = as(dissect.child(), Eval.class); + } + if (overwritingCommand.startsWith("GROK")) { + var grok = as(topN.child(), Grok.class); + renamingEval = as(grok.child(), Eval.class); + } + if (overwritingCommand.startsWith("ENRICH")) { + var enrich = as(topN.child(), Enrich.class); + renamingEval = as(enrich.child(), Eval.class); + } + + AttributeSet attributesCreatedInEval = new AttributeSet(); + for (Alias field : renamingEval.fields()) { + attributesCreatedInEval.add(field.toAttribute()); + } + assert (attributesCreatedInEval.contains(renamed_emp_no)); + assert (attributesCreatedInEval.contains(renamed_salary)); + + assertThat(renamingEval.child(), instanceOf(EsRelation.class)); + } + } + private LogicalPlan optimizedPlan(String query) { return plan(query); }