From 163ca7e77d3c508f086adf514f5dde2c831e43d8 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 23 Jul 2024 15:02:28 +0200 Subject: [PATCH] ESQL: Fix variable shadowing when pushing down past Project (#108360) Fix bugs caused by pushing down Eval, Grok, Dissect and Enrich past Rename, where after the pushdown, the columns added shadowed the columns to be renamed. For Dissect and Grok, this enables naming their generated attributes to deviate from the names obtained from the dissect/grok patterns. (cherry picked from commit e8a01bbd9c41ed77239a7b41761d1e58994a034f) # Conflicts: # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java # x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java --- docs/changelog/108360.yaml | 6 + .../xpack/esql/EsqlTestUtils.java | 5 + .../src/main/resources/dissect.csv-spec | 33 +++ .../src/main/resources/enrich.csv-spec | 36 +++ .../src/main/resources/eval.csv-spec | 39 ++++ .../src/main/resources/grok.csv-spec | 33 +++ .../src/main/resources/stats.csv-spec | 59 +++++ .../xpack/esql/action/EsqlCapabilities.java | 8 +- .../xpack/esql/analysis/Analyzer.java | 4 +- .../esql/expression/NamedExpressions.java | 3 +- .../esql/optimizer/LogicalPlanOptimizer.java | 179 +++++++++++--- .../xpack/esql/optimizer/OptimizerRules.java | 13 +- .../esql/optimizer/rules/PushDownEnrich.java | 4 +- .../esql/optimizer/rules/PushDownEval.java | 4 +- .../optimizer/rules/PushDownRegexExtract.java | 2 +- .../ReplaceOrderByExpressionWithEval.java | 2 +- .../ReplaceStatsAggExpressionWithEval.java | 3 +- .../ReplaceStatsNestedExpressionWithEval.java | 3 +- .../optimizer/rules/SubstituteSurrogates.java | 28 +-- .../xpack/esql/parser/LogicalPlanBuilder.java | 16 +- .../xpack/esql/plan/GeneratingPlan.java | 40 ++++ .../xpack/esql/plan/logical/Dissect.java | 19 ++ .../xpack/esql/plan/logical/Enrich.java | 35 ++- .../xpack/esql/plan/logical/Eval.java | 52 ++++- .../xpack/esql/plan/logical/Grok.java | 5 + .../xpack/esql/plan/logical/RegexExtract.java | 31 ++- .../esql/planner/LocalExecutionPlanner.java | 20 +- .../optimizer/LogicalPlanOptimizerTests.java | 219 +++++++++++++++++- .../parser/AbstractStatementParserTests.java | 5 - .../esql/parser/StatementParserTests.java | 1 + 30 files changed, 795 insertions(+), 112 deletions(-) create mode 100644 docs/changelog/108360.yaml create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java diff --git a/docs/changelog/108360.yaml b/docs/changelog/108360.yaml new file mode 100644 index 0000000000000..087dd2649c6aa --- /dev/null +++ b/docs/changelog/108360.yaml @@ -0,0 +1,6 @@ +pr: 108360 +summary: "ESQL: Fix variable shadowing when pushing down past Project" +area: ES|QL +type: bug +issues: + - 108008 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index d7e067658267f..68696ad5ac99a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -31,6 +31,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.predicate.Range; import org.elasticsearch.xpack.esql.core.index.EsIndex; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -169,6 +170,10 @@ public static Literal of(Source source, Object value) { return new Literal(source, value, DataType.fromJava(value)); } + public static ReferenceAttribute referenceAttribute(String name, DataType type) { + return new ReferenceAttribute(EMPTY, name, type); + } + public static Range rangeOf(Expression value, Expression lower, boolean includeLower, Expression upper, boolean includeUpper) { return new Range(EMPTY, value, lower, includeLower, upper, includeUpper, randomZone()); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 8c4e797b7982d..38f09d2e3c56e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -75,6 +75,39 @@ first_name:keyword | last_name:keyword | name:keyword | foo:keyword Georgi | Facello | Georgi1 Facello | Facello ; +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME city AS c +| DISSECT long_city_name "Zurich, the %{city} city in Switzerland" +; + +c:keyword | long_city_name:keyword | city:keyword +Zürich | Zurich, the largest city in Switzerland | largest +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME city AS c +| DISSECT long_city_name "Zurich, the %{city} city in %{foo}" +; + +c:keyword | long_city_name:keyword | city:keyword | foo:keyword +Zürich | Zurich, the largest city in Switzerland | largest | Switzerland +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME long_city_name AS c +| DISSECT c "Zurich, the %{long_city_name} city in Switzerland" +; + +city:keyword | c:keyword | long_city_name:keyword +Zürich | Zurich, the largest city in Switzerland | largest +; + complexPattern ROW a = "1953-01-23T12:15:00Z - some text - 127.0.0.1;" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index ab2ddb84ed969..925c08f317125 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -174,6 +174,42 @@ city:keyword | airport:text Zürich | Zurich Int'l ; +shadowingWhenPushedDownPastRename +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME airport AS a +| ENRICH city_names ON city WITH airport +; + +city:keyword | a:keyword | airport:text +Zürich | ZRH | Zurich Int'l +; + +shadowingWhenPushedDownPastRename2 +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME airport AS a +| ENRICH city_names ON city WITH airport, region +; + +city:keyword | a:keyword | airport:text | region:text +Zürich | ZRH | Zurich Int'l | Bezirk Zürich +; + +shadowingWhenPushedDownPastRename3 +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME city as c +| ENRICH city_names ON c WITH city = airport +; + +c:keyword | airport:keyword | city:text +Zürich | ZRH | Zurich Int'l +; + simple required_capability: enrich_load diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 770358e5120da..61a0ccd4af0c5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -55,6 +55,45 @@ x:integer 9999 ; +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = 3 +; + +z:integer | languages:integer | emp_no:integer + 10001 | 2 | 3 +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = z + 1, emp_no = emp_no + languages, a = 0, languages = -1 +; + +z:integer | emp_no:integer | a:integer | languages:integer + 10001 | 10004 | 0 | -1 +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = z + 1 +; + +z:integer | languages:integer | emp_no:integer + 10001 | 2 | 10002 +; + withMath row a = 1 | eval b = 2 + 3; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index d9857e8c122ef..98c88d06caa75 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -76,6 +76,39 @@ San Francisco | CA 94108 | ["CA", "94108"] Tokyo | 100-7014 | null ; +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME city AS c +| GROK long_city_name "Zürich, the %{WORD:city} %{WORD:city} %{WORD:city} %{WORD:city}" +; + +c:keyword | long_city_name:keyword | city:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "city", "in", "Switzerland"] +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME city AS c +| GROK long_city_name "Zürich, the %{WORD:city} %{WORD:foo} %{WORD:city} %{WORD:foo}" +; + +c:keyword | long_city_name:keyword | city:keyword | foo:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "in"] | ["city", "Switzerland"] +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME long_city_name AS c +| GROK c "Zürich, the %{WORD:long_city_name} %{WORD:long_city_name} %{WORD:long_city_name} %{WORD:long_city_name}" +; + +city:keyword | c:keyword | long_city_name:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "city", "in", "Switzerland"] +; + complexPattern ROW a = "1953-01-23T12:15:00Z 127.0.0.1 some.email@foo.com 42" | GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index b2080b54b981c..be4342b95c6b8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -575,6 +575,65 @@ ca:l | cx:l | l:i 1 | 1 | null ; +/////////////////////////////////////////////////////////////// +// Test edge case interaction with push down past a rename +// https://github.com/elastic/elasticsearch/issues/108008 +/////////////////////////////////////////////////////////////// + +countSameFieldWithEval +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | eval b = gender | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithDissect +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | dissect gender "%{b}" | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithGrok +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | grok gender "%{USERNAME:b}" | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithEnrich +required_capability: fixed_pushdown_past_project +required_capability: enrich_load +from employees | stats b = count(gender), c = count(gender) by gender | enrich languages_policy on gender with b = language_name | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | null +57 | M | null +; + +countSameFieldWithEnrichLimit0 +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | enrich languages_policy on gender with b = language_name | sort c asc | limit 0 +; + +c:l | gender:s | b:s +; +/////////////////////////////////////////////////////////////// + aggsWithoutStats from employees | stats by gender | sort gender; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 5641f49b039f6..918e9614f9070 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -139,7 +139,13 @@ public enum Cap { * Fix for non-unique attribute names in ROW and logical plans. * https://github.com/elastic/elasticsearch/issues/110541 */ - UNIQUE_NAMES; + UNIQUE_NAMES, + + /** + * Make attributes of GROK/DISSECT adjustable and fix a shadowing bug when pushing them down past PROJECT. + * https://github.com/elastic/elasticsearch/issues/108008 + */ + FIXED_PUSHDOWN_PAST_PROJECT; private final boolean snapshotOnly; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index b41156824be12..37c8cceb3f605 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -65,7 +65,7 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.DateTimeArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; -import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Drop; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -1193,7 +1193,7 @@ private Expression createIfDoesNotAlreadyExist( List unionFieldAttributes ) { // Generate new ID for the field and suffix it with the data type to maintain unique attribute names. - String unionTypedFieldName = SubstituteSurrogates.rawTemporaryName( + String unionTypedFieldName = LogicalPlanOptimizer.rawTemporaryName( fa.name(), "converted_to", resolvedField.getDataType().typeName() diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java index d0c8adfd3c858..624ea9a030208 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java @@ -33,7 +33,8 @@ public static List mergeOutputAttributes( /** * Merges output expressions of a command given the new attributes plus the existing inputs that are emitted as outputs. * As a general rule, child output will come first in the list, followed by the new fields. - * In case of name collisions, only last entry is preserved (previous expressions with the same name are discarded) + * In case of name collisions, only the last entry is preserved (previous expressions with the same name are discarded) + * and the new attributes have precedence over the child output. * @param fields the fields added by the command * @param childOutput the command input that has to be propagated as output * @return diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index ca4b5d17deed3..439289c879c27 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -15,6 +15,9 @@ import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.NameId; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -22,6 +25,7 @@ import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.optimizer.rules.AddDefaultTopN; import org.elasticsearch.xpack.esql.optimizer.rules.BooleanFunctionEqualsElimination; import org.elasticsearch.xpack.esql.optimizer.rules.BooleanSimplification; @@ -67,6 +71,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSpatialSurrogates; import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates; import org.elasticsearch.xpack.esql.optimizer.rules.TranslateMetricsAggregate; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; @@ -74,8 +79,11 @@ import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Set; import static java.util.Arrays.asList; @@ -89,6 +97,34 @@ public LogicalPlanOptimizer(LogicalOptimizerContext optimizerContext) { super(optimizerContext); } + public static String temporaryName(Expression inner, Expression outer, int suffix) { + String in = toString(inner); + String out = toString(outer); + return rawTemporaryName(in, out, String.valueOf(suffix)); + } + + public static String locallyUniqueTemporaryName(String inner, String outer) { + return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + new NameId(); + } + + public static String rawTemporaryName(String inner, String outer, String suffix) { + return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + suffix; + } + + static String toString(Expression ex) { + return ex instanceof AggregateFunction af ? af.functionName() : extractString(ex); + } + + static String extractString(Expression ex) { + return ex instanceof NamedExpression ne ? ne.name() : limitToString(ex.sourceText()).replace(' ', '_'); + } + + static int TO_STRING_LIMIT = 16; + + static String limitToString(String string) { + return string.length() > TO_STRING_LIMIT ? string.substring(0, TO_STRING_LIMIT - 1) + ">" : string; + } + public LogicalPlan optimize(LogicalPlan verified) { var optimized = execute(verified); @@ -189,35 +225,26 @@ public static LogicalPlan skipPlan(UnaryPlan plan, LocalSupplier supplier) { /** * Pushes LogicalPlans which generate new attributes (Eval, Grok/Dissect, Enrich), past OrderBys and Projections. - * Although it seems arbitrary whether the OrderBy or the Eval is executed first, this transformation ensures that OrderBys only - * separated by an eval can be combined by PushDownAndCombineOrderBy. - * - * E.g.: - * - * ... | sort a | eval x = b + 1 | sort x - * - * becomes - * - * ... | eval x = b + 1 | sort a | sort x - * - * Ordering the Evals before the OrderBys has the advantage that it's always possible to order the plans like this. + * Although it seems arbitrary whether the OrderBy or the generating plan is executed first, this transformation ensures that OrderBys + * only separated by e.g. an Eval can be combined by {@link PushDownAndCombineOrderBy}. + *

+ * E.g. {@code ... | sort a | eval x = b + 1 | sort x} becomes {@code ... | eval x = b + 1 | sort a | sort x} + *

+ * Ordering the generating plans before the OrderBys has the advantage that it's always possible to order the plans like this. * E.g., in the example above it would not be possible to put the eval after the two orderBys. - * - * In case one of the Eval's fields would shadow the orderBy's attributes, we rename the attribute first. - * - * E.g. - * - * ... | sort a | eval a = b + 1 | ... - * - * becomes - * - * ... | eval $$a = a | eval a = b + 1 | sort $$a | drop $$a + *

+ * In case one of the generating plan's attributes would shadow the OrderBy's attributes, we alias the generated attribute first. + *

+ * E.g. {@code ... | sort a | eval a = b + 1 | ...} becomes {@code ... | eval $$a = a | eval a = b + 1 | sort $$a | drop $$a ...} + *

+ * In case the generating plan's attributes would shadow the Project's attributes, we rename the generated attributes in place. + *

+ * E.g. {@code ... | rename a as z | eval a = b + 1 | ...} becomes {@code ... eval $$a = b + 1 | rename a as z, $$a as a ...} */ - public static LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(UnaryPlan generatingPlan, List generatedAttributes) { + public static > LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(Plan generatingPlan) { LogicalPlan child = generatingPlan.child(); - if (child instanceof OrderBy orderBy) { - Set evalFieldNames = new LinkedHashSet<>(Expressions.names(generatedAttributes)); + Set evalFieldNames = new LinkedHashSet<>(Expressions.names(generatingPlan.generatedAttributes())); // Look for attributes in the OrderBy's expressions and create aliases with temporary names for them. AttributeReplacement nonShadowedOrders = renameAttributesInExpressions(evalFieldNames, orderBy.order()); @@ -238,9 +265,66 @@ public static LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(UnaryPlan gene } return orderBy.replaceChild(generatingPlan.replaceChild(orderBy.child())); - } else if (child instanceof Project) { - var projectWithEvalChild = pushDownPastProject(generatingPlan); - return projectWithEvalChild.withProjections(mergeOutputExpressions(generatedAttributes, projectWithEvalChild.projections())); + } else if (child instanceof Project project) { + // We need to account for attribute shadowing: a rename might rely on a name generated in an Eval/Grok/Dissect/Enrich. + // E.g. in: + // + // Eval[[2 * x{f}#1 AS y]] + // \_Project[[x{f}#1, y{f}#2, y{f}#2 AS z]] + // + // Just moving the Eval down breaks z because we shadow y{f}#2. + // Instead, we use a different alias in the Eval, eventually renaming back to y: + // + // Project[[x{f}#1, y{f}#2 as z, $$y{r}#3 as y]] + // \_Eval[[2 * x{f}#1 as $$y]] + + List generatedAttributes = generatingPlan.generatedAttributes(); + + @SuppressWarnings("unchecked") + Plan generatingPlanWithResolvedExpressions = (Plan) resolveRenamesFromProject(generatingPlan, project); + + Set namesReferencedInRenames = new HashSet<>(); + for (NamedExpression ne : project.projections()) { + if (ne instanceof Alias as) { + namesReferencedInRenames.addAll(as.child().references().names()); + } + } + Map renameGeneratedAttributeTo = newNamesForConflictingAttributes( + generatingPlan.generatedAttributes(), + namesReferencedInRenames + ); + List newNames = generatedAttributes.stream() + .map(attr -> renameGeneratedAttributeTo.getOrDefault(attr.name(), attr.name())) + .toList(); + Plan generatingPlanWithRenamedAttributes = generatingPlanWithResolvedExpressions.withGeneratedNames(newNames); + + // Put the project at the top, but include the generated attributes. + // Any generated attributes that had to be renamed need to be re-renamed to their original names. + List generatedAttributesRenamedToOriginal = new ArrayList<>(generatedAttributes.size()); + List renamedGeneratedAttributes = generatingPlanWithRenamedAttributes.generatedAttributes(); + for (int i = 0; i < generatedAttributes.size(); i++) { + Attribute originalAttribute = generatedAttributes.get(i); + Attribute renamedAttribute = renamedGeneratedAttributes.get(i); + if (originalAttribute.name().equals(renamedAttribute.name())) { + generatedAttributesRenamedToOriginal.add(renamedAttribute); + } else { + generatedAttributesRenamedToOriginal.add( + new Alias( + originalAttribute.source(), + originalAttribute.name(), + originalAttribute.qualifier(), + renamedAttribute, + originalAttribute.id(), + originalAttribute.synthetic() + ) + ); + } + } + + Project projectWithGeneratingChild = project.replaceChild(generatingPlanWithRenamedAttributes.replaceChild(project.child())); + return projectWithGeneratingChild.withProjections( + mergeOutputExpressions(generatedAttributesRenamedToOriginal, projectWithGeneratingChild.projections()) + ); } return generatingPlan; @@ -264,8 +348,9 @@ private static AttributeReplacement renameAttributesInExpressions( rewrittenExpressions.add(expr.transformUp(Attribute.class, attr -> { if (attributeNamesToRename.contains(attr.name())) { Alias renamedAttribute = aliasesForReplacedAttributes.computeIfAbsent(attr, a -> { - String tempName = SubstituteSurrogates.rawTemporaryName(a.name(), "temp_name", a.id().toString()); + String tempName = locallyUniqueTemporaryName(a.name(), "temp_name"); // TODO: this should be synthetic + // blocked on https://github.com/elastic/elasticsearch/issues/98703 return new Alias(a.source(), tempName, null, a, null, false); }); return renamedAttribute.toAttribute(); @@ -278,16 +363,28 @@ private static AttributeReplacement renameAttributesInExpressions( return new AttributeReplacement(rewrittenExpressions, aliasesForReplacedAttributes); } + private static Map newNamesForConflictingAttributes( + List potentiallyConflictingAttributes, + Set reservedNames + ) { + if (reservedNames.isEmpty()) { + return Map.of(); + } + + Map renameAttributeTo = new HashMap<>(); + for (Attribute attr : potentiallyConflictingAttributes) { + String name = attr.name(); + if (reservedNames.contains(name)) { + renameAttributeTo.putIfAbsent(name, locallyUniqueTemporaryName(name, "temp_name")); + } + } + + return renameAttributeTo; + } + public static Project pushDownPastProject(UnaryPlan parent) { if (parent.child() instanceof Project project) { - AttributeMap.Builder aliasBuilder = AttributeMap.builder(); - project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child())); - var aliases = aliasBuilder.build(); - - var expressionsWithResolvedAliases = (UnaryPlan) parent.transformExpressionsOnly( - ReferenceAttribute.class, - r -> aliases.resolve(r, r) - ); + UnaryPlan expressionsWithResolvedAliases = resolveRenamesFromProject(parent, project); return project.replaceChild(expressionsWithResolvedAliases.replaceChild(project.child())); } else { @@ -295,6 +392,14 @@ public static Project pushDownPastProject(UnaryPlan parent) { } } + private static UnaryPlan resolveRenamesFromProject(UnaryPlan plan, Project project) { + AttributeMap.Builder aliasBuilder = AttributeMap.builder(); + project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child())); + var aliases = aliasBuilder.build(); + + return (UnaryPlan) plan.transformExpressionsOnly(ReferenceAttribute.class, r -> aliases.resolve(r, r)); + } + public abstract static class ParameterizedOptimizerRule extends ParameterizedRule< SubPlan, LogicalPlan, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java index c02b9948def3f..fe1a66737b17b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java @@ -14,12 +14,11 @@ import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.QueryPlan; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; -import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.MvExpand; -import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.Row; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; @@ -103,18 +102,12 @@ protected AttributeSet generates(LogicalPlan logicalPlan) { || logicalPlan instanceof Aggregate) { return logicalPlan.outputSet(); } - if (logicalPlan instanceof Eval eval) { - return new AttributeSet(Expressions.asAttributes(eval.fields())); - } - if (logicalPlan instanceof RegexExtract extract) { - return new AttributeSet(extract.extractedFields()); + if (logicalPlan instanceof GeneratingPlan generating) { + return new AttributeSet(generating.generatedAttributes()); } if (logicalPlan instanceof MvExpand mvExpand) { return new AttributeSet(mvExpand.expanded()); } - if (logicalPlan instanceof Enrich enrich) { - return new AttributeSet(Expressions.asAttributes(enrich.enrichFields())); - } return AttributeSet.EMPTY; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java index f6a0154108f2d..7e102a36828a9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java @@ -12,11 +12,9 @@ import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Enrich; -import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; - public final class PushDownEnrich extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(Enrich en) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(en, asAttributes(en.enrichFields())); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(en); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java index b936e5569c950..e9b42be8dd397 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java @@ -12,11 +12,9 @@ import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Eval; -import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; - public final class PushDownEval extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(Eval eval) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(eval, asAttributes(eval.fields())); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(eval); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java index f247d0a631b29..43e13a582276b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java @@ -15,6 +15,6 @@ public final class PushDownRegexExtract extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(RegexExtract re) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(re, re.extractedFields()); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(re); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java index 476da7476f7fb..e9900d96710a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java @@ -19,7 +19,7 @@ import java.util.ArrayList; import java.util.List; -import static org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates.rawTemporaryName; +import static org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer.rawTemporaryName; public final class ReplaceOrderByExpressionWithEval extends OptimizerRules.OptimizerRule { private static int counter = 0; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java index 012d6e307df6c..dbe518770c78d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java @@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.core.util.CollectionUtils; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; @@ -150,6 +151,6 @@ protected LogicalPlan rule(Aggregate aggregate) { } static String syntheticName(Expression expression, Expression af, int counter) { - return SubstituteSurrogates.temporaryName(expression, af, counter); + return LogicalPlanOptimizer.temporaryName(expression, af, counter); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java index 99b0c8047f2ba..099e76010488e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; @@ -141,6 +142,6 @@ protected LogicalPlan rule(Aggregate aggregate) { } static String syntheticName(Expression expression, AggregateFunction af, int counter) { - return SubstituteSurrogates.temporaryName(expression, af, counter); + return LogicalPlanOptimizer.temporaryName(expression, af, counter); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java index b734a72ef5e22..b119d01715dce 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java @@ -14,13 +14,13 @@ import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.optimizer.OptimizerRules; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.expression.SurrogateExpression; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.expression.function.aggregate.Rate; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; @@ -81,7 +81,7 @@ protected LogicalPlan rule(Aggregate aggregate) { var attr = aggFuncToAttr.get(af); // the agg doesn't exist in the Aggregate, create an alias for it and save its attribute if (attr == null) { - var temporaryName = temporaryName(af, agg, counter[0]++); + var temporaryName = LogicalPlanOptimizer.temporaryName(af, agg, counter[0]++); // create a synthetic alias (so it doesn't clash with a user defined name) var newAlias = new Alias(agg.source(), temporaryName, null, af, null, true); attr = newAlias.toAttribute(); @@ -134,28 +134,4 @@ protected LogicalPlan rule(Aggregate aggregate) { return plan; } - - public static String temporaryName(Expression inner, Expression outer, int suffix) { - String in = toString(inner); - String out = toString(outer); - return rawTemporaryName(in, out, String.valueOf(suffix)); - } - - public static String rawTemporaryName(String inner, String outer, String suffix) { - return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + suffix; - } - - static int TO_STRING_LIMIT = 16; - - static String toString(Expression ex) { - return ex instanceof AggregateFunction af ? af.functionName() : extractString(ex); - } - - static String extractString(Expression ex) { - return ex instanceof NamedExpression ne ? ne.name() : limitToString(ex.sourceText()).replace(' ', '_'); - } - - static String limitToString(String string) { - return string.length() > 16 ? string.substring(0, TO_STRING_LIMIT - 1) + ">" : string; - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java index 526cf7f17440d..85103109abed1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java @@ -26,7 +26,6 @@ import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar; import org.elasticsearch.xpack.esql.core.parser.ParserUtils; @@ -194,21 +193,20 @@ public PlanFactory visitDissectCommand(EsqlBaseParser.DissectCommandContext ctx) try { DissectParser parser = new DissectParser(pattern, appendSeparator); + Set referenceKeys = parser.referenceKeys(); - if (referenceKeys.size() > 0) { + if (referenceKeys.isEmpty() == false) { throw new ParsingException( src, "Reference keys not supported in dissect patterns: [%{*{}}]", referenceKeys.iterator().next() ); } - List keys = new ArrayList<>(); - for (var x : parser.outputKeys()) { - if (x.isEmpty() == false) { - keys.add(new ReferenceAttribute(src, x, DataType.KEYWORD)); - } - } - return new Dissect(src, p, expression(ctx.primaryExpression()), new Dissect.Parser(pattern, appendSeparator, parser), keys); + + Dissect.Parser esqlDissectParser = new Dissect.Parser(pattern, appendSeparator, parser); + List keys = esqlDissectParser.keyAttributes(src); + + return new Dissect(src, p, expression(ctx.primaryExpression()), esqlDissectParser, keys); } catch (DissectException e) { throw new ParsingException(src, "Invalid pattern for dissect: [{}]", pattern); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java new file mode 100644 index 0000000000000..0253ac8dafd84 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plan; + +import org.elasticsearch.xpack.esql.core.expression.Attribute; + +import java.util.List; + +/** + * A plan that creates new {@link Attribute}s and appends them to the child + * {@link org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan}'s attributes. + * Attributes are appended on the right hand side of the child's input. In case of name conflicts, the rightmost attribute with + * a given name shadows any attributes left of it + * (c.f. {@link org.elasticsearch.xpack.esql.expression.NamedExpressions#mergeOutputAttributes(List, List)}). + */ +public interface GeneratingPlan> { + List generatedAttributes(); + + /** + * Create a new instance of this node with new output {@link Attribute}s using the given names. + * If an output attribute already has the desired name, we continue using it; otherwise, we + * create a new attribute with a new {@link org.elasticsearch.xpack.esql.core.expression.NameId}. + */ + // TODO: the generated attributes should probably become synthetic once renamed + // blocked on https://github.com/elastic/elasticsearch/issues/98703 + PlanType withGeneratedNames(List newNames); + + default void checkNumberOfNewNames(List newNames) { + if (newNames.size() != generatedAttributes().size()) { + throw new IllegalArgumentException( + "Number of new names is [" + newNames.size() + "] but there are [" + generatedAttributes().size() + "] existing names." + ); + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java index 1307d1870bba4..58167381ea9e8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java @@ -10,11 +10,14 @@ import org.elasticsearch.dissect.DissectParser; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -23,6 +26,17 @@ public class Dissect extends RegexExtract { public record Parser(String pattern, String appendSeparator, DissectParser parser) { + public List keyAttributes(Source src) { + List keys = new ArrayList<>(); + for (var x : parser.outputKeys()) { + if (x.isEmpty() == false) { + keys.add(new ReferenceAttribute(src, x, DataType.KEYWORD)); + } + } + + return keys; + } + // Override hashCode and equals since the parser is considered equal if its pattern and // appendSeparator are equal ( and DissectParser uses reference equality ) @Override @@ -54,6 +68,11 @@ protected NodeInfo info() { return NodeInfo.create(this, Dissect::new, child(), input, parser, extractedFields); } + @Override + public Dissect withGeneratedNames(List newNames) { + return new Dissect(source(), child(), input, parser, renameExtractedFields(newNames)); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java index f418ab5da1c9d..5a3b5b5d1875f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java @@ -10,27 +10,34 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; +import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; +import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public class Enrich extends UnaryPlan { +public class Enrich extends UnaryPlan implements GeneratingPlan { private final Expression policyName; private final NamedExpression matchField; private final EnrichPolicy policy; private final Map concreteIndices; // cluster -> enrich indices + // This could be simplified by just always using an Alias. private final List enrichFields; private List output; @@ -128,6 +135,32 @@ public List output() { return output; } + @Override + public List generatedAttributes() { + return asAttributes(enrichFields); + } + + @Override + public Enrich withGeneratedNames(List newNames) { + checkNumberOfNewNames(newNames); + + List newEnrichFields = new ArrayList<>(enrichFields.size()); + for (int i = 0; i < enrichFields.size(); i++) { + NamedExpression enrichField = enrichFields.get(i); + String newName = newNames.get(i); + if (enrichField.name().equals(newName)) { + newEnrichFields.add(enrichField); + } else if (enrichField instanceof ReferenceAttribute ra) { + newEnrichFields.add(new Alias(ra.source(), newName, ra.qualifier(), ra, new NameId(), ra.synthetic())); + } else if (enrichField instanceof Alias a) { + newEnrichFields.add(new Alias(a.source(), newName, a.qualifier(), a.child(), new NameId(), a.synthetic())); + } else { + throw new IllegalArgumentException("Enrich field must be Alias or ReferenceAttribute"); + } + } + return new Enrich(source(), child(), mode(), policyName(), matchField(), policy(), concreteIndices(), newEnrichFields); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java index bfe11c3d33d87..108122d4b163c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java @@ -10,17 +10,23 @@ import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.AttributeMap; +import org.elasticsearch.xpack.esql.core.expression.NameId; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Objects; +import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public class Eval extends UnaryPlan { +public class Eval extends UnaryPlan implements GeneratingPlan { private final List fields; private List lazyOutput; @@ -43,6 +49,50 @@ public List output() { return lazyOutput; } + @Override + public List generatedAttributes() { + return asAttributes(fields); + } + + @Override + public Eval withGeneratedNames(List newNames) { + checkNumberOfNewNames(newNames); + + return new Eval(source(), child(), renameAliases(fields, newNames)); + } + + private List renameAliases(List originalAttributes, List newNames) { + AttributeMap.Builder aliasReplacedByBuilder = AttributeMap.builder(); + List newFields = new ArrayList<>(originalAttributes.size()); + for (int i = 0; i < originalAttributes.size(); i++) { + Alias field = originalAttributes.get(i); + String newName = newNames.get(i); + if (field.name().equals(newName)) { + newFields.add(field); + } else { + Alias newField = new Alias(field.source(), newName, field.qualifier(), field.child(), new NameId(), field.synthetic()); + newFields.add(newField); + aliasReplacedByBuilder.put(field.toAttribute(), newField.toAttribute()); + } + } + AttributeMap aliasReplacedBy = aliasReplacedByBuilder.build(); + + // We need to also update any references to the old attributes in the new attributes; e.g. + // EVAL x = 1, y = x + 1 + // renaming x, y to x1, y1 + // so far became + // EVAL x1 = 1, y1 = x + 1 + // - but x doesn't exist anymore, so replace it by x1 to obtain + // EVAL x1 = 1, y1 = x1 + 1 + + List newFieldsWithUpdatedRefs = new ArrayList<>(originalAttributes.size()); + for (Alias newField : newFields) { + newFieldsWithUpdatedRefs.add((Alias) newField.transformUp(ReferenceAttribute.class, r -> aliasReplacedBy.resolve(r, r))); + } + + return newFieldsWithUpdatedRefs; + } + @Override public boolean expressionsResolved() { return Resolvables.resolved(fields); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java index e495a2eb76668..3bd870e326157 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java @@ -105,6 +105,11 @@ public List output() { return NamedExpressions.mergeOutputAttributes(extractedFields, child().output()); } + @Override + public Grok withGeneratedNames(List newNames) { + return new Grok(source(), child(), input, parser, renameExtractedFields(newNames)); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java index 5bf45fc0f61ad..7c1d457c18e55 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java @@ -9,16 +9,19 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Objects; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public abstract class RegexExtract extends UnaryPlan { +public abstract class RegexExtract extends UnaryPlan implements GeneratingPlan { protected final Expression input; protected final List extractedFields; @@ -42,10 +45,36 @@ public Expression input() { return input; } + /** + * Upon parsing, these are named according to the {@link Dissect} or {@link Grok} pattern, but can be renamed without changing the + * pattern. + */ public List extractedFields() { return extractedFields; } + @Override + public List generatedAttributes() { + return extractedFields; + } + + List renameExtractedFields(List newNames) { + checkNumberOfNewNames(newNames); + + List renamedExtractedFields = new ArrayList<>(extractedFields.size()); + for (int i = 0; i < newNames.size(); i++) { + Attribute extractedField = extractedFields.get(i); + String newName = newNames.get(i); + if (extractedField.name().equals(newName)) { + renamedExtractedFields.add(extractedField); + } else { + renamedExtractedFields.add(extractedFields.get(i).withName(newNames.get(i)).withId(new NameId())); + } + } + + return renamedExtractedFields; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index ddf5fa6eaf8a3..28855abfff73c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -58,6 +58,8 @@ import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.enrich.EnrichLookupOperator; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; @@ -417,12 +419,14 @@ private PhysicalOperation planDissect(DissectExec dissect, LocalExecutionPlanner Layout.Builder layoutBuilder = source.layout.builder(); layoutBuilder.append(dissect.extractedFields()); final Expression expr = dissect.inputExpression(); - String[] attributeNames = Expressions.names(dissect.extractedFields()).toArray(new String[0]); + // Names in the pattern and layout can differ. + // Attributes need to be rename-able to avoid problems with shadowing - see GeneratingPlan resp. PushDownRegexExtract. + String[] patternNames = Expressions.names(dissect.parser().keyAttributes(Source.EMPTY)).toArray(new String[0]); Layout layout = layoutBuilder.build(); source = source.with( new StringExtractOperator.StringExtractOperatorFactory( - attributeNames, + patternNames, EvalMapper.toEvaluator(expr, layout), () -> (input) -> dissect.parser().parser().parse(input) ), @@ -439,11 +443,15 @@ private PhysicalOperation planGrok(GrokExec grok, LocalExecutionPlannerContext c Map fieldToPos = new HashMap<>(extractedFields.size()); Map fieldToType = new HashMap<>(extractedFields.size()); ElementType[] types = new ElementType[extractedFields.size()]; + List extractedFieldsFromPattern = grok.pattern().extractedFields(); for (int i = 0; i < extractedFields.size(); i++) { - Attribute extractedField = extractedFields.get(i); - ElementType type = PlannerUtils.toElementType(extractedField.dataType()); - fieldToPos.put(extractedField.name(), i); - fieldToType.put(extractedField.name(), type); + DataType extractedFieldType = extractedFields.get(i).dataType(); + // Names in pattern and layout can differ. + // Attributes need to be rename-able to avoid problems with shadowing - see GeneratingPlan resp. PushDownRegexExtract. + String patternName = extractedFieldsFromPattern.get(i).name(); + ElementType type = PlannerUtils.toElementType(extractedFieldType); + fieldToPos.put(patternName, i); + fieldToType.put(patternName, type); types[i] = type; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index e7a999b892f44..669de17891583 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.aggregation.QuantileStates; import org.elasticsearch.core.Tuple; +import org.elasticsearch.dissect.DissectParser; import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.EsqlTestUtils; @@ -21,6 +22,7 @@ import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; import org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; +import org.elasticsearch.xpack.esql.core.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; @@ -42,6 +44,7 @@ import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern; import org.elasticsearch.xpack.esql.core.index.EsIndex; import org.elasticsearch.xpack.esql.core.index.IndexResolution; +import org.elasticsearch.xpack.esql.core.optimizer.OptimizerRules; import org.elasticsearch.xpack.esql.core.plan.logical.Filter; import org.elasticsearch.xpack.esql.core.plan.logical.Limit; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -71,6 +74,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.Values; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateExtract; @@ -114,9 +118,13 @@ import org.elasticsearch.xpack.esql.optimizer.rules.LiteralsOnTheRight; import org.elasticsearch.xpack.esql.optimizer.rules.PushDownAndCombineFilters; import org.elasticsearch.xpack.esql.optimizer.rules.PushDownAndCombineLimits; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownEnrich; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownEval; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownRegexExtract; import org.elasticsearch.xpack.esql.optimizer.rules.SplitInWithFoldableValue; import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.elasticsearch.xpack.esql.parser.ParsingException; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Dissect; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -140,6 +148,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.BiFunction; import java.util.function.Function; import static java.util.Arrays.asList; @@ -157,6 +166,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.getFieldAttribute; import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.EsqlTestUtils.localSource; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.referenceAttribute; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.elasticsearch.xpack.esql.analysis.Analyzer.NO_FIELDS; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyze; @@ -188,6 +198,7 @@ import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; @@ -1021,7 +1032,7 @@ public void testPushDownDissectPastProject() { var keep = as(plan, Project.class); var dissect = as(keep.child(), Dissect.class); - assertThat(dissect.extractedFields(), contains(new ReferenceAttribute(Source.EMPTY, "y", DataType.KEYWORD))); + assertThat(dissect.extractedFields(), contains(referenceAttribute("y", DataType.KEYWORD))); } public void testPushDownGrokPastProject() { @@ -1034,7 +1045,7 @@ public void testPushDownGrokPastProject() { var keep = as(plan, Project.class); var grok = as(keep.child(), Grok.class); - assertThat(grok.extractedFields(), contains(new ReferenceAttribute(Source.EMPTY, "y", DataType.KEYWORD))); + assertThat(grok.extractedFields(), contains(referenceAttribute("y", DataType.KEYWORD))); } public void testPushDownFilterPastProjectUsingEval() { @@ -4254,6 +4265,210 @@ public void testPushdownWithOverwrittenName() { } } + record PushdownShadowingGeneratingPlanTestCase( + BiFunction applyLogicalPlan, + OptimizerRules.OptimizerRule rule + ) {}; + + static PushdownShadowingGeneratingPlanTestCase[] PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES = { + // | EVAL y = to_integer(x), y = y + 1 + new PushdownShadowingGeneratingPlanTestCase((plan, attr) -> { + Alias y1 = new Alias(EMPTY, "y", new ToInteger(EMPTY, attr)); + Alias y2 = new Alias(EMPTY, "y", new Add(EMPTY, y1.toAttribute(), new Literal(EMPTY, 1, INTEGER))); + return new Eval(EMPTY, plan, List.of(y1, y2)); + }, new PushDownEval()), + // | DISSECT x "%{y} %{y}" + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Dissect( + EMPTY, + plan, + attr, + new Dissect.Parser("%{y} %{y}", ",", new DissectParser("%{y} %{y}", ",")), + List.of(new ReferenceAttribute(EMPTY, "y", KEYWORD), new ReferenceAttribute(EMPTY, "y", KEYWORD)) + ), + new PushDownRegexExtract() + ), + // | GROK x "%{WORD:y} %{WORD:y}" + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Grok(EMPTY, plan, attr, Grok.pattern(EMPTY, "%{WORD:y} %{WORD:y}")), + new PushDownRegexExtract() + ), + // | ENRICH some_policy ON x WITH y = some_enrich_idx_field, y = some_other_enrich_idx_field + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Enrich( + EMPTY, + plan, + Enrich.Mode.ANY, + new Literal(EMPTY, "some_policy", KEYWORD), + attr, + null, + Map.of(), + List.of( + new Alias(EMPTY, "y", new ReferenceAttribute(EMPTY, "some_enrich_idx_field", KEYWORD)), + new Alias(EMPTY, "y", new ReferenceAttribute(EMPTY, "some_other_enrich_idx_field", KEYWORD)) + ) + ), + new PushDownEnrich() + ) }; + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Project[[y{r}#3, x{r}#2]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * We can freely push down the Eval without renaming, but need to update the Project's references. + * + * Project[[x{r}#2, y{r}#6 AS y]] + * \_Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. + */ + public void testPushShadowingGeneratingPlanPastProject() { + Alias x = new Alias(EMPTY, "x", new Literal(EMPTY, "1", KEYWORD)); + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + LogicalPlan initialRow = new Row(EMPTY, List.of(x, y)); + LogicalPlan initialProject = new Project(EMPTY, initialRow, List.of(y.toAttribute(), x.toAttribute())); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, x.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + assertEquals(newGeneratedExprs, initialGeneratedExprs); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGenerated = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "y")); + assertThat(projections, everyItem(instanceOf(ReferenceAttribute.class))); + ReferenceAttribute yShadowed = as(projections.get(1), ReferenceAttribute.class); + assertTrue(yShadowed.semanticEquals(rightmostGenerated)); + } + } + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Project[[x{r}#2, y{r}#3, y{r}#3 AS z]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * To push down the Eval, we must not shadow the reference y{r}#3, so we rename. + * + * Project[[x{r}#2, y{r}#3 AS z, $$y$temp_name$10{r}#12 AS y]] + * Eval[[TO_INTEGER(x{r}#2) AS $$y$temp_name$10, $$y$temp_name$10{r}#11 + 1[INTEGER] AS $$y$temp_name$10]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. + */ + public void testPushShadowingGeneratingPlanPastRenamingProject() { + Alias x = new Alias(EMPTY, "x", new Literal(EMPTY, "1", KEYWORD)); + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + LogicalPlan initialRow = new Row(EMPTY, List.of(x, y)); + LogicalPlan initialProject = new Project( + EMPTY, + initialRow, + List.of(x.toAttribute(), y.toAttribute(), new Alias(EMPTY, "z", y.toAttribute())) + ); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, x.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + List newNames = Expressions.names(newGeneratedExprs); + assertThat(newNames.size(), equalTo(initialGeneratedExprs.size())); + assertThat(newNames, everyItem(startsWith("$$y$temp_name$"))); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGeneratedWithNewName = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "z", "y")); + assertThat(projections.get(0), instanceOf(ReferenceAttribute.class)); + Alias zAlias = as(projections.get(1), Alias.class); + ReferenceAttribute yRenamed = as(zAlias.child(), ReferenceAttribute.class); + assertEquals(yRenamed.name(), "y"); + Alias yAlias = as(projections.get(2), Alias.class); + ReferenceAttribute yTempRenamed = as(yAlias.child(), ReferenceAttribute.class); + assertTrue(yTempRenamed.semanticEquals(rightmostGeneratedWithNewName)); + } + } + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#3 + 1[INTEGER] AS y]] + * \_Project[[y{r}#1, y{r}#1 AS x]] + * \_Row[[2[INTEGER] AS y]] + * + * To push down the Eval, we must not shadow the reference y{r}#1, so we rename. + * Additionally, the rename "y AS x" needs to be propagated into the Eval. + * + * Project[[y{r}#1 AS x, $$y$temp_name$10{r}#12 AS y]] + * Eval[[TO_INTEGER(y{r}#1) AS $$y$temp_name$10, $$y$temp_name$10{r}#11 + 1[INTEGER] AS $$y$temp_name$10]] + * \_Row[[2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. + */ + public void testPushShadowingGeneratingPlanPastRenamingProjectWithResolution() { + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + Alias yAliased = new Alias(EMPTY, "x", y.toAttribute()); + LogicalPlan initialRow = new Row(EMPTY, List.of(y)); + LogicalPlan initialProject = new Project(EMPTY, initialRow, List.of(y.toAttribute(), yAliased)); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, yAliased.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + // This ensures that our generating plan doesn't use invalid references, resp. that any rename from the Project has + // been propagated into the generating plan. + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + List newNames = Expressions.names(newGeneratedExprs); + assertThat(newNames.size(), equalTo(initialGeneratedExprs.size())); + assertThat(newNames, everyItem(startsWith("$$y$temp_name$"))); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGeneratedWithNewName = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "y")); + Alias yRenamed = as(projections.get(0), Alias.class); + assertTrue(yRenamed.child().semanticEquals(y.toAttribute())); + Alias yTempRenamed = as(projections.get(1), Alias.class); + ReferenceAttribute yTemp = as(yTempRenamed.child(), ReferenceAttribute.class); + assertTrue(yTemp.semanticEquals(rightmostGeneratedWithNewName)); + } + } + /** * Expects * Project[[min{r}#4, languages{f}#11]] diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java index 545f3efe8ca79..63204b4dd797d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java @@ -10,7 +10,6 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -55,10 +54,6 @@ static UnresolvedAttribute attribute(String name) { return new UnresolvedAttribute(EMPTY, name); } - static ReferenceAttribute referenceAttribute(String name, DataType type) { - return new ReferenceAttribute(EMPTY, name, type); - } - static Literal integer(int i) { return new Literal(EMPTY, i, DataType.INTEGER); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index 111c90790caf0..a195fb8180bf3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -53,6 +53,7 @@ import java.util.function.Function; import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.referenceAttribute; import static org.elasticsearch.xpack.esql.core.expression.Literal.FALSE; import static org.elasticsearch.xpack.esql.core.expression.Literal.TRUE; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;