diff --git a/docs/changelog/108360.yaml b/docs/changelog/108360.yaml new file mode 100644 index 0000000000000..087dd2649c6aa --- /dev/null +++ b/docs/changelog/108360.yaml @@ -0,0 +1,6 @@ +pr: 108360 +summary: "ESQL: Fix variable shadowing when pushing down past Project" +area: ES|QL +type: bug +issues: + - 108008 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index d7e067658267f..68696ad5ac99a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -31,6 +31,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.predicate.Range; import org.elasticsearch.xpack.esql.core.index.EsIndex; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -169,6 +170,10 @@ public static Literal of(Source source, Object value) { return new Literal(source, value, DataType.fromJava(value)); } + public static ReferenceAttribute referenceAttribute(String name, DataType type) { + return new ReferenceAttribute(EMPTY, name, type); + } + public static Range rangeOf(Expression value, Expression lower, boolean includeLower, Expression upper, boolean includeUpper) { return new Range(EMPTY, value, lower, includeLower, upper, includeUpper, randomZone()); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 8c4e797b7982d..38f09d2e3c56e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -75,6 +75,39 @@ first_name:keyword | last_name:keyword | name:keyword | foo:keyword Georgi | Facello | Georgi1 Facello | Facello ; +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME city AS c +| DISSECT long_city_name "Zurich, the %{city} city in Switzerland" +; + +c:keyword | long_city_name:keyword | city:keyword +Zürich | Zurich, the largest city in Switzerland | largest +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME city AS c +| DISSECT long_city_name "Zurich, the %{city} city in %{foo}" +; + +c:keyword | long_city_name:keyword | city:keyword | foo:keyword +Zürich | Zurich, the largest city in Switzerland | largest | Switzerland +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME long_city_name AS c +| DISSECT c "Zurich, the %{long_city_name} city in Switzerland" +; + +city:keyword | c:keyword | long_city_name:keyword +Zürich | Zurich, the largest city in Switzerland | largest +; + complexPattern ROW a = "1953-01-23T12:15:00Z - some text - 127.0.0.1;" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index ab2ddb84ed969..925c08f317125 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -174,6 +174,42 @@ city:keyword | airport:text Zürich | Zurich Int'l ; +shadowingWhenPushedDownPastRename +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME airport AS a +| ENRICH city_names ON city WITH airport +; + +city:keyword | a:keyword | airport:text +Zürich | ZRH | Zurich Int'l +; + +shadowingWhenPushedDownPastRename2 +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME airport AS a +| ENRICH city_names ON city WITH airport, region +; + +city:keyword | a:keyword | airport:text | region:text +Zürich | ZRH | Zurich Int'l | Bezirk Zürich +; + +shadowingWhenPushedDownPastRename3 +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME city as c +| ENRICH city_names ON c WITH city = airport +; + +c:keyword | airport:keyword | city:text +Zürich | ZRH | Zurich Int'l +; + simple required_capability: enrich_load diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 770358e5120da..61a0ccd4af0c5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -55,6 +55,45 @@ x:integer 9999 ; +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = 3 +; + +z:integer | languages:integer | emp_no:integer + 10001 | 2 | 3 +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = z + 1, emp_no = emp_no + languages, a = 0, languages = -1 +; + +z:integer | emp_no:integer | a:integer | languages:integer + 10001 | 10004 | 0 | -1 +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = z + 1 +; + +z:integer | languages:integer | emp_no:integer + 10001 | 2 | 10002 +; + withMath row a = 1 | eval b = 2 + 3; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index d9857e8c122ef..98c88d06caa75 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -76,6 +76,39 @@ San Francisco | CA 94108 | ["CA", "94108"] Tokyo | 100-7014 | null ; +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME city AS c +| GROK long_city_name "Zürich, the %{WORD:city} %{WORD:city} %{WORD:city} %{WORD:city}" +; + +c:keyword | long_city_name:keyword | city:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "city", "in", "Switzerland"] +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME city AS c +| GROK long_city_name "Zürich, the %{WORD:city} %{WORD:foo} %{WORD:city} %{WORD:foo}" +; + +c:keyword | long_city_name:keyword | city:keyword | foo:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "in"] | ["city", "Switzerland"] +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME long_city_name AS c +| GROK c "Zürich, the %{WORD:long_city_name} %{WORD:long_city_name} %{WORD:long_city_name} %{WORD:long_city_name}" +; + +city:keyword | c:keyword | long_city_name:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "city", "in", "Switzerland"] +; + complexPattern ROW a = "1953-01-23T12:15:00Z 127.0.0.1 some.email@foo.com 42" | GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index b2080b54b981c..be4342b95c6b8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -575,6 +575,65 @@ ca:l | cx:l | l:i 1 | 1 | null ; +/////////////////////////////////////////////////////////////// +// Test edge case interaction with push down past a rename +// https://github.com/elastic/elasticsearch/issues/108008 +/////////////////////////////////////////////////////////////// + +countSameFieldWithEval +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | eval b = gender | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithDissect +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | dissect gender "%{b}" | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithGrok +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | grok gender "%{USERNAME:b}" | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithEnrich +required_capability: fixed_pushdown_past_project +required_capability: enrich_load +from employees | stats b = count(gender), c = count(gender) by gender | enrich languages_policy on gender with b = language_name | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | null +57 | M | null +; + +countSameFieldWithEnrichLimit0 +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | enrich languages_policy on gender with b = language_name | sort c asc | limit 0 +; + +c:l | gender:s | b:s +; +/////////////////////////////////////////////////////////////// + aggsWithoutStats from employees | stats by gender | sort gender; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 5641f49b039f6..918e9614f9070 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -139,7 +139,13 @@ public enum Cap { * Fix for non-unique attribute names in ROW and logical plans. * https://github.com/elastic/elasticsearch/issues/110541 */ - UNIQUE_NAMES; + UNIQUE_NAMES, + + /** + * Make attributes of GROK/DISSECT adjustable and fix a shadowing bug when pushing them down past PROJECT. + * https://github.com/elastic/elasticsearch/issues/108008 + */ + FIXED_PUSHDOWN_PAST_PROJECT; private final boolean snapshotOnly; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index b41156824be12..37c8cceb3f605 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -65,7 +65,7 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.DateTimeArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; -import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Drop; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -1193,7 +1193,7 @@ private Expression createIfDoesNotAlreadyExist( List unionFieldAttributes ) { // Generate new ID for the field and suffix it with the data type to maintain unique attribute names. - String unionTypedFieldName = SubstituteSurrogates.rawTemporaryName( + String unionTypedFieldName = LogicalPlanOptimizer.rawTemporaryName( fa.name(), "converted_to", resolvedField.getDataType().typeName() diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java index d0c8adfd3c858..624ea9a030208 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java @@ -33,7 +33,8 @@ public static List mergeOutputAttributes( /** * Merges output expressions of a command given the new attributes plus the existing inputs that are emitted as outputs. * As a general rule, child output will come first in the list, followed by the new fields. - * In case of name collisions, only last entry is preserved (previous expressions with the same name are discarded) + * In case of name collisions, only the last entry is preserved (previous expressions with the same name are discarded) + * and the new attributes have precedence over the child output. * @param fields the fields added by the command * @param childOutput the command input that has to be propagated as output * @return diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index ca4b5d17deed3..439289c879c27 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -15,6 +15,9 @@ import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.NameId; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -22,6 +25,7 @@ import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.optimizer.rules.AddDefaultTopN; import org.elasticsearch.xpack.esql.optimizer.rules.BooleanFunctionEqualsElimination; import org.elasticsearch.xpack.esql.optimizer.rules.BooleanSimplification; @@ -67,6 +71,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSpatialSurrogates; import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates; import org.elasticsearch.xpack.esql.optimizer.rules.TranslateMetricsAggregate; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; @@ -74,8 +79,11 @@ import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Set; import static java.util.Arrays.asList; @@ -89,6 +97,34 @@ public LogicalPlanOptimizer(LogicalOptimizerContext optimizerContext) { super(optimizerContext); } + public static String temporaryName(Expression inner, Expression outer, int suffix) { + String in = toString(inner); + String out = toString(outer); + return rawTemporaryName(in, out, String.valueOf(suffix)); + } + + public static String locallyUniqueTemporaryName(String inner, String outer) { + return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + new NameId(); + } + + public static String rawTemporaryName(String inner, String outer, String suffix) { + return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + suffix; + } + + static String toString(Expression ex) { + return ex instanceof AggregateFunction af ? af.functionName() : extractString(ex); + } + + static String extractString(Expression ex) { + return ex instanceof NamedExpression ne ? ne.name() : limitToString(ex.sourceText()).replace(' ', '_'); + } + + static int TO_STRING_LIMIT = 16; + + static String limitToString(String string) { + return string.length() > TO_STRING_LIMIT ? string.substring(0, TO_STRING_LIMIT - 1) + ">" : string; + } + public LogicalPlan optimize(LogicalPlan verified) { var optimized = execute(verified); @@ -189,35 +225,26 @@ public static LogicalPlan skipPlan(UnaryPlan plan, LocalSupplier supplier) { /** * Pushes LogicalPlans which generate new attributes (Eval, Grok/Dissect, Enrich), past OrderBys and Projections. - * Although it seems arbitrary whether the OrderBy or the Eval is executed first, this transformation ensures that OrderBys only - * separated by an eval can be combined by PushDownAndCombineOrderBy. - * - * E.g.: - * - * ... | sort a | eval x = b + 1 | sort x - * - * becomes - * - * ... | eval x = b + 1 | sort a | sort x - * - * Ordering the Evals before the OrderBys has the advantage that it's always possible to order the plans like this. + * Although it seems arbitrary whether the OrderBy or the generating plan is executed first, this transformation ensures that OrderBys + * only separated by e.g. an Eval can be combined by {@link PushDownAndCombineOrderBy}. + *

+ * E.g. {@code ... | sort a | eval x = b + 1 | sort x} becomes {@code ... | eval x = b + 1 | sort a | sort x} + *

+ * Ordering the generating plans before the OrderBys has the advantage that it's always possible to order the plans like this. * E.g., in the example above it would not be possible to put the eval after the two orderBys. - * - * In case one of the Eval's fields would shadow the orderBy's attributes, we rename the attribute first. - * - * E.g. - * - * ... | sort a | eval a = b + 1 | ... - * - * becomes - * - * ... | eval $$a = a | eval a = b + 1 | sort $$a | drop $$a + *

+ * In case one of the generating plan's attributes would shadow the OrderBy's attributes, we alias the generated attribute first. + *

+ * E.g. {@code ... | sort a | eval a = b + 1 | ...} becomes {@code ... | eval $$a = a | eval a = b + 1 | sort $$a | drop $$a ...} + *

+ * In case the generating plan's attributes would shadow the Project's attributes, we rename the generated attributes in place. + *

+ * E.g. {@code ... | rename a as z | eval a = b + 1 | ...} becomes {@code ... eval $$a = b + 1 | rename a as z, $$a as a ...} */ - public static LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(UnaryPlan generatingPlan, List generatedAttributes) { + public static > LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(Plan generatingPlan) { LogicalPlan child = generatingPlan.child(); - if (child instanceof OrderBy orderBy) { - Set evalFieldNames = new LinkedHashSet<>(Expressions.names(generatedAttributes)); + Set evalFieldNames = new LinkedHashSet<>(Expressions.names(generatingPlan.generatedAttributes())); // Look for attributes in the OrderBy's expressions and create aliases with temporary names for them. AttributeReplacement nonShadowedOrders = renameAttributesInExpressions(evalFieldNames, orderBy.order()); @@ -238,9 +265,66 @@ public static LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(UnaryPlan gene } return orderBy.replaceChild(generatingPlan.replaceChild(orderBy.child())); - } else if (child instanceof Project) { - var projectWithEvalChild = pushDownPastProject(generatingPlan); - return projectWithEvalChild.withProjections(mergeOutputExpressions(generatedAttributes, projectWithEvalChild.projections())); + } else if (child instanceof Project project) { + // We need to account for attribute shadowing: a rename might rely on a name generated in an Eval/Grok/Dissect/Enrich. + // E.g. in: + // + // Eval[[2 * x{f}#1 AS y]] + // \_Project[[x{f}#1, y{f}#2, y{f}#2 AS z]] + // + // Just moving the Eval down breaks z because we shadow y{f}#2. + // Instead, we use a different alias in the Eval, eventually renaming back to y: + // + // Project[[x{f}#1, y{f}#2 as z, $$y{r}#3 as y]] + // \_Eval[[2 * x{f}#1 as $$y]] + + List generatedAttributes = generatingPlan.generatedAttributes(); + + @SuppressWarnings("unchecked") + Plan generatingPlanWithResolvedExpressions = (Plan) resolveRenamesFromProject(generatingPlan, project); + + Set namesReferencedInRenames = new HashSet<>(); + for (NamedExpression ne : project.projections()) { + if (ne instanceof Alias as) { + namesReferencedInRenames.addAll(as.child().references().names()); + } + } + Map renameGeneratedAttributeTo = newNamesForConflictingAttributes( + generatingPlan.generatedAttributes(), + namesReferencedInRenames + ); + List newNames = generatedAttributes.stream() + .map(attr -> renameGeneratedAttributeTo.getOrDefault(attr.name(), attr.name())) + .toList(); + Plan generatingPlanWithRenamedAttributes = generatingPlanWithResolvedExpressions.withGeneratedNames(newNames); + + // Put the project at the top, but include the generated attributes. + // Any generated attributes that had to be renamed need to be re-renamed to their original names. + List generatedAttributesRenamedToOriginal = new ArrayList<>(generatedAttributes.size()); + List renamedGeneratedAttributes = generatingPlanWithRenamedAttributes.generatedAttributes(); + for (int i = 0; i < generatedAttributes.size(); i++) { + Attribute originalAttribute = generatedAttributes.get(i); + Attribute renamedAttribute = renamedGeneratedAttributes.get(i); + if (originalAttribute.name().equals(renamedAttribute.name())) { + generatedAttributesRenamedToOriginal.add(renamedAttribute); + } else { + generatedAttributesRenamedToOriginal.add( + new Alias( + originalAttribute.source(), + originalAttribute.name(), + originalAttribute.qualifier(), + renamedAttribute, + originalAttribute.id(), + originalAttribute.synthetic() + ) + ); + } + } + + Project projectWithGeneratingChild = project.replaceChild(generatingPlanWithRenamedAttributes.replaceChild(project.child())); + return projectWithGeneratingChild.withProjections( + mergeOutputExpressions(generatedAttributesRenamedToOriginal, projectWithGeneratingChild.projections()) + ); } return generatingPlan; @@ -264,8 +348,9 @@ private static AttributeReplacement renameAttributesInExpressions( rewrittenExpressions.add(expr.transformUp(Attribute.class, attr -> { if (attributeNamesToRename.contains(attr.name())) { Alias renamedAttribute = aliasesForReplacedAttributes.computeIfAbsent(attr, a -> { - String tempName = SubstituteSurrogates.rawTemporaryName(a.name(), "temp_name", a.id().toString()); + String tempName = locallyUniqueTemporaryName(a.name(), "temp_name"); // TODO: this should be synthetic + // blocked on https://github.com/elastic/elasticsearch/issues/98703 return new Alias(a.source(), tempName, null, a, null, false); }); return renamedAttribute.toAttribute(); @@ -278,16 +363,28 @@ private static AttributeReplacement renameAttributesInExpressions( return new AttributeReplacement(rewrittenExpressions, aliasesForReplacedAttributes); } + private static Map newNamesForConflictingAttributes( + List potentiallyConflictingAttributes, + Set reservedNames + ) { + if (reservedNames.isEmpty()) { + return Map.of(); + } + + Map renameAttributeTo = new HashMap<>(); + for (Attribute attr : potentiallyConflictingAttributes) { + String name = attr.name(); + if (reservedNames.contains(name)) { + renameAttributeTo.putIfAbsent(name, locallyUniqueTemporaryName(name, "temp_name")); + } + } + + return renameAttributeTo; + } + public static Project pushDownPastProject(UnaryPlan parent) { if (parent.child() instanceof Project project) { - AttributeMap.Builder aliasBuilder = AttributeMap.builder(); - project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child())); - var aliases = aliasBuilder.build(); - - var expressionsWithResolvedAliases = (UnaryPlan) parent.transformExpressionsOnly( - ReferenceAttribute.class, - r -> aliases.resolve(r, r) - ); + UnaryPlan expressionsWithResolvedAliases = resolveRenamesFromProject(parent, project); return project.replaceChild(expressionsWithResolvedAliases.replaceChild(project.child())); } else { @@ -295,6 +392,14 @@ public static Project pushDownPastProject(UnaryPlan parent) { } } + private static UnaryPlan resolveRenamesFromProject(UnaryPlan plan, Project project) { + AttributeMap.Builder aliasBuilder = AttributeMap.builder(); + project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child())); + var aliases = aliasBuilder.build(); + + return (UnaryPlan) plan.transformExpressionsOnly(ReferenceAttribute.class, r -> aliases.resolve(r, r)); + } + public abstract static class ParameterizedOptimizerRule extends ParameterizedRule< SubPlan, LogicalPlan, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java index c02b9948def3f..fe1a66737b17b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java @@ -14,12 +14,11 @@ import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.QueryPlan; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; -import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.MvExpand; -import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.Row; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; @@ -103,18 +102,12 @@ protected AttributeSet generates(LogicalPlan logicalPlan) { || logicalPlan instanceof Aggregate) { return logicalPlan.outputSet(); } - if (logicalPlan instanceof Eval eval) { - return new AttributeSet(Expressions.asAttributes(eval.fields())); - } - if (logicalPlan instanceof RegexExtract extract) { - return new AttributeSet(extract.extractedFields()); + if (logicalPlan instanceof GeneratingPlan generating) { + return new AttributeSet(generating.generatedAttributes()); } if (logicalPlan instanceof MvExpand mvExpand) { return new AttributeSet(mvExpand.expanded()); } - if (logicalPlan instanceof Enrich enrich) { - return new AttributeSet(Expressions.asAttributes(enrich.enrichFields())); - } return AttributeSet.EMPTY; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java index f6a0154108f2d..7e102a36828a9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java @@ -12,11 +12,9 @@ import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Enrich; -import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; - public final class PushDownEnrich extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(Enrich en) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(en, asAttributes(en.enrichFields())); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(en); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java index b936e5569c950..e9b42be8dd397 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java @@ -12,11 +12,9 @@ import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Eval; -import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; - public final class PushDownEval extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(Eval eval) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(eval, asAttributes(eval.fields())); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(eval); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java index f247d0a631b29..43e13a582276b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java @@ -15,6 +15,6 @@ public final class PushDownRegexExtract extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(RegexExtract re) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(re, re.extractedFields()); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(re); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java index 476da7476f7fb..e9900d96710a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java @@ -19,7 +19,7 @@ import java.util.ArrayList; import java.util.List; -import static org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates.rawTemporaryName; +import static org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer.rawTemporaryName; public final class ReplaceOrderByExpressionWithEval extends OptimizerRules.OptimizerRule { private static int counter = 0; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java index 012d6e307df6c..dbe518770c78d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java @@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.core.util.CollectionUtils; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; @@ -150,6 +151,6 @@ protected LogicalPlan rule(Aggregate aggregate) { } static String syntheticName(Expression expression, Expression af, int counter) { - return SubstituteSurrogates.temporaryName(expression, af, counter); + return LogicalPlanOptimizer.temporaryName(expression, af, counter); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java index 99b0c8047f2ba..099e76010488e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; @@ -141,6 +142,6 @@ protected LogicalPlan rule(Aggregate aggregate) { } static String syntheticName(Expression expression, AggregateFunction af, int counter) { - return SubstituteSurrogates.temporaryName(expression, af, counter); + return LogicalPlanOptimizer.temporaryName(expression, af, counter); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java index b734a72ef5e22..b119d01715dce 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java @@ -14,13 +14,13 @@ import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; -import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.optimizer.OptimizerRules; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.expression.SurrogateExpression; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.expression.function.aggregate.Rate; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; @@ -81,7 +81,7 @@ protected LogicalPlan rule(Aggregate aggregate) { var attr = aggFuncToAttr.get(af); // the agg doesn't exist in the Aggregate, create an alias for it and save its attribute if (attr == null) { - var temporaryName = temporaryName(af, agg, counter[0]++); + var temporaryName = LogicalPlanOptimizer.temporaryName(af, agg, counter[0]++); // create a synthetic alias (so it doesn't clash with a user defined name) var newAlias = new Alias(agg.source(), temporaryName, null, af, null, true); attr = newAlias.toAttribute(); @@ -134,28 +134,4 @@ protected LogicalPlan rule(Aggregate aggregate) { return plan; } - - public static String temporaryName(Expression inner, Expression outer, int suffix) { - String in = toString(inner); - String out = toString(outer); - return rawTemporaryName(in, out, String.valueOf(suffix)); - } - - public static String rawTemporaryName(String inner, String outer, String suffix) { - return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + suffix; - } - - static int TO_STRING_LIMIT = 16; - - static String toString(Expression ex) { - return ex instanceof AggregateFunction af ? af.functionName() : extractString(ex); - } - - static String extractString(Expression ex) { - return ex instanceof NamedExpression ne ? ne.name() : limitToString(ex.sourceText()).replace(' ', '_'); - } - - static String limitToString(String string) { - return string.length() > 16 ? string.substring(0, TO_STRING_LIMIT - 1) + ">" : string; - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java index 526cf7f17440d..85103109abed1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java @@ -26,7 +26,6 @@ import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar; import org.elasticsearch.xpack.esql.core.parser.ParserUtils; @@ -194,21 +193,20 @@ public PlanFactory visitDissectCommand(EsqlBaseParser.DissectCommandContext ctx) try { DissectParser parser = new DissectParser(pattern, appendSeparator); + Set referenceKeys = parser.referenceKeys(); - if (referenceKeys.size() > 0) { + if (referenceKeys.isEmpty() == false) { throw new ParsingException( src, "Reference keys not supported in dissect patterns: [%{*{}}]", referenceKeys.iterator().next() ); } - List keys = new ArrayList<>(); - for (var x : parser.outputKeys()) { - if (x.isEmpty() == false) { - keys.add(new ReferenceAttribute(src, x, DataType.KEYWORD)); - } - } - return new Dissect(src, p, expression(ctx.primaryExpression()), new Dissect.Parser(pattern, appendSeparator, parser), keys); + + Dissect.Parser esqlDissectParser = new Dissect.Parser(pattern, appendSeparator, parser); + List keys = esqlDissectParser.keyAttributes(src); + + return new Dissect(src, p, expression(ctx.primaryExpression()), esqlDissectParser, keys); } catch (DissectException e) { throw new ParsingException(src, "Invalid pattern for dissect: [{}]", pattern); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java new file mode 100644 index 0000000000000..0253ac8dafd84 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plan; + +import org.elasticsearch.xpack.esql.core.expression.Attribute; + +import java.util.List; + +/** + * A plan that creates new {@link Attribute}s and appends them to the child + * {@link org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan}'s attributes. + * Attributes are appended on the right hand side of the child's input. In case of name conflicts, the rightmost attribute with + * a given name shadows any attributes left of it + * (c.f. {@link org.elasticsearch.xpack.esql.expression.NamedExpressions#mergeOutputAttributes(List, List)}). + */ +public interface GeneratingPlan> { + List generatedAttributes(); + + /** + * Create a new instance of this node with new output {@link Attribute}s using the given names. + * If an output attribute already has the desired name, we continue using it; otherwise, we + * create a new attribute with a new {@link org.elasticsearch.xpack.esql.core.expression.NameId}. + */ + // TODO: the generated attributes should probably become synthetic once renamed + // blocked on https://github.com/elastic/elasticsearch/issues/98703 + PlanType withGeneratedNames(List newNames); + + default void checkNumberOfNewNames(List newNames) { + if (newNames.size() != generatedAttributes().size()) { + throw new IllegalArgumentException( + "Number of new names is [" + newNames.size() + "] but there are [" + generatedAttributes().size() + "] existing names." + ); + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java index 1307d1870bba4..58167381ea9e8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java @@ -10,11 +10,14 @@ import org.elasticsearch.dissect.DissectParser; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -23,6 +26,17 @@ public class Dissect extends RegexExtract { public record Parser(String pattern, String appendSeparator, DissectParser parser) { + public List keyAttributes(Source src) { + List keys = new ArrayList<>(); + for (var x : parser.outputKeys()) { + if (x.isEmpty() == false) { + keys.add(new ReferenceAttribute(src, x, DataType.KEYWORD)); + } + } + + return keys; + } + // Override hashCode and equals since the parser is considered equal if its pattern and // appendSeparator are equal ( and DissectParser uses reference equality ) @Override @@ -54,6 +68,11 @@ protected NodeInfo info() { return NodeInfo.create(this, Dissect::new, child(), input, parser, extractedFields); } + @Override + public Dissect withGeneratedNames(List newNames) { + return new Dissect(source(), child(), input, parser, renameExtractedFields(newNames)); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java index f418ab5da1c9d..5a3b5b5d1875f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java @@ -10,27 +10,34 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; +import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; +import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public class Enrich extends UnaryPlan { +public class Enrich extends UnaryPlan implements GeneratingPlan { private final Expression policyName; private final NamedExpression matchField; private final EnrichPolicy policy; private final Map concreteIndices; // cluster -> enrich indices + // This could be simplified by just always using an Alias. private final List enrichFields; private List output; @@ -128,6 +135,32 @@ public List output() { return output; } + @Override + public List generatedAttributes() { + return asAttributes(enrichFields); + } + + @Override + public Enrich withGeneratedNames(List newNames) { + checkNumberOfNewNames(newNames); + + List newEnrichFields = new ArrayList<>(enrichFields.size()); + for (int i = 0; i < enrichFields.size(); i++) { + NamedExpression enrichField = enrichFields.get(i); + String newName = newNames.get(i); + if (enrichField.name().equals(newName)) { + newEnrichFields.add(enrichField); + } else if (enrichField instanceof ReferenceAttribute ra) { + newEnrichFields.add(new Alias(ra.source(), newName, ra.qualifier(), ra, new NameId(), ra.synthetic())); + } else if (enrichField instanceof Alias a) { + newEnrichFields.add(new Alias(a.source(), newName, a.qualifier(), a.child(), new NameId(), a.synthetic())); + } else { + throw new IllegalArgumentException("Enrich field must be Alias or ReferenceAttribute"); + } + } + return new Enrich(source(), child(), mode(), policyName(), matchField(), policy(), concreteIndices(), newEnrichFields); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java index bfe11c3d33d87..108122d4b163c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java @@ -10,17 +10,23 @@ import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.AttributeMap; +import org.elasticsearch.xpack.esql.core.expression.NameId; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Objects; +import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public class Eval extends UnaryPlan { +public class Eval extends UnaryPlan implements GeneratingPlan { private final List fields; private List lazyOutput; @@ -43,6 +49,50 @@ public List output() { return lazyOutput; } + @Override + public List generatedAttributes() { + return asAttributes(fields); + } + + @Override + public Eval withGeneratedNames(List newNames) { + checkNumberOfNewNames(newNames); + + return new Eval(source(), child(), renameAliases(fields, newNames)); + } + + private List renameAliases(List originalAttributes, List newNames) { + AttributeMap.Builder aliasReplacedByBuilder = AttributeMap.builder(); + List newFields = new ArrayList<>(originalAttributes.size()); + for (int i = 0; i < originalAttributes.size(); i++) { + Alias field = originalAttributes.get(i); + String newName = newNames.get(i); + if (field.name().equals(newName)) { + newFields.add(field); + } else { + Alias newField = new Alias(field.source(), newName, field.qualifier(), field.child(), new NameId(), field.synthetic()); + newFields.add(newField); + aliasReplacedByBuilder.put(field.toAttribute(), newField.toAttribute()); + } + } + AttributeMap aliasReplacedBy = aliasReplacedByBuilder.build(); + + // We need to also update any references to the old attributes in the new attributes; e.g. + // EVAL x = 1, y = x + 1 + // renaming x, y to x1, y1 + // so far became + // EVAL x1 = 1, y1 = x + 1 + // - but x doesn't exist anymore, so replace it by x1 to obtain + // EVAL x1 = 1, y1 = x1 + 1 + + List newFieldsWithUpdatedRefs = new ArrayList<>(originalAttributes.size()); + for (Alias newField : newFields) { + newFieldsWithUpdatedRefs.add((Alias) newField.transformUp(ReferenceAttribute.class, r -> aliasReplacedBy.resolve(r, r))); + } + + return newFieldsWithUpdatedRefs; + } + @Override public boolean expressionsResolved() { return Resolvables.resolved(fields); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java index e495a2eb76668..3bd870e326157 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java @@ -105,6 +105,11 @@ public List output() { return NamedExpressions.mergeOutputAttributes(extractedFields, child().output()); } + @Override + public Grok withGeneratedNames(List newNames) { + return new Grok(source(), child(), input, parser, renameExtractedFields(newNames)); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java index 5bf45fc0f61ad..7c1d457c18e55 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java @@ -9,16 +9,19 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Objects; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public abstract class RegexExtract extends UnaryPlan { +public abstract class RegexExtract extends UnaryPlan implements GeneratingPlan { protected final Expression input; protected final List extractedFields; @@ -42,10 +45,36 @@ public Expression input() { return input; } + /** + * Upon parsing, these are named according to the {@link Dissect} or {@link Grok} pattern, but can be renamed without changing the + * pattern. + */ public List extractedFields() { return extractedFields; } + @Override + public List generatedAttributes() { + return extractedFields; + } + + List renameExtractedFields(List newNames) { + checkNumberOfNewNames(newNames); + + List renamedExtractedFields = new ArrayList<>(extractedFields.size()); + for (int i = 0; i < newNames.size(); i++) { + Attribute extractedField = extractedFields.get(i); + String newName = newNames.get(i); + if (extractedField.name().equals(newName)) { + renamedExtractedFields.add(extractedField); + } else { + renamedExtractedFields.add(extractedFields.get(i).withName(newNames.get(i)).withId(new NameId())); + } + } + + return renamedExtractedFields; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index ddf5fa6eaf8a3..28855abfff73c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -58,6 +58,8 @@ import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.enrich.EnrichLookupOperator; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; @@ -417,12 +419,14 @@ private PhysicalOperation planDissect(DissectExec dissect, LocalExecutionPlanner Layout.Builder layoutBuilder = source.layout.builder(); layoutBuilder.append(dissect.extractedFields()); final Expression expr = dissect.inputExpression(); - String[] attributeNames = Expressions.names(dissect.extractedFields()).toArray(new String[0]); + // Names in the pattern and layout can differ. + // Attributes need to be rename-able to avoid problems with shadowing - see GeneratingPlan resp. PushDownRegexExtract. + String[] patternNames = Expressions.names(dissect.parser().keyAttributes(Source.EMPTY)).toArray(new String[0]); Layout layout = layoutBuilder.build(); source = source.with( new StringExtractOperator.StringExtractOperatorFactory( - attributeNames, + patternNames, EvalMapper.toEvaluator(expr, layout), () -> (input) -> dissect.parser().parser().parse(input) ), @@ -439,11 +443,15 @@ private PhysicalOperation planGrok(GrokExec grok, LocalExecutionPlannerContext c Map fieldToPos = new HashMap<>(extractedFields.size()); Map fieldToType = new HashMap<>(extractedFields.size()); ElementType[] types = new ElementType[extractedFields.size()]; + List extractedFieldsFromPattern = grok.pattern().extractedFields(); for (int i = 0; i < extractedFields.size(); i++) { - Attribute extractedField = extractedFields.get(i); - ElementType type = PlannerUtils.toElementType(extractedField.dataType()); - fieldToPos.put(extractedField.name(), i); - fieldToType.put(extractedField.name(), type); + DataType extractedFieldType = extractedFields.get(i).dataType(); + // Names in pattern and layout can differ. + // Attributes need to be rename-able to avoid problems with shadowing - see GeneratingPlan resp. PushDownRegexExtract. + String patternName = extractedFieldsFromPattern.get(i).name(); + ElementType type = PlannerUtils.toElementType(extractedFieldType); + fieldToPos.put(patternName, i); + fieldToType.put(patternName, type); types[i] = type; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index e7a999b892f44..669de17891583 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.aggregation.QuantileStates; import org.elasticsearch.core.Tuple; +import org.elasticsearch.dissect.DissectParser; import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.EsqlTestUtils; @@ -21,6 +22,7 @@ import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; import org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; +import org.elasticsearch.xpack.esql.core.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; @@ -42,6 +44,7 @@ import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern; import org.elasticsearch.xpack.esql.core.index.EsIndex; import org.elasticsearch.xpack.esql.core.index.IndexResolution; +import org.elasticsearch.xpack.esql.core.optimizer.OptimizerRules; import org.elasticsearch.xpack.esql.core.plan.logical.Filter; import org.elasticsearch.xpack.esql.core.plan.logical.Limit; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -71,6 +74,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.Values; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateExtract; @@ -114,9 +118,13 @@ import org.elasticsearch.xpack.esql.optimizer.rules.LiteralsOnTheRight; import org.elasticsearch.xpack.esql.optimizer.rules.PushDownAndCombineFilters; import org.elasticsearch.xpack.esql.optimizer.rules.PushDownAndCombineLimits; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownEnrich; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownEval; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownRegexExtract; import org.elasticsearch.xpack.esql.optimizer.rules.SplitInWithFoldableValue; import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.elasticsearch.xpack.esql.parser.ParsingException; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Dissect; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -140,6 +148,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.BiFunction; import java.util.function.Function; import static java.util.Arrays.asList; @@ -157,6 +166,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.getFieldAttribute; import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.EsqlTestUtils.localSource; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.referenceAttribute; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.elasticsearch.xpack.esql.analysis.Analyzer.NO_FIELDS; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyze; @@ -188,6 +198,7 @@ import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; @@ -1021,7 +1032,7 @@ public void testPushDownDissectPastProject() { var keep = as(plan, Project.class); var dissect = as(keep.child(), Dissect.class); - assertThat(dissect.extractedFields(), contains(new ReferenceAttribute(Source.EMPTY, "y", DataType.KEYWORD))); + assertThat(dissect.extractedFields(), contains(referenceAttribute("y", DataType.KEYWORD))); } public void testPushDownGrokPastProject() { @@ -1034,7 +1045,7 @@ public void testPushDownGrokPastProject() { var keep = as(plan, Project.class); var grok = as(keep.child(), Grok.class); - assertThat(grok.extractedFields(), contains(new ReferenceAttribute(Source.EMPTY, "y", DataType.KEYWORD))); + assertThat(grok.extractedFields(), contains(referenceAttribute("y", DataType.KEYWORD))); } public void testPushDownFilterPastProjectUsingEval() { @@ -4254,6 +4265,210 @@ public void testPushdownWithOverwrittenName() { } } + record PushdownShadowingGeneratingPlanTestCase( + BiFunction applyLogicalPlan, + OptimizerRules.OptimizerRule rule + ) {}; + + static PushdownShadowingGeneratingPlanTestCase[] PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES = { + // | EVAL y = to_integer(x), y = y + 1 + new PushdownShadowingGeneratingPlanTestCase((plan, attr) -> { + Alias y1 = new Alias(EMPTY, "y", new ToInteger(EMPTY, attr)); + Alias y2 = new Alias(EMPTY, "y", new Add(EMPTY, y1.toAttribute(), new Literal(EMPTY, 1, INTEGER))); + return new Eval(EMPTY, plan, List.of(y1, y2)); + }, new PushDownEval()), + // | DISSECT x "%{y} %{y}" + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Dissect( + EMPTY, + plan, + attr, + new Dissect.Parser("%{y} %{y}", ",", new DissectParser("%{y} %{y}", ",")), + List.of(new ReferenceAttribute(EMPTY, "y", KEYWORD), new ReferenceAttribute(EMPTY, "y", KEYWORD)) + ), + new PushDownRegexExtract() + ), + // | GROK x "%{WORD:y} %{WORD:y}" + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Grok(EMPTY, plan, attr, Grok.pattern(EMPTY, "%{WORD:y} %{WORD:y}")), + new PushDownRegexExtract() + ), + // | ENRICH some_policy ON x WITH y = some_enrich_idx_field, y = some_other_enrich_idx_field + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Enrich( + EMPTY, + plan, + Enrich.Mode.ANY, + new Literal(EMPTY, "some_policy", KEYWORD), + attr, + null, + Map.of(), + List.of( + new Alias(EMPTY, "y", new ReferenceAttribute(EMPTY, "some_enrich_idx_field", KEYWORD)), + new Alias(EMPTY, "y", new ReferenceAttribute(EMPTY, "some_other_enrich_idx_field", KEYWORD)) + ) + ), + new PushDownEnrich() + ) }; + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Project[[y{r}#3, x{r}#2]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * We can freely push down the Eval without renaming, but need to update the Project's references. + * + * Project[[x{r}#2, y{r}#6 AS y]] + * \_Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. + */ + public void testPushShadowingGeneratingPlanPastProject() { + Alias x = new Alias(EMPTY, "x", new Literal(EMPTY, "1", KEYWORD)); + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + LogicalPlan initialRow = new Row(EMPTY, List.of(x, y)); + LogicalPlan initialProject = new Project(EMPTY, initialRow, List.of(y.toAttribute(), x.toAttribute())); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, x.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + assertEquals(newGeneratedExprs, initialGeneratedExprs); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGenerated = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "y")); + assertThat(projections, everyItem(instanceOf(ReferenceAttribute.class))); + ReferenceAttribute yShadowed = as(projections.get(1), ReferenceAttribute.class); + assertTrue(yShadowed.semanticEquals(rightmostGenerated)); + } + } + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Project[[x{r}#2, y{r}#3, y{r}#3 AS z]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * To push down the Eval, we must not shadow the reference y{r}#3, so we rename. + * + * Project[[x{r}#2, y{r}#3 AS z, $$y$temp_name$10{r}#12 AS y]] + * Eval[[TO_INTEGER(x{r}#2) AS $$y$temp_name$10, $$y$temp_name$10{r}#11 + 1[INTEGER] AS $$y$temp_name$10]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. + */ + public void testPushShadowingGeneratingPlanPastRenamingProject() { + Alias x = new Alias(EMPTY, "x", new Literal(EMPTY, "1", KEYWORD)); + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + LogicalPlan initialRow = new Row(EMPTY, List.of(x, y)); + LogicalPlan initialProject = new Project( + EMPTY, + initialRow, + List.of(x.toAttribute(), y.toAttribute(), new Alias(EMPTY, "z", y.toAttribute())) + ); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, x.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + List newNames = Expressions.names(newGeneratedExprs); + assertThat(newNames.size(), equalTo(initialGeneratedExprs.size())); + assertThat(newNames, everyItem(startsWith("$$y$temp_name$"))); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGeneratedWithNewName = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "z", "y")); + assertThat(projections.get(0), instanceOf(ReferenceAttribute.class)); + Alias zAlias = as(projections.get(1), Alias.class); + ReferenceAttribute yRenamed = as(zAlias.child(), ReferenceAttribute.class); + assertEquals(yRenamed.name(), "y"); + Alias yAlias = as(projections.get(2), Alias.class); + ReferenceAttribute yTempRenamed = as(yAlias.child(), ReferenceAttribute.class); + assertTrue(yTempRenamed.semanticEquals(rightmostGeneratedWithNewName)); + } + } + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#3 + 1[INTEGER] AS y]] + * \_Project[[y{r}#1, y{r}#1 AS x]] + * \_Row[[2[INTEGER] AS y]] + * + * To push down the Eval, we must not shadow the reference y{r}#1, so we rename. + * Additionally, the rename "y AS x" needs to be propagated into the Eval. + * + * Project[[y{r}#1 AS x, $$y$temp_name$10{r}#12 AS y]] + * Eval[[TO_INTEGER(y{r}#1) AS $$y$temp_name$10, $$y$temp_name$10{r}#11 + 1[INTEGER] AS $$y$temp_name$10]] + * \_Row[[2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. + */ + public void testPushShadowingGeneratingPlanPastRenamingProjectWithResolution() { + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + Alias yAliased = new Alias(EMPTY, "x", y.toAttribute()); + LogicalPlan initialRow = new Row(EMPTY, List.of(y)); + LogicalPlan initialProject = new Project(EMPTY, initialRow, List.of(y.toAttribute(), yAliased)); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, yAliased.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + // This ensures that our generating plan doesn't use invalid references, resp. that any rename from the Project has + // been propagated into the generating plan. + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + List newNames = Expressions.names(newGeneratedExprs); + assertThat(newNames.size(), equalTo(initialGeneratedExprs.size())); + assertThat(newNames, everyItem(startsWith("$$y$temp_name$"))); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGeneratedWithNewName = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "y")); + Alias yRenamed = as(projections.get(0), Alias.class); + assertTrue(yRenamed.child().semanticEquals(y.toAttribute())); + Alias yTempRenamed = as(projections.get(1), Alias.class); + ReferenceAttribute yTemp = as(yTempRenamed.child(), ReferenceAttribute.class); + assertTrue(yTemp.semanticEquals(rightmostGeneratedWithNewName)); + } + } + /** * Expects * Project[[min{r}#4, languages{f}#11]] diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java index 545f3efe8ca79..63204b4dd797d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java @@ -10,7 +10,6 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -55,10 +54,6 @@ static UnresolvedAttribute attribute(String name) { return new UnresolvedAttribute(EMPTY, name); } - static ReferenceAttribute referenceAttribute(String name, DataType type) { - return new ReferenceAttribute(EMPTY, name, type); - } - static Literal integer(int i) { return new Literal(EMPTY, i, DataType.INTEGER); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index 111c90790caf0..a195fb8180bf3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -53,6 +53,7 @@ import java.util.function.Function; import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.referenceAttribute; import static org.elasticsearch.xpack.esql.core.expression.Literal.FALSE; import static org.elasticsearch.xpack.esql.core.expression.Literal.TRUE; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;