diff --git a/docs/changelog/141340.yaml b/docs/changelog/141340.yaml new file mode 100644 index 0000000000000..05bf51d00067a --- /dev/null +++ b/docs/changelog/141340.yaml @@ -0,0 +1,5 @@ +area: ES|QL +issues: [] +pr: 141340 +summary: Skip nullifying aliases for Aggregate groups. +type: bug diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 1f9a58c99b537..3536121d5b407 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -305,6 +305,7 @@ public static FieldAttribute fieldAttribute() { return fieldAttribute(randomAlphaOfLength(10), randomFrom(DataType.types())); } + // TODO: deduplicate some of the `FieldAttribute field(String name, DataType type)` methods in the ESQL tests (currently 6) public static FieldAttribute fieldAttribute(String name, DataType type) { return new FieldAttribute(EMPTY, name, new EsField(name, type, emptyMap(), randomBoolean(), EsField.TimeSeriesFieldType.NONE)); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec index b2644559ad26a..d492f4493b564 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/unmapped-nullify.csv-spec @@ -301,6 +301,45 @@ s:long | bar:null 0 | null ; +statsGroupAliasShadowingSourceColumnNoFilter +required_capability: optional_fields_nullify_skip_group_aliases + +SET unmapped_fields="nullify"\; +FROM languages +| STATS c = COUNT(*) BY language_code = does_not_exist +; + +c:long |language_code:null +4 |null +; + +statsGroupAliasShadowingSourceColumnWithFilter +required_capability: optional_fields_nullify_skip_group_aliases + +SET unmapped_fields="nullify"\; +FROM languages +| WHERE language_code == 1 +| STATS c = COUNT(*) BY language_code = does_not_exist, language_name +; + +c:long |language_code:null |language_name :keyword +1 |null |English +; + +statsGroupAliasShadowingSourceColumnWithFilterAndAggExpression +required_capability: optional_fields_nullify_skip_group_aliases + +SET unmapped_fields="nullify"\; +FROM languages +| WHERE language_code == 1 +| STATS c = COUNT(*) + COALESCE(language_code, 0) + BY language_code = does_not_exist1::INTEGER + does_not_exist2::INTEGER + language_code, language_name +; + +c:long |language_code:integer |language_name :keyword +1 |null |English +; + inlinestatsSum required_capability: optional_fields_nullify_tech_preview SET unmapped_fields="nullify"\; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 174b6988ce5a2..4fcc00da57ac9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -176,6 +176,11 @@ public enum Cap { */ OPTIONAL_FIELDS_NULLIFY_TECH_PREVIEW, + /** + * Don't nullify aliases for Aggregate groupings. + */ + OPTIONAL_FIELDS_NULLIFY_SKIP_GROUP_ALIASES, + /** * Support specifically for *just* the _index METADATA field. Used by CsvTests, since that is the only metadata field currently * supported. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index f4bc28c6910f5..71747b8dcfa42 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -659,17 +659,25 @@ private List maybeResolveAggregates( List aggregates = aggregate.aggregates(); ArrayList resolvedGroupings = new ArrayList<>(newGroupings.size()); + Set unresolvedGroupingNames = new HashSet<>(newGroupings.size()); for (Expression e : newGroupings) { Attribute attr = Expressions.attribute(e); - if (attr != null && attr.resolved()) { - resolvedGroupings.add(attr); + if (attr != null) { + if (attr.resolved()) { + resolvedGroupings.add(attr); + } else { + unresolvedGroupingNames.add(attr.name()); + } } } boolean allGroupingsResolved = groupings.size() == resolvedGroupings.size(); if (allGroupingsResolved == false || Resolvables.resolved(aggregates) == false) { Holder changed = new Holder<>(false); - List resolvedList = NamedExpressions.mergeOutputAttributes(resolvedGroupings, childrenOutput); + var inputAttributes = new ArrayList<>(childrenOutput); + // remove input attributes with the same name as unresolved groupings: could be shadowed by not yet resolved renamed groups + inputAttributes.removeIf(a -> unresolvedGroupingNames.contains(a.name())); + List resolvedList = NamedExpressions.mergeOutputAttributes(resolvedGroupings, inputAttributes); List newAggregates = new ArrayList<>(aggregates.size()); // If no groupings are resolved, skip the resolution of the references to groupings in the aggregates, resolve the diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolveUnmapped.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolveUnmapped.java index d9621a9897070..5f4f6697e41ca 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolveUnmapped.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/rules/ResolveUnmapped.java @@ -24,7 +24,7 @@ import org.elasticsearch.xpack.esql.core.expression.UnresolvedPattern; import org.elasticsearch.xpack.esql.core.expression.UnresolvedTimestamp; import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField; -import org.elasticsearch.xpack.esql.core.util.Holder; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Fork; @@ -33,9 +33,11 @@ import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.Row; import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; +import org.elasticsearch.xpack.esql.plan.logical.join.Join; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import java.util.ArrayList; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -86,24 +88,25 @@ private static LogicalPlan resolve(LogicalPlan plan, boolean load) { var transformed = load ? load(plan, unresolvedLinkedSet) : nullify(plan, unresolvedLinkedSet); - return transformed.equals(plan) ? plan : refreshPlan(transformed, unresolved); + return transformed == plan ? plan : refreshPlan(transformed, unresolved); } /** * The method introduces {@code EVAL missing_field = NULL}-equivalent into the plan, on top of the source, for every attribute in - * {@code unresolved}. It also "patches" the introduced attributes through the plan, where needed (like through Fork/UntionAll). + * {@code unresolved}. */ - private static LogicalPlan nullify(LogicalPlan plan, Set unresolved) { - // insert an Eval on top of every LeafPlan, if there's a UnaryPlan atop it - var transformed = plan.transformUp( - n -> n instanceof UnaryPlan unary && unary.child() instanceof LeafPlan, - p -> evalUnresolvedAtopUnary((UnaryPlan) p, nullAliases(unresolved)) - ); - // insert an Eval on top of those LeafPlan that are children of n-ary plans (could happen with UnionAll) - return transformed.transformUp( - n -> n instanceof UnaryPlan == false && n instanceof LeafPlan == false, - nAry -> evalUnresolvedAtopNary(nAry, nullAliases(unresolved)) - ); + private static LogicalPlan nullify(LogicalPlan plan, LinkedHashSet unresolved) { + return plan.transformUp(n -> { + // insert an Eval on top of every LeafPlan, if there's a UnaryPlan atop it + if (n instanceof UnaryPlan unary && unary.child() instanceof LeafPlan) { + return evalUnresolvedAtopUnary(unary, nullAliases(unresolved)); + } + // insert an Eval on top of those LeafPlan that are children of n-ary plans (LookupJoin, UnionAll) + if ((n instanceof UnaryPlan || n instanceof LeafPlan) == false) { + return evalUnresolvedBelowNary(n, unresolved); + } + return n; + }); } /** @@ -147,15 +150,13 @@ private static Fork patchFork(Fork fork) { List newChildren = new ArrayList<>(fork.children().size()); boolean childrenChanged = false; for (var child : fork.children()) { - Holder patched = new Holder<>(false); - var transformed = child.transformDown( - // TODO add a suitable forEachDownMayReturnEarly equivalent - n -> patched.get() == false && n instanceof Project, // process top Project only (Fork-injected) - n -> { - patched.set(true); - return patchForkProject((Project) n); + var transformed = child.transformDownSkipBranch((n, skip) -> { + if (n instanceof Project project) { + n = patchForkProject(project); + skip.set(true); // process top Project only (Fork-injected) } - ); + return n; + }); childrenChanged |= transformed != child; newChildren.add(transformed); } @@ -167,12 +168,14 @@ private static Fork patchFork(Fork fork) { * by the evalUnresolvedAtopXXX methods and need to be "let through" the Project. */ private static Project patchForkProject(Project project) { - var projectOutput = project.output(); - var childOutput = project.child().output(); + List projectOutput = project.output(); + List childOutput = project.child().output(); if (projectOutput.equals(childOutput) == false) { List delta = new ArrayList<>(childOutput); delta.removeAll(projectOutput); - project = project.withProjections(mergeOutputAttributes(delta, projectOutput)); + if (delta.isEmpty() == false) { + project = project.withProjections(mergeOutputAttributes(delta, projectOutput)); + } } return project; } @@ -201,12 +204,15 @@ private static LogicalPlan refreshUnresolved(LogicalPlan plan, List nullAliases) { + private static LogicalPlan evalUnresolvedBelowNary(LogicalPlan nAry, LinkedHashSet unresolved) { List newChildren = new ArrayList<>(nAry.children().size()); boolean changed = false; for (var child : nAry.children()) { - if (child instanceof LeafPlan source) { + if (child instanceof LeafPlan source + // skip right-sides of the Joins + && (nAry instanceof Join == false || child == ((Join) nAry).left())) { assertSourceType(source); + var nullAliases = removeShadowing(nullAliases(unresolved), source.output()); child = new Eval(source.source(), source, nullAliases); changed = true; } @@ -236,13 +242,26 @@ private static LogicalPlan evalUnresolvedAtopUnary(UnaryPlan unaryAtopSource, Li } return new Eval(eval.source(), eval.child(), combine(pre, eval.fields(), post)); } else { - return unaryAtopSource.replaceChild(new Eval(unaryAtopSource.source(), unaryAtopSource.child(), nullAliases)); + List filteredNullAliases = removeShadowing(nullAliases, unaryAtopSource.child().output()); + return unaryAtopSource.replaceChild(new Eval(unaryAtopSource.source(), unaryAtopSource.child(), filteredNullAliases)); } } + private static List removeShadowing(List aliases, List exclude) { + Set excludeNames = new HashSet<>(Expressions.names(exclude)); + aliases.removeIf(a -> excludeNames.contains(a.name())); + return aliases; + } + private static void assertSourceType(LogicalPlan source) { switch (source) { - case EsRelation unused -> { + case EsRelation esRelation -> { + if (esRelation.indexMode() != IndexMode.STANDARD) { + throw new EsqlIllegalArgumentException( + "invalid source type [{}] for unmapped field resolution", + esRelation.indexMode() + ); + } } case Row unused -> { } @@ -252,7 +271,7 @@ private static void assertSourceType(LogicalPlan source) { } } - private static List nullAliases(Set unresolved) { + private static List nullAliases(LinkedHashSet unresolved) { List aliases = new ArrayList<>(unresolved.size()); unresolved.forEach(u -> aliases.add(nullAlias(u))); return aliases; @@ -274,12 +293,31 @@ private static LinkedHashSet unresolvedLinkedSet(List collectUnresolved(LogicalPlan plan) { + var aliasedGroupings = aliasNamesInAggregateGroupings(plan); List unresolved = new ArrayList<>(); plan.forEachExpression(UnresolvedAttribute.class, ua -> { - if ((ua instanceof UnresolvedPattern || ua instanceof UnresolvedTimestamp) == false) { + if ((ua instanceof UnresolvedPattern || ua instanceof UnresolvedTimestamp) == false + // The aggs will "export" the aliases as UnresolvedAttributes part of their .aggregates(); we don't need to consider those + // as they'll be resolved as refs once the aliased expression is resolved. + && aliasedGroupings.contains(ua.name()) == false) { unresolved.add(ua); } }); return unresolved; } + + /** + * @return the names of the aliases used in the grouping expressions of any Aggregate found in the plan. + */ + private static Set aliasNamesInAggregateGroupings(LogicalPlan plan) { + Set aliasNames = new LinkedHashSet<>(); + plan.forEachUp(Aggregate.class, agg -> { + for (var grouping : agg.groupings()) { + if (grouping instanceof Alias alias) { + aliasNames.add(alias.name()); + } + } + }); + return aliasNames; + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java index e3383412e9027..a4cb594503530 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java @@ -14,6 +14,8 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; @@ -23,6 +25,7 @@ import org.elasticsearch.xpack.esql.index.IndexResolution; import org.elasticsearch.xpack.esql.inference.InferenceResolution; import org.elasticsearch.xpack.esql.inference.ResolvedInference; +import org.elasticsearch.xpack.esql.optimizer.rules.PlanConsistencyChecker; import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.elasticsearch.xpack.esql.parser.QueryParams; import org.elasticsearch.xpack.esql.plan.EsqlStatement; @@ -172,12 +175,24 @@ public static LogicalPlan analyze(String query) { } public static LogicalPlan analyzeStatement(String query) { + return analyzeStatement(query, true); + } + + public static LogicalPlan analyzeStatement(String query, boolean checkPlan) { var statement = EsqlParser.INSTANCE.createStatement(query); var relations = statement.plan().collectFirstChildren(UnresolvedRelation.class::isInstance); var indexName = relations.isEmpty() ? null : ((UnresolvedRelation) relations.getFirst()).indexPattern().indexPattern(); var indexResolutions = indexResolutions(indexName); var analyzer = analyzer(indexResolutions, TEST_VERIFIER, configuration(query), statement); - return analyzer.analyze(statement.plan()); + var analyzed = analyzer.analyze(statement.plan()); + if (checkPlan) { + var failures = new Failures(); + PlanConsistencyChecker.checkPlan(analyzed, failures); + if (failures.hasFailures()) { + throw new VerificationException(failures); + } + } + return analyzed; } public static LogicalPlan analyze(String query, String mapping) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java index d2b8bdf2af5bb..0a0da9a1bde36 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerUnmappedTests.java @@ -356,6 +356,53 @@ public void testEvalAfterMatchingKeepWithWildcard() { assertThat(relation.indexPattern(), is("test")); } + /* + * Limit[1000[INTEGER],false,false] + * \_Eval[[emp_does_not_exist_field{r}#23 + 2[INTEGER] AS y#9]] + * \_Eval[[emp_no{f}#11 + 1[INTEGER] AS x#6]] + * \_Project[[emp_no{f}#11, emp_does_not_exist_field{r}#23]] + * \_Eval[[null[NULL] AS emp_does_not_exist_field#23]] + * \_EsRelation[test][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..] + */ + public void testEvalAfterMatchingKeepWithFieldWildcard() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM test + | KEEP emp_* + | EVAL x = emp_no + 1 + | EVAL y = emp_does_not_exist_field + 2 + """)); + + // Top implicit limit 1000 + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + // Eval for y = emp_does_not_exist_field + 2 + var evalY = as(limit.child(), Eval.class); + assertThat(evalY.fields(), hasSize(1)); + assertThat(evalY.fields().get(0).name(), is("y")); + + // The child is Eval for x = emp_no + 1 + var evalX = as(evalY.child(), Eval.class); + assertThat(evalX.fields(), hasSize(1)); + assertThat(evalX.fields().get(0).name(), is("x")); + + // The child is Project with emp_no and emp_does_not_exist_field + var esqlProject = as(evalX.child(), Project.class); + assertThat(Expressions.names(esqlProject.output()), is(List.of("emp_no", "emp_does_not_exist_field"))); + + // The child is Eval introducing emp_does_not_exist_field as null + var evalNull = as(esqlProject.child(), Eval.class); + assertThat(evalNull.fields(), hasSize(1)); + var alias = as(evalNull.fields().get(0), Alias.class); + assertThat(alias.name(), is("emp_does_not_exist_field")); + var lit = as(alias.child(), Literal.class); + assertThat(lit.dataType(), is(DataType.NULL)); + + // The child is EsRelation + var relation = as(evalNull.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("test")); + } + /* * Limit[1000[INTEGER],false,false] * \_Project[[_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, gender{f}#8, hire_date{f}#13, job{f}#14, job.raw{f}#15, languages{f}#9, @@ -479,6 +526,19 @@ public void testFailEvalAfterDrop() { verificationFailure(setUnmappedLoad(query), failure); } + public void testFailFilterAfterDrop() { + var query = """ + FROM test + | WHERE emp_no > 1000 + | DROP emp_no + | WHERE emp_no < 2000 + """; + + var failure = "line 4:9: Unknown column [emp_no]"; + verificationFailure(setUnmappedNullify(query), failure); + verificationFailure(setUnmappedLoad(query), failure); + } + /* * Limit[1000[INTEGER],false,false] * \_Project[[_meta_field{f}#16, emp_no{f}#10 AS employee_number#8, first_name{f}#11, gender{f}#12, hire_date{f}#17, job{f}#18, @@ -1014,9 +1074,9 @@ public void testStatsAggAndGroup() { /* * Limit[1000[INTEGER],false,false] - * \_Aggregate[[does_not_exist2{r}#24 AS d2#5, emp_no{f}#13],[SUM(does_not_exist1{r}#25,true[BOOLEAN],PT0S[TIME_DURATION], - * compensated[KEYWORD]) + d2{r}#5 AS s#10, d2{r}#5, emp_no{f}#13]] - * \_Eval[[null[NULL] AS does_not_exist2#24, null[NULL] AS does_not_exist1#25, null[NULL] AS d2#26]] + * \_Aggregate[[does_not_exist2{r}#24 AS d2#5, emp_no{f}#13], + * [SUM(does_not_exist1{r}#25,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) + d2{r}#5 AS s#10, d2{r}#5, emp_no{f}#13]] + * \_Eval[[null[NULL] AS does_not_exist2#24, null[NULL] AS does_not_exist1#25]] * \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..] */ public void testStatsAggAndAliasedGroup() { @@ -1043,16 +1103,13 @@ public void testStatsAggAndAliasedGroup() { assertThat(Expressions.name(alias.child()), is("SUM(does_not_exist1) + d2")); var eval = as(agg.child(), Eval.class); - assertThat(eval.fields(), hasSize(3)); + assertThat(eval.fields(), hasSize(2)); var alias2 = as(eval.fields().get(0), Alias.class); assertThat(alias2.name(), is("does_not_exist2")); assertThat(as(alias2.child(), Literal.class).dataType(), is(DataType.NULL)); var alias1 = as(eval.fields().get(1), Alias.class); assertThat(alias1.name(), is("does_not_exist1")); assertThat(as(alias1.child(), Literal.class).dataType(), is(DataType.NULL)); - var alias0 = as(eval.fields().get(2), Alias.class); - assertThat(alias0.name(), is("d2")); - assertThat(as(alias0.child(), Literal.class).dataType(), is(DataType.NULL)); var relation = as(eval.child(), EsRelation.class); assertThat(relation.indexPattern(), is("test")); @@ -1060,10 +1117,103 @@ public void testStatsAggAndAliasedGroup() { /* * Limit[1000[INTEGER],false,false] - * \_Aggregate[[does_not_exist2{r}#29 + does_not_exist3{r}#30 AS s0#6, emp_no{f}#18 AS s1#9],[SUM(does_not_exist1{r}#31,true[B - * OOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) + s0{r}#6 + s1{r}#9 AS sum#14, s0{r}#6, s1{r}#9]] - * \_Eval[[null[NULL] AS does_not_exist2#29, null[NULL] AS does_not_exist3#30, null[NULL] AS does_not_exist1#31, - * null[NULL] AS s0#32]] + * \_Aggregate[[does_not_exist{r}#14 AS language_code#6, language_name{f}#13], + * [COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS c#9, language_code{r}#6, language_name{f}#13]] + * \_Filter[language_code{f}#12 == 1[INTEGER]] + * \_Eval[[null[NULL] AS does_not_exist#14]] + * \_EsRelation[languages][language_code{f}#12, language_name{f}#13] + */ + public void testStatsAggAndAliasedShadowingGroup() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM languages + | WHERE language_code == 1 + | STATS c = COUNT(*) BY language_code = does_not_exist, language_name + """)); + + assertThat(Expressions.names(plan.output()), is(List.of("c", "language_code", "language_name"))); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(2)); + var group_language_code = as(agg.groupings().getFirst(), Alias.class); + assertThat(group_language_code.name(), is("language_code")); + assertThat(Expressions.name(group_language_code.child()), is("does_not_exist")); + assertThat(Expressions.name(agg.groupings().get(1)), is("language_name")); + + assertThat(agg.aggregates(), hasSize(3)); // includes grouping keys + var alias = as(agg.aggregates().get(0), Alias.class); + assertThat(alias.name(), is("c")); + assertThat(Expressions.name(alias.child()), is("COUNT(*)")); + var agg_language_code = as(agg.aggregates().get(1), ReferenceAttribute.class); + assertThat(agg_language_code.id(), is(group_language_code.id())); + + var filter = as(agg.child(), Filter.class); + var eval = as(filter.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var alias0 = as(eval.fields().getFirst(), Alias.class); + assertThat(alias0.name(), is("does_not_exist")); + assertThat(as(alias0.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("languages")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[TOINTEGER(does_not_exist1{r}#18) + TOINTEGER(does_not_exist2{r}#19) + language_code{f}#15 AS language_code#8, + * language_name{f}#16],[COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) + language_code{r}#8 AS c#12, language_code{r}#8, + * language_name{f}#16]] + * \_Filter[language_code{f}#15 == 1[INTEGER]] + * \_Eval[[null[NULL] AS does_not_exist1#18, null[NULL] AS does_not_exist2#19]] + * \_EsRelation[languages][language_code{f}#15, language_name{f}#16] + */ + public void testStatsAggAndAliasedShadowingGroupOverExpression() { + var plan = analyzeStatement(setUnmappedNullify(""" + FROM languages + | WHERE language_code == 1 + | STATS c = COUNT(*) + language_code + BY language_code = does_not_exist1::INTEGER + does_not_exist2::INTEGER + language_code, language_name + """)); + + assertThat(Expressions.names(plan.output()), is(List.of("c", "language_code", "language_name"))); + + var limit = as(plan, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), is(1000)); + + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.groupings(), hasSize(2)); + var groupAlias = as(agg.groupings().getFirst(), Alias.class); + assertThat(groupAlias.name(), is("language_code")); + assertThat(Expressions.name(groupAlias.child()), is("does_not_exist1::INTEGER + does_not_exist2::INTEGER + language_code")); + assertThat(Expressions.name(agg.groupings().get(1)), is("language_name")); + + assertThat(agg.aggregates(), hasSize(3)); // includes grouping keys + var alias = as(agg.aggregates().getFirst(), Alias.class); + assertThat(alias.name(), is("c")); + assertThat(Expressions.name(alias.child()), is("COUNT(*) + language_code")); + + var filter = as(agg.child(), Filter.class); + var eval = as(filter.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + var alias0 = as(eval.fields().get(0), Alias.class); + assertThat(alias0.name(), is("does_not_exist1")); + assertThat(as(alias0.child(), Literal.class).dataType(), is(DataType.NULL)); + var alias1 = as(eval.fields().get(1), Alias.class); + assertThat(alias1.name(), is("does_not_exist2")); + assertThat(as(alias1.child(), Literal.class).dataType(), is(DataType.NULL)); + + var relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexPattern(), is("languages")); + } + + /* + * Limit[1000[INTEGER],false,false] + * \_Aggregate[[does_not_exist2{r}#30 + does_not_exist3{r}#31 AS s0#6, emp_no{f}#18 AS s1#9], + * [SUM(does_not_exist1{r}#32,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) + s0{r}#6 + s1{r}#9 AS sum#14, s0{r}#6, + * s1{r}#9]] + * \_Eval[[null[NULL] AS does_not_exist2#30, null[NULL] AS does_not_exist3#31, null[NULL] AS does_not_exist1#32]] * \_EsRelation[test][_meta_field{f}#24, emp_no{f}#18, first_name{f}#19, ..] */ public void testStatsAggAndAliasedGroupWithExpression() { @@ -1087,8 +1237,8 @@ public void testStatsAggAndAliasedGroupWithExpression() { assertThat(Expressions.name(alias.child()), is("SUM(does_not_exist1) + s0 + s1")); var eval = as(agg.child(), Eval.class); - assertThat(eval.fields(), hasSize(4)); - assertThat(Expressions.names(eval.fields()), is(List.of("does_not_exist2", "does_not_exist3", "does_not_exist1", "s0"))); + assertThat(eval.fields(), hasSize(3)); + assertThat(Expressions.names(eval.fields()), is(List.of("does_not_exist2", "does_not_exist3", "does_not_exist1"))); eval.fields().forEach(a -> assertThat(as(as(a, Alias.class).child(), Literal.class).dataType(), is(DataType.NULL))); var relation = as(eval.child(), EsRelation.class); @@ -1905,6 +2055,8 @@ public void testSubqueryAndMainQuery() { // Left branch: EsRelation[test] with Project + Eval nulls var leftProject = as(union.children().get(0), Project.class); + var leftProject_does_not_exist2 = as(leftProject.output().get(14), ReferenceAttribute.class); + assertThat(leftProject_does_not_exist2.name(), is("does_not_exist2")); var leftEval = as(leftProject.child(), Eval.class); assertThat(Expressions.names(leftEval.fields()), is(List.of("$$does_not_exist2$converted_to$long"))); var leftEvalEval = as(leftEval.child(), Eval.class); @@ -1917,12 +2069,19 @@ public void testSubqueryAndMainQuery() { var leftDne1 = as(leftEvalEval.fields().get(2), Alias.class); assertThat(leftDne1.name(), is("does_not_exist1")); assertThat(as(leftDne1.child(), Literal.class).dataType(), is(DataType.NULL)); + var leftDne2 = as(leftEvalEval.fields().get(3), Alias.class); + assertThat(leftDne2.name(), is("does_not_exist2")); + assertThat(as(leftDne2.child(), Literal.class).dataType(), is(DataType.NULL)); + assertThat(leftDne2.id(), is(leftProject_does_not_exist2.id())); // same IDs within the branch var leftRel = as(leftEvalEval.child(), EsRelation.class); assertThat(leftRel.indexPattern(), is("test")); // Right branch: Project + Eval many nulls, Subquery -> Filter -> Eval -> EsRelation[languages] var rightProject = as(union.children().get(1), Project.class); + var rightProject_does_not_exist2 = as(rightProject.output().get(14), ReferenceAttribute.class); + assertThat(rightProject_does_not_exist2.name(), is("does_not_exist2")); + assertThat(rightProject_does_not_exist2.id(), not(leftProject_does_not_exist2.id())); // different IDs between branches var rightEval = as(rightProject.child(), Eval.class); assertThat(Expressions.names(rightEval.fields()), is(List.of("$$does_not_exist2$converted_to$long"))); var rightEvalEval = as(rightEval.child(), Eval.class); @@ -1953,6 +2112,8 @@ public void testSubqueryAndMainQuery() { var rightSubEval = as(rightSubFilter.child(), Eval.class); assertThat(Expressions.names(rightSubEval.fields()), is(List.of("does_not_exist1", "does_not_exist2"))); + var rightSubEval_does_not_exist2 = as(rightSubEval.fields().get(1), Alias.class); + assertThat(rightSubEval_does_not_exist2.id(), is(rightProject_does_not_exist2.id())); // same IDs within the branch var rightRel = as(rightSubEval.child(), EsRelation.class); assertThat(rightRel.indexPattern(), is("languages")); @@ -2152,7 +2313,7 @@ public void testSubqueryAfterUnionAllOfStatsAndMain() { var plan = analyzeStatement(setUnmappedNullify(""" FROM employees, - (FROM employees | STATS c = count(*)) + (FROM employees | STATS c = COUNT(*)) | SORT does_not_exist """)); @@ -2508,16 +2669,16 @@ public void testSubquerysWithMainAndSameOptional() { /* * Limit[1000[INTEGER],false,false] - * \_MvExpand[languageCode{r}#24,languageCode{r}#197] - * \_Project[[count(*){r}#18, emp_no{r}#131 AS empNo#21, language_code{r}#141 AS languageCode#24, does_not_exist2{r}#196]] - * \_Aggregate[[emp_no{r}#131, language_code{r}#141, does_not_exist2{r}#196], - * [COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS count(*)#18, emp_no{r}#131, language_code{r}#141, - * does_not_exist2{r}#196]] + * \_MvExpand[languageCode{r}#24,languageCode{r}#196] + * \_Project[[COUNT(*){r}#18, emp_no{r}#131 AS empNo#21, language_code{r}#141 AS languageCode#24, does_not_exist2{r}#195]] + * \_Aggregate[[emp_no{r}#131, language_code{r}#141, does_not_exist2{r}#195], + * [COUNT(*[KEYWORD],true[BOOLEAN],PT0S[TIME_DURATION]) AS COUNT(*)#18, emp_no{r}#131, language_code{r}#141, + * does_not_exist2{r}#195]] * \_Filter[emp_no{r}#92 > 10000[INTEGER] OR $$does_not_exist1$converted_to$long{r$}#150 < 10[INTEGER]] - * \_UnionAll[[_meta_field{r}#179, emp_no{r}#180, first_name{r}#181, gender{r}#182, hire_date{r}#183, job{r}#184, - * job.raw{r}#185, languages{r}#186, last_name{r}#187, long_noidx{r}#188, salary{r}#189, language_code{r}#190, - * languageName{r}#191, max(@timestamp){r}#192, language_name{r}#193, does_not_exist1{r}#194, - * $$does_not_exist1$converted_to$long{r$}#195, does_not_exist2{r}#196]] + * \_UnionAll[[_meta_field{r}#178, emp_no{r}#179, first_name{r}#180, gender{r}#181, hire_date{r}#182, job{r}#183, + * job.raw{r}#184, languages{r}#185, last_name{r}#186, long_noidx{r}#187, salary{r}#188, language_code{r}#189, + * languageName{r}#190, max(@timestamp){r}#191, language_name{r}#192, does_not_exist1{r}#193, + * $$does_not_exist1$converted_to$long{r$}#194, does_not_exist2{r}#195]] * |_Project[[_meta_field{f}#34, emp_no{f}#28, first_name{f}#29, gender{f}#30, hire_date{f}#35, job{f}#36, job.raw{f}#37, * languages{f}#31, last_name{f}#32, long_noidx{f}#38, salary{f}#33, language_code{r}#58, languageName{r}#59, * max(@timestamp){r}#60, language_name{r}#61, does_not_exist1{r}#106, $$does_not_exist1$converted_to$long{r$}#146, @@ -2544,8 +2705,8 @@ public void testSubquerysWithMainAndSameOptional() { * |_Project[[_meta_field{r}#75, emp_no{r}#76, first_name{r}#77, gender{r}#78, hire_date{r}#79, job{r}#80, job.raw{r}#81, * languages{r}#82, last_name{r}#83, long_noidx{r}#84, salary{r}#85, language_code{r}#86, languageName{r}#87, * max(@timestamp){r}#8, language_name{r}#88, does_not_exist1{r}#129, $$does_not_exist1$converted_to$long{r$}#148, - * does_not_exist2{r}#178]] - * | \_Eval[[null[NULL] AS does_not_exist2#178]] + * does_not_exist2{r}#177]] + * | \_Eval[[null[NULL] AS does_not_exist2#177]] * | \_Project[[_meta_field{r}#75, emp_no{r}#76, first_name{r}#77, gender{r}#78, hire_date{r}#79, job{r}#80, job.raw{r}#81, * languages{r}#82, last_name{r}#83, long_noidx{r}#84, salary{r}#85, language_code{r}#86, languageName{r}#87, * max(@timestamp){r}#8, language_name{r}#88, does_not_exist1{r}#129, $$does_not_exist1$converted_to$long{r$}#148]] @@ -2565,17 +2726,16 @@ public void testSubquerysWithMainAndSameOptional() { * | \_EsRelation[sample_data][@timestamp{f}#41, client_ip{f}#42, event_duration{f..] * \_Project[[_meta_field{f}#51, emp_no{f}#45, first_name{f}#46, gender{f}#47, hire_date{f}#52, job{f}#53, job.raw{f}#54, * languages{f}#48, last_name{f}#49, long_noidx{f}#55, salary{f}#50, language_code{r}#12, languageName{r}#89, - * max(@timestamp){r}#90, language_name{f}#57, does_not_exist1{r}#110, $$does_not_exist1$converted_to$long{r$}#149, - * does_not_exist2{r}#155]] - * \_Eval[[TOLONG(does_not_exist1{r}#110) AS $$does_not_exist1$converted_to$long#149]] + * max(@timestamp){r}#90, language_name{f}#57, does_not_exist1{r}#109, $$does_not_exist1$converted_to$long{r$}#149, + * does_not_exist2{r}#154]] + * \_Eval[[TOLONG(does_not_exist1{r}#109) AS $$does_not_exist1$converted_to$long#149]] * \_Eval[[null[KEYWORD] AS languageName#89, null[DATETIME] AS max(@timestamp)#90]] * \_Subquery[] * \_LookupJoin[LEFT,[language_code{r}#12],[language_code{f}#56],false,null] * |_Eval[[languages{f}#48 AS language_code#12, null[NULL] AS does_not_exist1#109, * null[NULL] AS does_not_exist2#154]] * | \_EsRelation[test][_meta_field{f}#51, emp_no{f}#45, first_name{f}#46, ..] - * \_Eval[[null[NULL] AS does_not_exist1#110, null[NULL] AS does_not_exist2#155]] - * \_EsRelation[languages_lookup][LOOKUP][language_code{f}#56, language_name{f}#57] + * \_EsRelation[languages_lookup][LOOKUP][language_code{f}#56, language_name{f}#57] */ public void testSubquerysMixAndLookupJoinNullify() { assumeTrue("Requires subquery in FROM command support", EsqlCapabilities.Cap.SUBQUERY_IN_FROM_COMMAND.isEnabled()); @@ -2591,13 +2751,13 @@ public void testSubquerysMixAndLookupJoinNullify() { | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code) | WHERE emp_no > 10000 OR does_not_exist1::LONG < 10 - | STATS count(*) BY emp_no, language_code, does_not_exist2 + | STATS COUNT(*) BY emp_no, language_code, does_not_exist2 | RENAME emp_no AS empNo, language_code AS languageCode | MV_EXPAND languageCode """)); // TODO: golden testing - assertThat(Expressions.names(plan.output()), is(List.of("count(*)", "empNo", "languageCode", "does_not_exist2"))); + assertThat(Expressions.names(plan.output()), is(List.of("COUNT(*)", "empNo", "languageCode", "does_not_exist2"))); } // same tree as above, except for the source nodes @@ -2615,13 +2775,13 @@ public void testSubquerysMixAndLookupJoinLoad() { | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code) | WHERE emp_no > 10000 OR does_not_exist1::LONG < 10 - | STATS count(*) BY emp_no, language_code, does_not_exist2 + | STATS COUNT(*) BY emp_no, language_code, does_not_exist2 | RENAME emp_no AS empNo, language_code AS languageCode | MV_EXPAND languageCode """)); // TODO: golden testing - assertThat(Expressions.names(plan.output()), is(List.of("count(*)", "empNo", "languageCode", "does_not_exist2"))); + assertThat(Expressions.names(plan.output()), is(List.of("COUNT(*)", "empNo", "languageCode", "does_not_exist2"))); List esRelations = plan.collect(EsRelation.class); assertThat( @@ -3307,7 +3467,7 @@ public void testChangedTimestmapFieldWithRate() { verificationFailure(setUnmappedNullify(""" TS k8s | RENAME @timestamp AS newTs - | STATS max(rate(network.total_cost)) BY tbucket = bucket(newTs, 1hour) + | STATS max(rate(network.total_cost)) BY tbucket = BUCKET(newTs, 1hour) """), "3:13: [rate(network.total_cost)] " + UnresolvedTimestamp.UNRESOLVED_SUFFIX); verificationFailure(setUnmappedNullify("""