Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5190,3 +5190,56 @@ ROW x = 0
x:integer | a:long | b:long | c:boolean | d:boolean
0 | 0 | 0 | false | true
;

inlineStatsDroppingGroupingField
required_capability: inline_stats_drop_groupings_fix

FROM employees
| KEEP languages, salary
| INLINE STATS avg_sal = ROUND(AVG(salary)) BY languages
| DROP languages
| SORT salary
| LIMIT 10
;

salary:i | avg_sal:d
25324 |41681.0
25945 |41681.0
25976 |50577.0
26436 |52419.0
27215 |47733.0
28035 |50577.0
28336 |52520.0
28941 |52419.0
29175 |48179.0
30404 |52419.0
;

inlineStatsDroppingMultipleCommandsGroupingFields
required_capability: inline_stats_drop_groupings_fix

FROM employees
| KEEP languages, salary, gender, emp_no, *rehired
| INLINE STATS avg_sal_by_gender = ROUND(AVG(salary)) BY gender
| INLINE STATS avg_sal_by_languages = ROUND(AVG(avg_sal_by_gender)) BY languages
| DROP languages, gender
| EVAL languages = emp_no
| INLINE STATS avg_sal_by_emp_no = ROUND(AVG(salary)) BY no = emp_no % 10, rehired = MV_MIN(is_rehired)
| RENAME no AS NO
| DROP NO, languages
| SORT salary
| LIMIT 10
;

salary:i | emp_no:i | is_rehired:boolean | avg_sal_by_gender:d | avg_sal_by_languages:d | avg_sal_by_emp_no:d | rehired:boolean
25324 |10015 |[false, false, false, true]|48761.0 |48779.0 |46801.0 |false
25945 |10035 |false |46861.0 |48779.0 |46801.0 |false
25976 |10092 |[false, false, true, true] |50491.0 |48082.0 |51831.0 |false
26436 |10048 |[true, true] |46861.0 |48142.0 |41230.0 |true
27215 |10057 |null |50491.0 |48177.0 |36905.0 |null
28035 |10084 |false |46861.0 |48082.0 |45508.0 |false
28336 |10026 |[false, true] |46861.0 |47950.0 |51094.0 |false
28941 |10068 |true |46861.0 |48142.0 |41230.0 |true
29175 |10060 |[false, false, false, true]|46861.0 |48116.0 |47571.0 |false
30404 |10042 |null |50491.0 |48142.0 |30404.0 |null
;
Original file line number Diff line number Diff line change
Expand Up @@ -1913,6 +1913,10 @@ public enum Cap {
*/
CONDITIONAL_BLOCK_LOADER_FOR_TEXT_FIELDS,

/**
* Fixes https://github.com/elastic/elasticsearch/issues/139359
*/
INLINE_STATS_DROP_GROUPINGS_FIX(INLINE_STATS.enabled),
// Last capability should still have a comma for fewer merge conflicts when adding new ones :)
// This comment prevents the semicolon from being on the previous capability when Spotless formats the file.
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.elasticsearch.xpack.esql.core.expression.Alias;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.util.Holder;
Expand Down Expand Up @@ -102,7 +103,6 @@ private static LogicalPlan pruneColumns(LogicalPlan plan, AttributeSet.Builder u
private static LogicalPlan pruneColumnsInAggregate(Aggregate aggregate, AttributeSet.Builder used, boolean inlineJoin) {
LogicalPlan p = aggregate;
var remaining = pruneUnusedAndAddReferences(aggregate.aggregates(), used);

if (remaining == null) {
return p;
}
Expand Down Expand Up @@ -141,8 +141,8 @@ private static LogicalPlan pruneColumnsInAggregate(Aggregate aggregate, Attribut

private static LogicalPlan pruneColumnsInInlineJoinRight(InlineJoin ij, AttributeSet.Builder used, Holder<Boolean> recheck) {
LogicalPlan p = ij;

var right = pruneColumns(ij.right(), used, true);

if (right.output().isEmpty() || isLocalEmptyRelation(right)) {
p = pruneRightSideAndProject(ij);
recheck.set(true);
Expand Down Expand Up @@ -199,12 +199,43 @@ private static LogicalPlan pruneColumnsInEval(Eval eval, AttributeSet.Builder us
}

// Note: only run when the Project is a descendent of an InlineJoin.
// Note: it's only applied on Projects on the right-hand side of the InlineJoin.
private static LogicalPlan pruneColumnsInProject(Project project, AttributeSet.Builder used, Holder<Boolean> recheck) {
LogicalPlan p = project;
LogicalPlan newChild = pruneColumns(project.child(), used, true);

if (newChild == project.child()) {
return p;
}
List<Expression> unPrunnableAttrs = List.of();
// Check to see if there is any aggregation left and, if so, compare the aggregation's groupings with the project's remaining
// used attributes. We don't want to prune all groupings if there is at least one aggregate left, since we need the grouping
// to compute the aggregate.
var aggs = newChild.collectFirstChildren(a -> a instanceof Aggregate);
if (aggs.isEmpty() == false) {// this can happen if the aggregate is completely pruned and replaced by its child
var agg = (Aggregate) aggs.get(0);

boolean isOneGroupingKept = false;
boolean isOneAggregateKept = false;
var output = newChild.output();
for (var attr : output) {
if (isOneGroupingKept == false && agg.groupings().contains(attr)) {
isOneGroupingKept = true;
}
if (isOneAggregateKept == false && agg.aggregates().contains(attr) && agg.groupings().contains(attr) == false) {
isOneAggregateKept = true;
}
}

var remaining = pruneUnusedAndAddReferences(project.projections(), used);
if (isOneGroupingKept == false && agg.groupings().isEmpty() == false && isOneAggregateKept) {
// don't let the groupings be completely pruned if there is at least one aggregate left
unPrunnableAttrs = agg.groupings();
}
}

var remaining = pruneUnusedAndAddReferences(project.projections(), used, unPrunnableAttrs);
if (remaining != null) {
p = remaining.isEmpty() ? emptyLocalRelation(project) : new Project(project.source(), project.child(), remaining);
p = remaining.isEmpty() ? emptyLocalRelation(project) : new Project(project.source(), newChild, remaining);
recheck.set(true);
}

Expand Down Expand Up @@ -303,19 +334,27 @@ private static boolean isLocalEmptyRelation(LogicalPlan plan) {
* Returns null if no pruning occurred.
* As a side effect, the references of the kept attributes are added to the input set (builder) -- irrespective of the return value.
*/
private static <N extends NamedExpression> List<N> pruneUnusedAndAddReferences(List<N> named, AttributeSet.Builder used) {
private static <N extends NamedExpression> List<N> pruneUnusedAndAddReferences(
List<N> named,
AttributeSet.Builder used,
List<Expression> exceptions
) {
var clone = new ArrayList<>(named);

for (var it = clone.listIterator(clone.size()); it.hasPrevious();) {
N prev = it.previous();
var attr = prev.toAttribute();
if (used.contains(attr)) {
used.addAll(prev.references());
} else {
} else if (exceptions.contains(attr) == false) {
it.remove();
}
}

return clone.size() != named.size() ? clone : null;
}

private static <N extends NamedExpression> List<N> pruneUnusedAndAddReferences(List<N> named, AttributeSet.Builder used) {
return pruneUnusedAndAddReferences(named, used, List.of());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10894,4 +10894,168 @@ public void testDoubleInlineStatsPrunning_With_MV_Functions() {
assertThat(mvAvgAlias.child(), instanceOf(MvAvg.class));
as(leftEval.child(), EsRelation.class);
}

/*
* Project[[salary{f}#15, aaaaa{r}#7]]
* \_Limit[10[INTEGER],false,false]
* \_InlineJoin[LEFT,[languages{f}#13],[languages{r}#13]]
* |_Project[[languages{f}#13, salary{f}#15]]
* | \_EsRelation[employees][_meta_field{f}#16, emp_no{f}#10, first_name{f}#11, ..]
* \_Project[[aaaaa{r}#7, languages{f}#13]]
* \_Eval[[$$SUM$aaaaa$0{r$}#21 / $$COUNT$aaaaa$1{r$}#22 AS aaaaa#7]]
* \_Aggregate[[languages{f}#13],[SUM(salary{f}#15,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS $$SUM$aaaaa$0#21
* , COUNT(salary{f}#15,true[BOOLEAN],PT0S[TIME_DURATION]) AS $$COUNT$aaaaa$1#22, languages{f}#13]]
* \_StubRelation[[languages{f}#13, salary{f}#15]]
*/
public void testDropInlineStatsGrouping() {
var query = """
FROM employees
| KEEP languages, salary
| INLINE STATS aaaaa = AVG(salary) BY languages
| DROP languages
| LIMIT 10
""";

var plan = as(optimizedPlan(query), Project.class);
var limit = as(plan.child(), Limit.class);
var join = as(limit.child(), InlineJoin.class);
// Left
var leftProject = as(join.left(), Project.class);
assertMap(Expressions.names(leftProject.projections()), is(List.of("languages", "salary")));
as(leftProject.child(), EsRelation.class);
// Right
var rightProject = as(join.right(), Project.class);
assertMap(Expressions.names(rightProject.projections()), is(List.of("aaaaa", "languages")));
var rightEval = as(rightProject.child(), Eval.class);
var aggregate = as(rightEval.child(), Aggregate.class);
assertMap(Expressions.names(aggregate.aggregates()), is(List.of("$$SUM$aaaaa$0", "$$COUNT$aaaaa$1", "languages")));
as(aggregate.child(), StubRelation.class);
}

/*
* Project[[salary{f}#16, aaaaa{r}#7]]
* \_Limit[10[INTEGER],false,false]
* \_InlineJoin[LEFT,[languages{f}#14],[languages{r}#14]]
* |_Project[[languages{f}#14, salary{f}#16]]
* | \_EsRelation[employees][_meta_field{f}#17, emp_no{f}#11, first_name{f}#12, ..]
* \_Project[[aaaaa{r}#7, languages{f}#14]]
* \_Eval[[$$SUM$aaaaa$0{r$}#22 / $$COUNT$aaaaa$1{r$}#23 AS aaaaa#7]]
* \_Aggregate[[languages{f}#14],[SUM(salary{f}#16,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS $$SUM$aaaaa$0#22
* , COUNT(salary{f}#16,true[BOOLEAN],PT0S[TIME_DURATION]) AS $$COUNT$aaaaa$1#23, languages{f}#14]]
* \_StubRelation[[languages{f}#14, salary{f}#16]]
*/
public void testKeepExceptInlineStatsGrouping() {
var query = """
FROM employees
| KEEP languages, salary
| INLINE STATS aaaaa = AVG(salary) BY languages
| KEEP salary, aaaaa
| LIMIT 10
""";

var plan = optimizedPlan(query);
Project project = as(plan, Project.class);
var limit = as(project.child(), Limit.class);
var join = as(limit.child(), InlineJoin.class);
// Left
var leftProject = as(join.left(), Project.class);
assertMap(Expressions.names(leftProject.projections()), is(List.of("languages", "salary")));
as(leftProject.child(), EsRelation.class);
// Right
var rightProject = as(join.right(), Project.class);
assertMap(Expressions.names(rightProject.projections()), is(List.of("aaaaa", "languages")));
var rightEval = as(rightProject.child(), Eval.class);
var aggregate = as(rightEval.child(), Aggregate.class);
assertMap(Expressions.names(aggregate.aggregates()), is(List.of("$$SUM$aaaaa$0", "$$COUNT$aaaaa$1", "languages")));
as(aggregate.child(), StubRelation.class);
}

/*
* Project[[salary{f}#26, languages1{r}#7, languages2{r}#14, avg{r}#17]]
* \_TopN[[Order[salary{f}#26,ASC,LAST]],10[INTEGER],false]
* \_InlineJoin[LEFT,[languages{f}#24],[languages{r}#24]]
* |_Project[[salary{f}#26, languages1{r}#7, languages{f}#24]]
* | \_Eval[[MVAVG(languages{f}#24) AS languages1#7]]
* | \_EsRelation[employees][_meta_field{f}#27, emp_no{f}#21, first_name{f}#22, ..]
* \_Project[[languages2{r}#14, avg{r}#17, languages{f}#24]]
* \_Eval[[$$SUM$avg$0{r$}#34 / $$COUNT$avg$1{r$}#35 AS avg#17]]
* \_Aggregate[[languages{f}#24],[SUM(languages1{r}#7,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS languages2#14
* , SUM(salary{f}#26,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS $$SUM$avg$0#34,
* COUNT(salary{f}#26,true[BOOLEAN],PT0S[TIME_DURATION]) AS $$COUNT$avg$1#35, languages{f}#24]]
* \_StubRelation[[salary{f}#26, languages1{r}#7, avg{r}#10, languages{f}#24]]
*/
public void testDropDoubleInlineStatsGrouping() {
var query = """
FROM employees
| KEEP languages, salary
| INLINE STATS languages1 = MV_AVG(languages), avg = AVG(salary) BY languages
| INLINE STATS languages2 = SUM(languages1), avg = AVG(salary) BY languages
| DROP languages
| SORT salary
| LIMIT 10
""";

var plan = optimizedPlan(query);
Project project = as(plan, Project.class);
var topN = as(project.child(), TopN.class);
var join = as(topN.child(), InlineJoin.class);
// Left
var leftProject = as(join.left(), Project.class);
assertMap(Expressions.names(leftProject.projections()), is(List.of("salary", "languages1", "languages")));
var leftEval = as(leftProject.child(), Eval.class);
as(leftEval.child(), EsRelation.class);
// Right
var rightProject = as(join.right(), Project.class);
assertMap(Expressions.names(rightProject.projections()), is(List.of("languages2", "avg", "languages")));
var rightEval = as(rightProject.child(), Eval.class);
var aggregate = as(rightEval.child(), Aggregate.class);
assertMap(Expressions.names(aggregate.aggregates()), is(List.of("languages2", "$$SUM$avg$0", "$$COUNT$avg$1", "languages")));
as(aggregate.child(), StubRelation.class);
}

/*
* Project[[salary{f}#30, languages1{r}#9, long_noidx{f}#35, languages2{r}#17, avg{r}#20, gender{f}#27]]
* \_TopN[[Order[salary{f}#30,ASC,LAST]],10[INTEGER],false]
* \_InlineJoin[LEFT,[languages{f}#28, gender{f}#27],[languages{r}#28, gender{r}#27]]
* |_Project[[salary{f}#30, gender{f}#27, languages1{r}#9, languages{f}#28, long_noidx{f}#35]]
* | \_Eval[[MVAVG(long_noidx{f}#35) AS languages1#9]]
* | \_EsRelation[employees][_meta_field{f}#31, emp_no{f}#25, first_name{f}#26, ..]
* \_Project[[languages2{r}#17, avg{r}#20, languages{f}#28, gender{f}#27]]
* \_Eval[[$$SUM$avg$0{r$}#38 / $$COUNT$avg$1{r$}#39 AS avg#20]]
* \_Aggregate[[languages{f}#28, gender{f}#27],[SUM(languages1{r}#9,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS
* languages2#17, SUM(salary{f}#30,true[BOOLEAN],PT0S[TIME_DURATION],compensated[KEYWORD]) AS $$SUM$avg$0#38,
* COUNT(salary{f}#30,true[BOOLEAN],PT0S[TIME_DURATION]) AS $$COUNT$avg$1#39, languages{f}#28, gender{f}#27]]
* \_StubRelation[[salary{f}#30, gender{f}#27, languages1{r}#9, avg{r}#12, languages{f}#28, long_noidx{f}#35]]
*/
public void testDropDoubleInlineStats_PartialMultipleGroupings() {
var query = """
FROM employees
| KEEP languages, salary, long_noidx, gender
| INLINE STATS languages1 = MV_AVG(long_noidx), avg = AVG(salary) BY languages, long_noidx
| INLINE STATS languages2 = SUM(languages1), avg = AVG(salary) BY languages, gender
| DROP languages
| SORT salary
| LIMIT 10
""";

var plan = optimizedPlan(query);
Project project = as(plan, Project.class);
var topN = as(project.child(), TopN.class);
var join = as(topN.child(), InlineJoin.class);
// Left
var leftProject = as(join.left(), Project.class);
assertMap(Expressions.names(leftProject.projections()), is(List.of("salary", "gender", "languages1", "languages", "long_noidx")));
var leftEval = as(leftProject.child(), Eval.class);
var leftRelation = as(leftEval.child(), EsRelation.class);
// Right
var rightProject = as(join.right(), Project.class);
assertMap(Expressions.names(rightProject.projections()), is(List.of("languages2", "avg", "languages", "gender")));
var rightEval = as(rightProject.child(), Eval.class);
var aggregate = as(rightEval.child(), Aggregate.class);
assertMap(
Expressions.names(aggregate.aggregates()),
is(List.of("languages2", "$$SUM$avg$0", "$$COUNT$avg$1", "languages", "gender"))
);
as(aggregate.child(), StubRelation.class);
}
}