-
Notifications
You must be signed in to change notification settings - Fork 181
Add configurable system limitations for subsearch and join command
#4501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
1b520f3
90a3def
1f3fa5f
7bee225
4dbd406
c94cd07
4f073ed
8623934
cb4751e
782a6ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,20 +22,25 @@ | |
| import java.util.stream.IntStream; | ||
| import javax.annotation.Nullable; | ||
| import lombok.RequiredArgsConstructor; | ||
| import org.apache.calcite.plan.RelOptUtil; | ||
| import org.apache.calcite.rel.RelNode; | ||
| import org.apache.calcite.rel.RelShuttleImpl; | ||
| import org.apache.calcite.rel.logical.LogicalFilter; | ||
| import org.apache.calcite.rel.type.RelDataType; | ||
| import org.apache.calcite.rel.type.RelDataTypeFactory; | ||
| import org.apache.calcite.rex.RexBuilder; | ||
| import org.apache.calcite.rex.RexCall; | ||
| import org.apache.calcite.rex.RexLambdaRef; | ||
| import org.apache.calcite.rex.RexNode; | ||
| import org.apache.calcite.rex.RexUtil; | ||
| import org.apache.calcite.sql.SqlIntervalQualifier; | ||
| import org.apache.calcite.sql.fun.SqlStdOperatorTable; | ||
| import org.apache.calcite.sql.type.ArraySqlType; | ||
| import org.apache.calcite.sql.type.SqlTypeName; | ||
| import org.apache.calcite.util.DateString; | ||
| import org.apache.calcite.util.TimeString; | ||
| import org.apache.calcite.util.TimestampString; | ||
| import org.apache.commons.lang3.tuple.Pair; | ||
| import org.apache.logging.log4j.util.Strings; | ||
| import org.opensearch.sql.ast.AbstractNodeVisitor; | ||
| import org.opensearch.sql.ast.expression.Alias; | ||
|
|
@@ -67,7 +72,11 @@ | |
| import org.opensearch.sql.ast.expression.subquery.ExistsSubquery; | ||
| import org.opensearch.sql.ast.expression.subquery.InSubquery; | ||
| import org.opensearch.sql.ast.expression.subquery.ScalarSubquery; | ||
| import org.opensearch.sql.ast.expression.subquery.SubqueryExpression; | ||
| import org.opensearch.sql.ast.tree.UnresolvedPlan; | ||
| import org.opensearch.sql.calcite.plan.LogicalSystemLimit; | ||
| import org.opensearch.sql.calcite.plan.LogicalSystemLimit.SystemLimitType; | ||
| import org.opensearch.sql.calcite.utils.CalciteUtils; | ||
| import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; | ||
| import org.opensearch.sql.calcite.utils.PlanUtils; | ||
| import org.opensearch.sql.common.utils.StringUtils; | ||
|
|
@@ -463,7 +472,7 @@ private RexNode extractRexNodeFromAlias(RexNode node) { | |
| public RexNode visitInSubquery(InSubquery node, CalcitePlanContext context) { | ||
| List<RexNode> nodes = node.getChild().stream().map(child -> analyze(child, context)).toList(); | ||
| UnresolvedPlan subquery = node.getQuery(); | ||
| RelNode subqueryRel = resolveSubqueryPlan(subquery, context); | ||
| RelNode subqueryRel = resolveSubqueryPlan(subquery, node, context); | ||
| if (subqueryRel.getRowType().getFieldCount() != nodes.size()) { | ||
| throw new SemanticCheckException( | ||
| "The number of columns in the left hand side of an IN subquery does not match the number" | ||
|
|
@@ -487,7 +496,7 @@ public RexNode visitScalarSubquery(ScalarSubquery node, CalcitePlanContext conte | |
| return context.relBuilder.scalarQuery( | ||
| b -> { | ||
| UnresolvedPlan subquery = node.getQuery(); | ||
| return resolveSubqueryPlan(subquery, context); | ||
| return resolveSubqueryPlan(subquery, node, context); | ||
| }); | ||
| } | ||
|
|
||
|
|
@@ -496,21 +505,104 @@ public RexNode visitExistsSubquery(ExistsSubquery node, CalcitePlanContext conte | |
| return context.relBuilder.exists( | ||
| b -> { | ||
| UnresolvedPlan subquery = node.getQuery(); | ||
| return resolveSubqueryPlan(subquery, context); | ||
| return resolveSubqueryPlan(subquery, node, context); | ||
| }); | ||
| } | ||
|
|
||
| private RelNode resolveSubqueryPlan(UnresolvedPlan subquery, CalcitePlanContext context) { | ||
| /** Insert a system_limit under correlate conditions. */ | ||
| private RelNode insertSysLimitUnderCorrelateConditions( | ||
| LogicalFilter logicalFilter, CalcitePlanContext context) { | ||
| // Before: | ||
| // LogicalFilter(condition=[AND(=($cor0.SAL, $2), >($1, 1000.0:DECIMAL(5, 1)))]) | ||
| // After: | ||
| // LogicalFilter(condition=[=($cor0.SAL, $2)]) | ||
| // LogicalSystemLimit(fetch=[1], type=[SUBSEARCH_MAXOUT]) | ||
| // LogicalFilter(condition=[>($1, 1000.0:DECIMAL(5, 1))]) | ||
| RexNode originalCondition = logicalFilter.getCondition(); | ||
| List<RexNode> conditions = RelOptUtil.conjunctions(originalCondition); | ||
| Pair<List<RexNode>, List<RexNode>> result = | ||
| CalciteUtils.partition(conditions, PlanUtils::containsCorrelVariable); | ||
| if (result.getLeft().isEmpty()) { | ||
| return logicalFilter; | ||
| } | ||
|
|
||
| RelNode input = logicalFilter.getInput(); | ||
| if (!result.getRight().isEmpty()) { | ||
| RexNode nonCorrelCondition = | ||
| RexUtil.composeConjunction(context.rexBuilder, result.getRight()); | ||
| input = LogicalFilter.create(input, nonCorrelCondition); | ||
| } | ||
| input = | ||
| LogicalSystemLimit.create( | ||
| SystemLimitType.SUBSEARCH_MAXOUT, | ||
| input, | ||
| context.relBuilder.literal(context.sysLimit.subsearchLimit())); | ||
| if (!result.getLeft().isEmpty()) { | ||
| RexNode correlCondition = RexUtil.composeConjunction(context.rexBuilder, result.getLeft()); | ||
| input = LogicalFilter.create(input, correlCondition); | ||
| } | ||
| return input; | ||
| } | ||
|
|
||
| private RelNode resolveSubqueryPlan( | ||
| UnresolvedPlan subquery, SubqueryExpression subqueryExpression, CalcitePlanContext context) { | ||
| boolean isNestedSubquery = context.isResolvingSubquery(); | ||
| context.setResolvingSubquery(true); | ||
| // clear and store the outer state | ||
| boolean isResolvingJoinConditionOuter = context.isResolvingJoinCondition(); | ||
| if (isResolvingJoinConditionOuter) { | ||
| context.setResolvingJoinCondition(false); | ||
| } | ||
| RelNode subqueryRel = subquery.accept(planVisitor, context); | ||
| subquery.accept(planVisitor, context); | ||
|
|
||
| if (context.sysLimit.subsearchLimit() > 0) { | ||
| // add subsearch.maxout limit to subsearch | ||
| if (subqueryExpression instanceof ExistsSubquery) { | ||
| // For exists-subquery, we cannot add system limit to the top of subquery simply. | ||
| // Instead, add system limit under the correlated conditions. | ||
| RelNode replacement = | ||
| context | ||
| .relBuilder | ||
| .peek() | ||
| .accept( | ||
| new RelShuttleImpl() { | ||
| @Override | ||
| public RelNode visit(LogicalFilter filter) { | ||
| RelNode newFilter = insertSysLimitUnderCorrelateConditions(filter, context); | ||
| if (newFilter != filter) { | ||
| return newFilter; | ||
| } | ||
| return visit((RelNode) filter); | ||
| } | ||
|
|
||
| @Override | ||
| public RelNode visit(RelNode other) { | ||
| RelNode newInput = | ||
| other.getInputs().isEmpty() ? null : other.getInput(0).accept(this); | ||
|
||
| if (newInput == null || newInput == other.getInput(0)) { | ||
| return other; | ||
| } | ||
| return other.copy(other.getTraitSet(), Collections.singletonList(newInput)); | ||
| } | ||
| }); | ||
| planVisitor.replaceTop(context.relBuilder, replacement); | ||
| } | ||
| if (subqueryExpression instanceof InSubquery) { | ||
|
||
| // For in-subquery, add system limit to the top of subquery. | ||
| planVisitor.replaceTop( | ||
| context.relBuilder, | ||
| LogicalSystemLimit.create( | ||
| SystemLimitType.SUBSEARCH_MAXOUT, | ||
| context.relBuilder.peek(), | ||
| context.relBuilder.literal(context.sysLimit.subsearchLimit()))); | ||
| } | ||
| } | ||
| // pop the inner plan | ||
| context.relBuilder.build(); | ||
| RelNode subqueryRel = context.relBuilder.build(); | ||
| // if maxout = 0, return empty results | ||
| if (context.sysLimit.subsearchLimit() == 0) { | ||
| subqueryRel = context.relBuilder.values(subqueryRel.getRowType()).build(); | ||
| } | ||
| // clear the exists subquery resolving state | ||
| // restore to the previous state | ||
| if (isResolvingJoinConditionOuter) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.calcite; | ||
|
|
||
| import org.opensearch.sql.common.setting.Settings; | ||
|
|
||
| public record SysLimit(Integer querySizeLimit, Integer subsearchLimit, Integer joinSubsearchLimit) { | ||
| /** Create SysLimit from Settings. */ | ||
| public static SysLimit fromSettings(Settings settings) { | ||
| return settings == null | ||
| ? UNLIMITED_SUBSEARCH | ||
| : new SysLimit( | ||
| settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT), | ||
| settings.getSettingValue(Settings.Key.PPL_SUBSEARCH_MAXOUT), | ||
| settings.getSettingValue(Settings.Key.PPL_JOIN_SUBSEARCH_MAXOUT)); | ||
| } | ||
|
|
||
| /** No limitation on subsearch */ | ||
| public static SysLimit UNLIMITED_SUBSEARCH = new SysLimit(10000, -1, -1); | ||
|
|
||
| /** For testing only */ | ||
| public static SysLimit DEFAULT = new SysLimit(10000, 10000, 50000); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: is it possible to avoid accessing the private method?
My 2 cents: add a frame in CalcitePlanContext, where a frame is the boundary of a subsearch, and define the limit on the frame. When visiting a subsearch, append a LogicalSystemLimit to the subsearch on each frame.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think so.
When visiting the subsearch side (the right side of a join, for example), the right plan has already been pushed onto the stack.
`RelBuilder.pop()` is private as well, so we don't have a way to replace it. Here was my previous attempt for join.
The code uses `relBuilder.with()`, but the first parameter, `analyze(node.getRight(), context)`, pushes the subsearch onto the stack, and the `with()` method pushes it a second time:
- `analyze(node.getLeft(), context)`: stack size is 1
- `with(analyze(node.getRight(), context))`: stack size is 2
- `push` in `with`: stack size is 3
- `pop` in `with`: stack size is 2
- `context.relBuilder.push(withLimit)`: stack size is 3 (incorrect)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it work to use `relbuilder.build()` + `relbuilder.push(newTop)`? `relbuilder.build()` does a pop and is public.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Synced offline. We still cannot use `relbuilder.build()` + `relbuilder.push(newTop)`, since it would empty the `fields` of the `Frame`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How does a SQL join translate to a RelNode? Does it use the private method?