diff --git a/pkg/bindinfo/binding_plan_generation.go b/pkg/bindinfo/binding_plan_generation.go index 2245b24dbe596..22432758fe72b 100644 --- a/pkg/bindinfo/binding_plan_generation.go +++ b/pkg/bindinfo/binding_plan_generation.go @@ -346,6 +346,8 @@ func genPlanUnderState(sctx sessionctx.Context, stmt ast.StmtNode, state *state) sctx.GetSessionVars().SetAllowPreferRangeScan(state.varValues[i].(bool)) case vardef.TiDBOptEnableNoDecorrelateInSelect: sctx.GetSessionVars().EnableNoDecorrelateInSelect = state.varValues[i].(bool) + case vardef.TiDBOptEnableSemiJoinRewrite: + sctx.GetSessionVars().EnableSemiJoinRewrite = state.varValues[i].(bool) case vardef.TiDBOptSelectivityFactor: sctx.GetSessionVars().SelectivityFactor = state.varValues[i].(float64) default: @@ -424,7 +426,7 @@ func adjustVar(varName string, varVal any) (newVarVal any, err error) { } // increase 0.1 each step return v + 0.1, nil - case vardef.TiDBOptPreferRangeScan, vardef.TiDBOptEnableNoDecorrelateInSelect, vardef.TiDBOptAlwaysKeepJoinKey: // flip the switch + case vardef.TiDBOptPreferRangeScan, vardef.TiDBOptEnableNoDecorrelateInSelect, vardef.TiDBOptAlwaysKeepJoinKey, vardef.TiDBOptEnableSemiJoinRewrite: // flip the switch return !varVal.(bool), nil } return nil, fmt.Errorf("unsupported variable %s in plan generation", varName) @@ -505,6 +507,8 @@ func getStartState(vars []string, fixes []uint64) (*state, error) { s.varValues = append(s.varValues, vardef.DefOptPreferRangeScan) case vardef.TiDBOptEnableNoDecorrelateInSelect: s.varValues = append(s.varValues, vardef.DefOptEnableNoDecorrelateInSelect) + case vardef.TiDBOptEnableSemiJoinRewrite: + s.varValues = append(s.varValues, vardef.DefOptEnableSemiJoinRewrite) case vardef.TiDBOptAlwaysKeepJoinKey: s.varValues = append(s.varValues, vardef.DefOptAlwaysKeepJoinKey) case vardef.TiDBOptSelectivityFactor: diff --git a/pkg/bindinfo/binding_plan_generation_test.go b/pkg/bindinfo/binding_plan_generation_test.go index 8f939d4777b8f..2f555be9f7049 100644 --- 
a/pkg/bindinfo/binding_plan_generation_test.go +++ b/pkg/bindinfo/binding_plan_generation_test.go @@ -104,11 +104,12 @@ func TestStartState(t *testing.T) { vardef.TiDBOptSelectivityFactor, vardef.TiDBOptPreferRangeScan, vardef.TiDBOptEnableNoDecorrelateInSelect, + vardef.TiDBOptEnableSemiJoinRewrite, vardef.TiDBOptCartesianJoinOrderThreshold, } fixes := []uint64{fixcontrol.Fix44855, fixcontrol.Fix45132, fixcontrol.Fix52869} state, err := getStartState(vars, fixes) require.NoError(t, err) - require.Equal(t, state.Encode(), "1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,0.0100,0.0000,0.0000,0.0000,0.8000,true,false,0.0000,OFF,1000,OFF") + require.Equal(t, state.Encode(), "1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,1.0000,0.0100,0.0000,0.0000,0.0000,0.8000,true,false,false,0.0000,OFF,1000,OFF") } diff --git a/pkg/planner/core/expression_rewriter.go b/pkg/planner/core/expression_rewriter.go index 1dcafdf537e76..8e85bca687a4c 100644 --- a/pkg/planner/core/expression_rewriter.go +++ b/pkg/planner/core/expression_rewriter.go @@ -1282,6 +1282,8 @@ func isNoDecorrelate(planCtx *exprRewriterPlanCtx, corCols []*expression.Correla semiJoinRewrite := hintFlags&hint.HintFlagSemiJoinRewrite > 0 // We can't override noDecorrelate via the variable for EXISTS subqueries with semi join rewrite // as this will cause a conflict that will result in both being disabled in later code + // SemiJoinRewrite does not check the variable TiDBOptEnableSemiJoinRewrite. + // If that variable is enabled - we can still choose NOT to decorrelate here. 
if !(semiJoinRewrite && sCtx == handlingExistsSubquery) { // Only support scalar and exists subqueries validSubqType := sCtx == handlingScalarSubquery || sCtx == handlingExistsSubquery diff --git a/pkg/planner/core/logical_plan_builder.go b/pkg/planner/core/logical_plan_builder.go index 0c056376c67ea..64cd93e3f7a34 100644 --- a/pkg/planner/core/logical_plan_builder.go +++ b/pkg/planner/core/logical_plan_builder.go @@ -5274,7 +5274,9 @@ func (b *PlanBuilder) buildSemiJoin(outerPlan, innerPlan base.LogicalPlan, onCon } // Apply forces to choose hash join currently, so don't worry the hints will take effect if the semi join is in one apply. joinPlan.SetPreferredJoinTypeAndOrder(b.TableHints()) - if forceRewrite { + // Make the session variable behave like the hint by setting the same prefer bit. + b.ctx.GetSessionVars().RecordRelevantOptVar(vardef.TiDBOptEnableSemiJoinRewrite) + if forceRewrite || b.ctx.GetSessionVars().EnableSemiJoinRewrite { joinPlan.PreferJoinType |= h.PreferRewriteSemiJoin b.optFlag |= rule.FlagSemiJoinRewrite } diff --git a/pkg/planner/core/rule_semi_join_rewrite.go b/pkg/planner/core/rule_semi_join_rewrite.go index d08eaa032b6a9..aa409f11d0421 100644 --- a/pkg/planner/core/rule_semi_join_rewrite.go +++ b/pkg/planner/core/rule_semi_join_rewrite.go @@ -28,7 +28,8 @@ import ( // SemiJoinRewriter rewrites semi join to inner join with aggregation. // Note: This rewriter is only used for exists subquery. -// And it also requires the hint `SEMI_JOIN_REWRITE` to be set. +// And it also requires the hint `SEMI_JOIN_REWRITE` or variable tidb_opt_enable_semi_join_rewrite +// to be set. // For example: // // select * from t where exists (select /*+ SEMI_JOIN_REWRITE() */ * from s where s.a = t.a); @@ -67,7 +68,11 @@ func (smj *SemiJoinRewriter) recursivePlan(p base.LogicalPlan) (base.LogicalPlan join, ok := p.(*logicalop.LogicalJoin) // If it's not a join, or not a (outer) semi join. We just return it since no optimization is needed. 
// Actually the check of the preferRewriteSemiJoin is a superset of checking the join type. We remain them for a better understanding. - if !ok || !(join.JoinType == base.SemiJoin || join.JoinType == base.LeftOuterSemiJoin) || (join.PreferJoinType&h.PreferRewriteSemiJoin == 0) { + if !ok || !(join.JoinType == base.SemiJoin || join.JoinType == base.LeftOuterSemiJoin) { + return p, nil + } + // Gate by hint or session variable. + if (join.PreferJoinType&h.PreferRewriteSemiJoin) == 0 && !p.SCtx().GetSessionVars().EnableSemiJoinRewrite { return p, nil } // The preferRewriteSemiJoin flag only be used here. We should reset it in order to not affect other parts. diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index 4d92bd9a704a6..784d6abbcd881 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -315,6 +315,10 @@ const ( // TiDBOptEnableNoDecorrelateInSelect is used to control whether to enable the NO_DECORRELATE hint for subqueries in the select list. TiDBOptEnableNoDecorrelateInSelect = "tidb_opt_enable_no_decorrelate_in_select" + // TiDBOptEnableSemiJoinRewrite controls automatic rewrite of semi-join to + // inner-join with aggregation (equivalent to SEMI_JOIN_REWRITE() hint). + TiDBOptEnableSemiJoinRewrite = "tidb_opt_enable_semi_join_rewrite" + // TiDBOptEnableCorrelationAdjustment is used to indicates if enable correlation adjustment. 
TiDBOptEnableCorrelationAdjustment = "tidb_opt_enable_correlation_adjustment" @@ -1402,6 +1406,7 @@ const ( DefOptInSubqToJoinAndAgg = true DefOptPreferRangeScan = true DefOptEnableNoDecorrelateInSelect = false + DefOptEnableSemiJoinRewrite = false DefBatchInsert = false DefBatchDelete = false DefBatchCommit = false diff --git a/pkg/sessionctx/variable/session.go b/pkg/sessionctx/variable/session.go index f9fc21a0166f2..61c0912928def 100644 --- a/pkg/sessionctx/variable/session.go +++ b/pkg/sessionctx/variable/session.go @@ -1151,6 +1151,9 @@ type SessionVars struct { // EnableNoDecorrelateInSelect enables the NO_DECORRELATE hint for subqueries in the select list. EnableNoDecorrelateInSelect bool + // EnableSemiJoinRewrite enables the SEMI_JOIN_REWRITE hint for subqueries in the where clause. + EnableSemiJoinRewrite bool + // AllowProjectionPushDown enables pushdown projection on TiKV. AllowProjectionPushDown bool @@ -2220,6 +2223,7 @@ func NewSessionVars(hctx HookContext) *SessionVars { CartesianJoinOrderThreshold: vardef.DefOptCartesianJoinOrderThreshold, EnableOuterJoinReorder: vardef.DefTiDBEnableOuterJoinReorder, EnableNoDecorrelateInSelect: vardef.DefOptEnableNoDecorrelateInSelect, + EnableSemiJoinRewrite: vardef.DefOptEnableSemiJoinRewrite, RetryLimit: vardef.DefTiDBRetryLimit, DisableTxnAutoRetry: vardef.DefTiDBDisableTxnAutoRetry, DDLReorgPriority: kv.PriorityLow, diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index c27f9133517ab..01dfba71baa7d 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -297,6 +297,10 @@ var defaultSysVars = []*SysVar{ s.OptimizerEnableNAAJ = TiDBOptOn(val) return nil }}, + {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.TiDBOptEnableSemiJoinRewrite, Value: BoolToOnOff(vardef.DefOptEnableSemiJoinRewrite), Type: vardef.TypeBool, SetSession: func(s *SessionVars, val string) error { + s.EnableSemiJoinRewrite = TiDBOptOn(val) + return nil + }}, 
{Scope: vardef.ScopeSession, Name: vardef.TiDBDDLReorgPriority, Value: "PRIORITY_LOW", Type: vardef.TypeEnum, skipInit: true, PossibleValues: []string{"PRIORITY_LOW", "PRIORITY_NORMAL", "PRIORITY_HIGH"}, SetSession: func(s *SessionVars, val string) error { s.setDDLReorgPriority(val) return nil