-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-29128][SQL] Split predicate code in OR expressions #25827
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -156,6 +156,12 @@ abstract class Expression extends TreeNode[Expression] { | |
|
|
||
| private def reduceCodeSize(ctx: CodegenContext, eval: ExprCode): Unit = { | ||
| // TODO: support whole stage codegen too | ||
| // | ||
| // NOTE: We could use `CodeGenerator.defineSingleSplitFunction` here for the code path | ||
| // of the whole stage codegen. But, we don't do so now because the performance changes that | ||
| // we don't expect might occur in many queries. Therefore, we currently apply | ||
| // this split function to specific performance-sensitive places only, | ||
| // e.g., common subexpression elimination for the whole stage codegen and OR expressions. | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya @cloud-fan Is this correct? I remember @viirya worked on this in #21140 long before, but the pr wasn't merged because of some reasons: performance or design issues?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I recall that is because string-based manipulation was thought too buggy? I didn't remember it is because of performance issue.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Aha, I see. |
||
| val splitThreshold = SQLConf.get.methodSplitThreshold | ||
| if (eval.code.length > splitThreshold && ctx.INPUT_ROW != null && ctx.currentVars == null) { | ||
| val setIsNull = if (!eval.isNull.isInstanceOf[LiteralValue]) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1022,6 +1022,45 @@ class CodegenContext extends Logging { | |
| genCodes | ||
| } | ||
|
|
||
| /** | ||
| * Defines an individual function for a given expression and returns a caller-side code | ||
| * for the function as `ExprCode`. | ||
| */ | ||
| def defineSingleSplitFunction( | ||
| expr: Expression, | ||
| ev: ExprCode, | ||
| funcNameOption: Option[String] = None, | ||
| inputVarsOption: Option[Seq[VariableValue]] = None): ExprCode = { | ||
| val inputVars = inputVarsOption.getOrElse(getLocalInputVariableValues(this, expr).toSeq) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I am not sure if getLocalInputVariableValues is totally ok to extract all input variables from an expr. This is what I want to do when I was changing codegen. I remember not all expressions are changed to expose input variables information. I did the change but stoped in the middle because the community has different idea other than this string based framework.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ur, I see. we might need more discussion around this code. If that's true, it might be dangerous to use this in splitAggrExpr and common subexpr elimination, too. |
||
| if (isValidParamLength(calculateParamLengthFromExprValues(inputVars))) { | ||
| val (isNull, setIsNull) = if (!ev.isNull.isInstanceOf[LiteralValue]) { | ||
| val globalIsNull = addMutableState(JAVA_BOOLEAN, "globalIsNull") | ||
| (JavaCode.isNullGlobal(globalIsNull), s"$globalIsNull = ${ev.isNull};") | ||
| } else { | ||
| (ev.isNull, "") | ||
| } | ||
|
|
||
| val fnName = freshName(funcNameOption.getOrElse(expr.prettyName)) | ||
| val argList = inputVars.map(v => s"${v.javaType.getName} ${v.variableName}") | ||
| val returnType = javaType(expr.dataType) | ||
| val funcFullName = addNewFunction(fnName, | ||
| s""" | ||
| |private $returnType $fnName(${argList.mkString(", ")}) { | ||
| | ${ev.code} | ||
| | $setIsNull | ||
| | return ${ev.value}; | ||
| |} | ||
| """.stripMargin) | ||
|
|
||
| val newValue = freshName("value") | ||
| val inputVariables = inputVars.map(_.variableName).mkString(", ") | ||
| val code = code"$returnType $newValue = $funcFullName($inputVariables);" | ||
| ExprCode(code, isNull, JavaCode.variable(newValue, expr.dataType)) | ||
| } else { | ||
| ev | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Checks and sets up the state and codegen for subexpression elimination. This finds the | ||
| * common subexpressions, generates the code snippets that evaluate those expressions and | ||
|
|
@@ -1057,39 +1096,15 @@ class CodegenContext extends Logging { | |
| } | ||
| if (inputVarsForAllFuncs.map(calculateParamLengthFromExprValues).forall(isValidParamLength)) { | ||
| commonExprs.zipWithIndex.map { case (exprs, i) => | ||
| val expr = exprs.head | ||
| val eval = commonExprVals(i) | ||
|
|
||
| val isNullLiteral = eval.isNull match { | ||
| case TrueLiteral | FalseLiteral => true | ||
| case _ => false | ||
| } | ||
| val (isNull, isNullEvalCode) = if (!isNullLiteral) { | ||
| val v = addMutableState(JAVA_BOOLEAN, "subExprIsNull") | ||
| (JavaCode.isNullGlobal(v), s"$v = ${eval.isNull};") | ||
| } else { | ||
| (eval.isNull, "") | ||
| } | ||
|
|
||
| // Generate the code for this expression tree and wrap it in a function. | ||
| val fnName = freshName("subExpr") | ||
| val inputVars = inputVarsForAllFuncs(i) | ||
| val argList = inputVars.map(v => s"${v.javaType.getName} ${v.variableName}") | ||
| val returnType = javaType(expr.dataType) | ||
| val fn = | ||
| s""" | ||
| |private $returnType $fnName(${argList.mkString(", ")}) { | ||
| | ${eval.code} | ||
| | $isNullEvalCode | ||
| | return ${eval.value}; | ||
| |} | ||
| """.stripMargin | ||
|
|
||
| val value = freshName("subExprValue") | ||
| val state = SubExprEliminationState(isNull, JavaCode.variable(value, expr.dataType)) | ||
| val funcEval = defineSingleSplitFunction( | ||
| expr = exprs.head, | ||
| ev = commonExprVals(i), | ||
| funcNameOption = Some("subExprValue"), | ||
| inputVarsOption = Some(inputVarsForAllFuncs(i)) | ||
| ) | ||
| val state = SubExprEliminationState(funcEval.isNull, funcEval.value) | ||
| exprs.foreach(localSubExprEliminationExprs.put(_, state)) | ||
| val inputVariables = inputVars.map(_.variableName).mkString(", ") | ||
| s"$returnType $value = ${addNewFunction(fnName, fn)}($inputVariables);" | ||
| funcEval.code.toString | ||
| } | ||
| } else { | ||
| val errMsg = "Failed to split subexpression code into small functions because the " + | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why add this comment?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To avoid duplicate work. Currently, this func and
defineSingleSplitFunctionhave too similar logic.