Skip to content

Commit dc61ed4

Browse files
Kapil Singhcloud-fan
authored andcommitted
[SPARK-18091][SQL] Deep if expressions cause Generated SpecificUnsafeProjection code to exceed JVM code size limit
## What changes were proposed in this pull request? Fix for SPARK-18091 which is a bug related to large if expressions causing generated SpecificUnsafeProjection code to exceed JVM code size limit. This PR changes if expression's code generation to place its predicate, true value and false value expressions' generated code in separate methods in context so as to never generate too long combined code. ## How was this patch tested? Added a unit test and also tested manually with the application (having transformations similar to the unit test) which caused the issue to be identified in the first place. Author: Kapil Singh <[email protected]> Closes #15620 from kapilsingh5050/SPARK-18091-IfCodegenFix. (cherry picked from commit e463678) Signed-off-by: Wenchen Fan <[email protected]>
1 parent 1f57385 commit dc61ed4

File tree

2 files changed

+90
-13
lines changed

2 files changed

+90
-13
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala

Lines changed: 69 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,75 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi
6060
val trueEval = trueValue.genCode(ctx)
6161
val falseEval = falseValue.genCode(ctx)
6262

63-
ev.copy(code = s"""
64-
${condEval.code}
65-
boolean ${ev.isNull} = false;
66-
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
67-
if (!${condEval.isNull} && ${condEval.value}) {
68-
${trueEval.code}
69-
${ev.isNull} = ${trueEval.isNull};
70-
${ev.value} = ${trueEval.value};
71-
} else {
72-
${falseEval.code}
73-
${ev.isNull} = ${falseEval.isNull};
74-
${ev.value} = ${falseEval.value};
75-
}""")
63+
// place generated code of condition, true value and false value in separate methods if
64+
// their code combined is large
65+
val combinedLength = condEval.code.length + trueEval.code.length + falseEval.code.length
66+
val generatedCode = if (combinedLength > 1024 &&
67+
// Split these expressions only if they are created from a row object
68+
(ctx.INPUT_ROW != null && ctx.currentVars == null)) {
69+
70+
val (condFuncName, condGlobalIsNull, condGlobalValue) =
71+
createAndAddFunction(ctx, condEval, predicate.dataType, "evalIfCondExpr")
72+
val (trueFuncName, trueGlobalIsNull, trueGlobalValue) =
73+
createAndAddFunction(ctx, trueEval, trueValue.dataType, "evalIfTrueExpr")
74+
val (falseFuncName, falseGlobalIsNull, falseGlobalValue) =
75+
createAndAddFunction(ctx, falseEval, falseValue.dataType, "evalIfFalseExpr")
76+
s"""
77+
$condFuncName(${ctx.INPUT_ROW});
78+
boolean ${ev.isNull} = false;
79+
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
80+
if (!$condGlobalIsNull && $condGlobalValue) {
81+
$trueFuncName(${ctx.INPUT_ROW});
82+
${ev.isNull} = $trueGlobalIsNull;
83+
${ev.value} = $trueGlobalValue;
84+
} else {
85+
$falseFuncName(${ctx.INPUT_ROW});
86+
${ev.isNull} = $falseGlobalIsNull;
87+
${ev.value} = $falseGlobalValue;
88+
}
89+
"""
90+
}
91+
else {
92+
s"""
93+
${condEval.code}
94+
boolean ${ev.isNull} = false;
95+
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
96+
if (!${condEval.isNull} && ${condEval.value}) {
97+
${trueEval.code}
98+
${ev.isNull} = ${trueEval.isNull};
99+
${ev.value} = ${trueEval.value};
100+
} else {
101+
${falseEval.code}
102+
${ev.isNull} = ${falseEval.isNull};
103+
${ev.value} = ${falseEval.value};
104+
}
105+
"""
106+
}
107+
108+
ev.copy(code = generatedCode)
109+
}
110+
111+
private def createAndAddFunction(
112+
ctx: CodegenContext,
113+
ev: ExprCode,
114+
dataType: DataType,
115+
baseFuncName: String): (String, String, String) = {
116+
val globalIsNull = ctx.freshName("isNull")
117+
ctx.addMutableState("boolean", globalIsNull, s"$globalIsNull = false;")
118+
val globalValue = ctx.freshName("value")
119+
ctx.addMutableState(ctx.javaType(dataType), globalValue,
120+
s"$globalValue = ${ctx.defaultValue(dataType)};")
121+
val funcName = ctx.freshName(baseFuncName)
122+
val funcBody =
123+
s"""
124+
|private void $funcName(InternalRow ${ctx.INPUT_ROW}) {
125+
| ${ev.code.trim}
126+
| $globalIsNull = ${ev.isNull};
127+
| $globalValue = ${ev.value};
128+
|}
129+
""".stripMargin
130+
ctx.addNewFunction(funcName, funcBody)
131+
(funcName, globalIsNull, globalValue)
76132
}
77133

78134
override def toString: String = s"if ($predicate) $trueValue else $falseValue"

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,27 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
9595
assert(actual(0) == cases)
9696
}
9797

98+
test("SPARK-18091: split large if expressions into blocks due to JVM code size limit") {
99+
val inStr = "StringForTesting"
100+
val row = create_row(inStr)
101+
val inputStrAttr = 'a.string.at(0)
102+
103+
var strExpr: Expression = inputStrAttr
104+
for (_ <- 1 to 13) {
105+
strExpr = If(EqualTo(Decode(Encode(strExpr, "utf-8"), "utf-8"), inputStrAttr),
106+
strExpr, strExpr)
107+
}
108+
109+
val expressions = Seq(strExpr)
110+
val plan = GenerateUnsafeProjection.generate(expressions, true)
111+
val actual = plan(row).toSeq(expressions.map(_.dataType))
112+
val expected = Seq(UTF8String.fromString(inStr))
113+
114+
if (!checkResult(actual, expected)) {
115+
fail(s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected")
116+
}
117+
}
118+
98119
test("SPARK-14793: split wide array creation into blocks due to JVM code size limit") {
99120
val length = 5000
100121
val expressions = Seq(CreateArray(List.fill(length)(EqualTo(Literal(1), Literal(1)))))

0 commit comments

Comments
 (0)