Skip to content

Commit bdddc66

Browse files
rdbluerxin
authored andcommitted
[SPARK-18368] Fix regexp_replace with task serialization.
## What changes were proposed in this pull request? This makes the result value both transient and lazy, so that if the RegExpReplace object is initialized then serialized, `result: StringBuffer` will be correctly initialized. ## How was this patch tested? * Verified that this patch fixed the query that found the bug. * Added a test case that fails without the fix. Author: Ryan Blue <[email protected]> Closes #15816 from rdblue/SPARK-18368-fix-regexp-replace. (cherry picked from commit b9192bb) Signed-off-by: Reynold Xin <[email protected]>
1 parent 0cceb1b commit bdddc66

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
220220
@transient private var lastReplacement: String = _
221221
@transient private var lastReplacementInUTF8: UTF8String = _
222222
// result buffer write by Matcher
223-
@transient private val result: StringBuffer = new StringBuffer
223+
@transient private lazy val result: StringBuffer = new StringBuffer
224224

225225
override def nullSafeEval(s: Any, p: Any, r: Any): Any = {
226226
if (!p.equals(lastRegex)) {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ import org.scalacheck.Gen
2121
import org.scalactic.TripleEqualsSupport.Spread
2222
import org.scalatest.prop.GeneratorDrivenPropertyChecks
2323

24-
import org.apache.spark.SparkFunSuite
24+
import org.apache.spark.{SparkConf, SparkFunSuite}
25+
import org.apache.spark.serializer.JavaSerializer
2526
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
2627
import org.apache.spark.sql.catalyst.expressions.codegen._
2728
import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer
@@ -42,13 +43,15 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks {
4243

4344
protected def checkEvaluation(
4445
expression: => Expression, expected: Any, inputRow: InternalRow = EmptyRow): Unit = {
46+
val serializer = new JavaSerializer(new SparkConf()).newInstance
47+
val expr: Expression = serializer.deserialize(serializer.serialize(expression))
4548
val catalystValue = CatalystTypeConverters.convertToCatalyst(expected)
46-
checkEvaluationWithoutCodegen(expression, catalystValue, inputRow)
47-
checkEvaluationWithGeneratedMutableProjection(expression, catalystValue, inputRow)
48-
if (GenerateUnsafeProjection.canSupport(expression.dataType)) {
49-
checkEvalutionWithUnsafeProjection(expression, catalystValue, inputRow)
49+
checkEvaluationWithoutCodegen(expr, catalystValue, inputRow)
50+
checkEvaluationWithGeneratedMutableProjection(expr, catalystValue, inputRow)
51+
if (GenerateUnsafeProjection.canSupport(expr.dataType)) {
52+
checkEvalutionWithUnsafeProjection(expr, catalystValue, inputRow)
5053
}
51-
checkEvaluationWithOptimization(expression, catalystValue, inputRow)
54+
checkEvaluationWithOptimization(expr, catalystValue, inputRow)
5255
}
5356

5457
/**

0 commit comments

Comments
 (0)