Skip to content

Commit f672083

Browse files
rdbluerxin
authored andcommitted
[SPARK-18368] Fix regexp_replace with task serialization.
## What changes were proposed in this pull request? This makes the result value both transient and lazy, so that if the RegExpReplace object is initialized then serialized, `result: StringBuffer` will be correctly initialized. ## How was this patch tested? * Verified that this patch fixed the query that found the bug. * Added a test case that fails without the fix. Author: Ryan Blue <[email protected]> Closes #15816 from rdblue/SPARK-18368-fix-regexp-replace. (cherry picked from commit b9192bb) Signed-off-by: Reynold Xin <[email protected]>
1 parent 0dc14f1 commit f672083

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
230230
@transient private var lastReplacement: String = _
231231
@transient private var lastReplacementInUTF8: UTF8String = _
232232
// result buffer write by Matcher
233-
@transient private val result: StringBuffer = new StringBuffer
233+
@transient private lazy val result: StringBuffer = new StringBuffer
234234

235235
override def nullSafeEval(s: Any, p: Any, r: Any): Any = {
236236
if (!p.equals(lastRegex)) {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ import org.scalactic.TripleEqualsSupport.Spread
2222
import org.scalatest.exceptions.TestFailedException
2323
import org.scalatest.prop.GeneratorDrivenPropertyChecks
2424

25-
import org.apache.spark.SparkFunSuite
25+
import org.apache.spark.{SparkConf, SparkFunSuite}
26+
import org.apache.spark.serializer.JavaSerializer
2627
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
2728
import org.apache.spark.sql.catalyst.expressions.codegen._
2829
import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer
@@ -43,13 +44,15 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks {
4344

4445
protected def checkEvaluation(
4546
expression: => Expression, expected: Any, inputRow: InternalRow = EmptyRow): Unit = {
47+
val serializer = new JavaSerializer(new SparkConf()).newInstance
48+
val expr: Expression = serializer.deserialize(serializer.serialize(expression))
4649
val catalystValue = CatalystTypeConverters.convertToCatalyst(expected)
47-
checkEvaluationWithoutCodegen(expression, catalystValue, inputRow)
48-
checkEvaluationWithGeneratedMutableProjection(expression, catalystValue, inputRow)
49-
if (GenerateUnsafeProjection.canSupport(expression.dataType)) {
50-
checkEvalutionWithUnsafeProjection(expression, catalystValue, inputRow)
50+
checkEvaluationWithoutCodegen(expr, catalystValue, inputRow)
51+
checkEvaluationWithGeneratedMutableProjection(expr, catalystValue, inputRow)
52+
if (GenerateUnsafeProjection.canSupport(expr.dataType)) {
53+
checkEvalutionWithUnsafeProjection(expr, catalystValue, inputRow)
5154
}
52-
checkEvaluationWithOptimization(expression, catalystValue, inputRow)
55+
checkEvaluationWithOptimization(expr, catalystValue, inputRow)
5356
}
5457

5558
/**

0 commit comments

Comments
 (0)