From 8740a2c3b918796e1ac87c3e178f9f9f4651cb75 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 26 Oct 2016 09:42:43 -0700 Subject: [PATCH 1/2] SPARK-18368: Fix regexp_replace with task serialization. --- .../spark/sql/catalyst/expressions/regexpExpressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 5648ad6b6dc1..4896a6225aa8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -230,7 +230,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio @transient private var lastReplacement: String = _ @transient private var lastReplacementInUTF8: UTF8String = _ // result buffer write by Matcher - @transient private val result: StringBuffer = new StringBuffer + @transient private lazy val result: StringBuffer = new StringBuffer override def nullSafeEval(s: Any, p: Any, r: Any): Any = { if (!p.equals(lastRegex)) { From 3536f6a44f15e2997a1061a6e7ec061f98c3ef9f Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Tue, 8 Nov 2016 15:40:25 -0800 Subject: [PATCH 2/2] SPARK-18368: Add test for regexp_replace serialization. --- .../expressions/RegexpExpressionsSuite.scala | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 5299549e7b4d..d0d1aaa9d299 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.serializer.JavaSerializer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.types.StringType @@ -191,4 +192,17 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(StringSplit(s1, s2), null, row3) } + test("RegExpReplace serialization") { + val serializer = new JavaSerializer(new SparkConf()).newInstance + + val row = create_row("abc", "b", "") + + val s = 's.string.at(0) + val p = 'p.string.at(1) + val r = 'r.string.at(2) + + val expr: RegExpReplace = serializer.deserialize(serializer.serialize(RegExpReplace(s, p, r))) + checkEvaluation(expr, "ac", row) + } + }