Skip to content

Commit 733cbaa

Browse files
bomeng authored and marmbrus committed
[SPARK-15062][SQL] fix list type infer serializer issue
## What changes were proposed in this pull request?

Make the serializer be inferred correctly when the input type is `List[_]`. `List[_]` is a subtype of `Seq[_]`, but previously it was matched by a different, earlier case (`case t if definedByConstructorParams(t)`).

## How was this patch tested?

A new test case was added.

Author: bomeng <[email protected]>

Closes #12849 from bomeng/SPARK-15062.

(cherry picked from commit 0fd95be)
Signed-off-by: Michael Armbrust <[email protected]>
1 parent 8616796 commit 733cbaa

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,13 @@ object ScalaReflection extends ScalaReflection {
509509
serializerFor(unwrapped, optType, newPath))
510510
}
511511

512+
// Since List[_] is also a subtype of Product, this case must come before
513+
// "case t if definedByConstructorParams(t)" so that a List is matched by the
514+
// "t <:< localTypeOf[Seq[_]]" case below rather than being serialized as a struct.
515+
case t if t <:< localTypeOf[Seq[_]] =>
516+
val TypeRef(_, _, Seq(elementType)) = t
517+
toCatalystArray(inputObject, elementType)
518+
512519
case t if definedByConstructorParams(t) =>
513520
val params = getConstructorParameters(t)
514521
val nonNullOutput = CreateNamedStruct(params.flatMap { case (fieldName, fieldType) =>
@@ -524,10 +531,6 @@ object ScalaReflection extends ScalaReflection {
524531
val TypeRef(_, _, Seq(elementType)) = t
525532
toCatalystArray(inputObject, elementType)
526533

527-
case t if t <:< localTypeOf[Seq[_]] =>
528-
val TypeRef(_, _, Seq(elementType)) = t
529-
toCatalystArray(inputObject, elementType)
530-
531534
case t if t <:< localTypeOf[Map[_, _]] =>
532535
val TypeRef(_, _, Seq(keyType, valueType)) = t
533536

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ import java.sql.{Date, Timestamp}
2323
import scala.reflect.runtime.universe.typeOf
2424

2525
import org.apache.spark.SparkFunSuite
26-
import org.apache.spark.sql.catalyst.expressions.{BoundReference, SpecificMutableRow}
26+
import org.apache.spark.sql.catalyst.expressions.{BoundReference, Literal, NewInstance, SpecificMutableRow}
2727
import org.apache.spark.sql.types._
28+
import org.apache.spark.unsafe.types.UTF8String
2829
import org.apache.spark.util.Utils
2930

3031
case class PrimitiveData(
@@ -277,6 +278,18 @@ class ScalaReflectionSuite extends SparkFunSuite {
277278
assert(anyTypes === Seq(classOf[java.lang.Object], classOf[java.lang.Object]))
278279
}
279280

281+
test("SPARK-15062: Get correct serializer for List[_]") { // List[Int] must serialize as an array, not a struct
282+
val list = List(1, 2, 3)
283+
val serializer = serializerFor[List[Int]](BoundReference(
284+
0, ObjectType(list.getClass), nullable = false))
285+
assert(serializer.children.size == 2)
286+
assert(serializer.children.head.isInstanceOf[Literal])
287+
assert(serializer.children.head.asInstanceOf[Literal].value === UTF8String.fromString("value"))
288+
assert(serializer.children.last.isInstanceOf[NewInstance])
289+
assert(serializer.children.last.asInstanceOf[NewInstance]
290+
.cls == classOf[org.apache.spark.sql.catalyst.util.GenericArrayData]) // compare the Class value; isInstanceOf[Class[T]] is erased and always true
291+
}
292+
280293
private val dataTypeForComplexData = dataTypeFor[ComplexData]
281294
private val typeOfComplexData = typeOf[ComplexData]
282295

0 commit comments

Comments
 (0)