Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ object ScalaReflection extends ScalaReflection {
case _ => UpCast(expr, expected, walkedTypePath)
}

val className = getClassNameFromType(tpe)
tpe match {
case t if !dataTypeFor(t).isInstanceOf[ObjectType] => getPath

Expand Down Expand Up @@ -372,6 +373,17 @@ object ScalaReflection extends ScalaReflection {
} else {
newInstance
}

case t if Utils.classIsLoadable(className) &&
Utils.classForName(className).isAnnotationPresent(classOf[SQLUserDefinedType]) =>
val udt = Utils.classForName(className)
.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance()
val obj = NewInstance(
udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt(),
Nil,
false,
dataType = ObjectType(udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt()))
Invoke(obj, "deserialize", ObjectType(udt.userClass), getPath :: Nil)
}
}

Expand Down Expand Up @@ -421,6 +433,7 @@ object ScalaReflection extends ScalaReflection {
if (!inputObject.dataType.isInstanceOf[ObjectType]) {
inputObject
} else {
val className = getClassNameFromType(tpe)
tpe match {
case t if t <:< localTypeOf[Option[_]] =>
val TypeRef(_, _, Seq(optType)) = t
Expand Down Expand Up @@ -589,6 +602,17 @@ object ScalaReflection extends ScalaReflection {
case t if t <:< localTypeOf[java.lang.Boolean] =>
Invoke(inputObject, "booleanValue", BooleanType)

case t if Utils.classIsLoadable(className) &&
Utils.classForName(className).isAnnotationPresent(classOf[SQLUserDefinedType]) =>
val udt = Utils.classForName(className)
.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance()
val obj = NewInstance(
udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt(),
Nil,
false,
dataType = ObjectType(udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt()))
Invoke(obj, "serialize", udt.sqlType, inputObject :: Nil)

case other =>
throw new UnsupportedOperationException(
s"No Encoder found for $tpe\n" + walkedTypePath.mkString("\n"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ object Cast {
toField.nullable)
}

case (udt1: UserDefinedType[_], udt2: UserDefinedType[_]) if udt1.userClass == udt2.userClass =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is worth another JIRA.

Is ScalaReflection the only place that may use UDT in Cast?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so, as it is a special use case. I will open another JIRA for it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fix is submitted as PR #10410.

true

case _ => false
}

Expand Down Expand Up @@ -473,6 +476,9 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
castArrayCode(from.asInstanceOf[ArrayType].elementType, array.elementType, ctx)
case map: MapType => castMapCode(from.asInstanceOf[MapType], map, ctx)
case struct: StructType => castStructCode(from.asInstanceOf[StructType], struct, ctx)
case udt: UserDefinedType[_]
if udt.userClass == from.asInstanceOf[UserDefinedType[_]].userClass =>
(c, evPrim, evNull) => s"$evPrim = $c;"
}

// Since we need to cast child expressions recursively inside ComplexTypes, such as Map's
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,21 @@

package org.apache.spark.sql

import java.util.concurrent.ConcurrentMap

import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayData}

import scala.beans.{BeanInfo, BeanProperty}
import scala.reflect.runtime.universe.TypeTag

import com.google.common.collect.MapMaker

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection}
import org.apache.spark.sql.catalyst.encoders._
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.execution.datasources.parquet.ParquetTest
import org.apache.spark.sql.functions._
import org.apache.spark.sql.test.SharedSQLContext
Expand Down Expand Up @@ -89,6 +98,30 @@ class UserDefinedTypeSuite extends QueryTest with SharedSQLContext with ParquetT
assert(featuresArrays.contains(new MyDenseVector(Array(0.2, 2.0))))
}

private val outers: ConcurrentMap[String, AnyRef] = new MapMaker().weakValues().makeMap()
outers.put(getClass.getName, this)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes are not needed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot to remove them.

test("user type with ScalaReflection") {
val points = Seq(
MyLabeledPoint(1.0, new MyDenseVector(Array(0.1, 1.0))),
MyLabeledPoint(0.0, new MyDenseVector(Array(0.2, 2.0))))

val schema = ScalaReflection.schemaFor[MyLabeledPoint].dataType.asInstanceOf[StructType]
val attributeSeq = schema.toAttributes

val pointEncoder = encoderFor[MyLabeledPoint]
val unsafeRows = points.map(pointEncoder.toRow(_).copy())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also test encoder.fromRow?

We can just create a MyLabeledPoint, encode it to an InternalRow, decode it back with the encoder, and check that the decoded MyLabeledPoint is the same as the original one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the test now. By doing this, I also found that we need to add UserDefinedType to Cast to make it work.

val df = DataFrame(sqlContext, LocalRelation(attributeSeq, unsafeRows))
val decodedPoints = df.collect()
points.zip(decodedPoints).foreach { case (p, p2) =>
assert(p.label == p2(0) && p.features == p2(1))
}

val boundEncoder = pointEncoder.resolve(attributeSeq, outers).bind(attributeSeq)
val point = MyLabeledPoint(1.0, new MyDenseVector(Array(0.1, 1.0)))
assert(boundEncoder.fromRow(boundEncoder.toRow(point)) === point)
}

test("UDTs and UDFs") {
sqlContext.udf.register("testType", (d: MyDenseVector) => d.isInstanceOf[MyDenseVector])
pointsRDD.registerTempTable("points")
Expand Down