@@ -199,30 +199,59 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
199199
// Applies [[func]] to [[a]] after an unchecked cast of [[a]] to [[T]].
// [[func]] assumes the input is no longer null because eval already does the null check.
@inline private[this] def buildCast[T](a: Any, func: T => Any): Any = {
  val typed = a.asInstanceOf[T]
  func(typed)
}
// Casts [[a]] to [[T]] (unchecked) and passes it, together with [[buffer]], to [[writer]].
// Nothing is returned; whatever [[writer]] does with [[buffer]] is the only effect.
@inline private[this] def buildWriter[T](
    a: Any, buffer: UTF8StringBuilder, writer: (T, UTF8StringBuilder) => Unit): Unit = {
  val typed = a.asInstanceOf[T]
  writer(typed, buffer)
}
206+
// Builds a function that appends the string form of one value of type [[from]] to a
// UTF8StringBuilder. The returned function takes the value (as Any) and the target buffer.
// Array values are written as "[e0, e1, ...]", with nested arrays handled recursively.
private[this] def buildElemWriter(
    from: DataType): (Any, UTF8StringBuilder) => Unit = from match {
  case BinaryType => buildWriter[Array[Byte]](_, _, (bytes, buf) => buf.append(bytes))
  case StringType => buildWriter[UTF8String](_, _, (str, buf) => buf.append(str))
  case DateType =>
    buildWriter[Int](_, _, (days, buf) => buf.append(DateTimeUtils.dateToString(days)))
  case TimestampType =>
    // NOTE(review): this uses the one-argument timestampToString, whereas castToString
    // passes `timeZone` for a top-level timestamp. Confirm whether array elements should
    // honour the same time zone (the generated-code path would need the same change).
    buildWriter[Long](_, _, (ts, buf) => buf.append(DateTimeUtils.timestampToString(ts)))
  case ar: ArrayType =>
    val elemType = ar.elementType
    // The element writer depends only on the element type, so build it once here
    // rather than once per array passed to the returned function.
    val writeElemToBuffer = buildElemWriter(elemType)
    buildWriter[ArrayData](_, _, (array, buf) => {
      buf.append("[")
      val numElems = array.numElements
      if (numElems > 0) {
        // The first element is written without a leading separator.
        writeElemToBuffer(array.get(0, elemType), buf)
        var i = 1
        while (i < numElems) {
          buf.append(", ")
          writeElemToBuffer(array.get(i, elemType), buf)
          i += 1
        }
      }
      buf.append("]")
    })
  // Any other type falls back to String.valueOf of the value.
  case _ => buildWriter[Any](_, _, (o, buf) => buf.append(String.valueOf(o)))
}
202232
// UDFToString
// Builds the function that converts a value of type [[from]] into a UTF8String.
// As with the other buildCast-based converters, the returned function expects a
// non-null input.
private[this] def castToString(from: DataType): Any => Any = from match {
  case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes)
  case DateType => buildCast[Int](_, d => UTF8String.fromString(DateTimeUtils.dateToString(d)))
  case TimestampType => buildCast[Long](_,
    t => UTF8String.fromString(DateTimeUtils.timestampToString(t, timeZone)))
  case ar: ArrayType =>
    // buildElemWriter's ArrayType case already writes "[e0, e1, ...]" into a buffer, so
    // reuse it instead of repeating the loop here. It is built once, outside the closure.
    val writeArrayToBuffer = buildElemWriter(ar)
    buildCast[ArrayData](_, array => {
      val res = new UTF8StringBuilder
      writeArrayToBuffer(array, res)
      UTF8String.fromString(res.toString)
    })
  case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString))
}
@@ -620,21 +649,20 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
620649 buffer : String ,
621650 elemTerm : String ,
622651 ctx : CodegenContext ): String = dataType match {
623- case BinaryType => s " $buffer.append(new String( $elemTerm)) "
624- case StringType => s " $buffer.append(new String( $elemTerm.getBytes())) "
652+ case BinaryType | StringType => s " $buffer.append( $elemTerm) "
625653 case DateType => s """ $buffer.append(
626654 org.apache.spark.sql.catalyst.util.DateTimeUtils.dateToString( $elemTerm)) """
627655 case TimestampType => s """ $buffer.append(
628656 org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampToString( $elemTerm)) """
629657 case ar : ArrayType => s " ${codegenWriteArrayToBuffer(ar, ctx)}( $elemTerm, $buffer) "
630- case _ => s " $buffer.append( $elemTerm) "
658+ case _ => s " $buffer.append(String.valueOf( $elemTerm) )"
631659 }
632660
633661 private [this ] def codegenWriteArrayToBuffer (ar : ArrayType , ctx : CodegenContext ): String = {
634662 val loopIndex = ctx.freshName(" loopIndex" )
635663 val writeArrayToBuffer = ctx.freshName(" writeArrayToBuffer" )
636664 val arTerm = ctx.freshName(" arTerm" )
637- val bufferClass = " java.lang.StringBuilder "
665+ val bufferClass = classOf [ UTF8StringBuilder ].getName
638666 val bufferTerm = ctx.freshName(" bufferTerm" )
639667 def writeElemCode (elemTerm : String ) = {
640668 writeElemToBufferCode(ar.elementType, bufferTerm, elemTerm, ctx)
@@ -676,7 +704,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
676704 case ar : ArrayType =>
677705 (c, evPrim, evNull) => {
678706 val bufferTerm = ctx.freshName(" bufferTerm" )
679- val bufferClass = " java.lang.StringBuilder "
707+ val bufferClass = classOf [ UTF8StringBuilder ].getName
680708 val writeArrayToBuffer = codegenWriteArrayToBuffer(ar, ctx)
681709 s """
682710 | $bufferClass $bufferTerm = new $bufferClass();
0 commit comments