@@ -206,22 +206,27 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
206206 case DateType => buildCast[Int ](_, d => UTF8String .fromString(DateTimeUtils .dateToString(d)))
207207 case TimestampType => buildCast[Long ](_,
208208 t => UTF8String .fromString(DateTimeUtils .timestampToString(t, timeZone)))
209- case ar : ArrayType =>
209+ case ArrayType (et, _) =>
210210 buildCast[ArrayData ](_, array => {
211- val res = new UTF8StringBuilder
212- res .append(" [" )
211+ val builder = new UTF8StringBuilder
212+ builder .append(" [" )
213213 if (array.numElements > 0 ) {
214- val toUTF8String = castToString(ar.elementType)
215- res.append(toUTF8String(array.get(0 , ar.elementType)).asInstanceOf [UTF8String ])
214+ val toUTF8String = castToString(et)
215+ if (! array.isNullAt(0 )) {
216+ builder.append(toUTF8String(array.get(0 , et)).asInstanceOf [UTF8String ])
217+ }
216218 var i = 1
217219 while (i < array.numElements) {
218- res.append(" , " )
219- res.append(toUTF8String(array.get(i, ar.elementType)).asInstanceOf [UTF8String ])
220+ builder.append(" ," )
221+ if (! array.isNullAt(i)) {
222+ builder.append(" " )
223+ builder.append(toUTF8String(array.get(i, et)).asInstanceOf [UTF8String ])
224+ }
220225 i += 1
221226 }
222227 }
223- res .append(" ]" )
224- res.toUTF8String
228+ builder .append(" ]" )
229+ builder.build()
225230 })
226231 case _ => buildCast[Any ](_, o => UTF8String .fromString(o.toString))
227232 }
@@ -614,45 +619,37 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
614619 """
615620 }
616621
617- private [this ] def writeElemToBufferCode (
618- dataType : DataType ,
619- buffer : String ,
620- elemTerm : String ,
621- ctx : CodegenContext ): String = dataType match {
622- case BinaryType | StringType => s " $buffer.append( $elemTerm) "
623- case DateType => s """ $buffer.append(
624- org.apache.spark.sql.catalyst.util.DateTimeUtils.dateToString( $elemTerm)) """
625- case TimestampType => s """ $buffer.append(
626- org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampToString( $elemTerm)) """
627- case ar : ArrayType => s " ${codegenWriteArrayToBuffer(ar, ctx)}( $elemTerm, $buffer) "
628- case _ => s " $buffer.append(String.valueOf( $elemTerm)) "
629- }
622+ private [this ] def codegenWriteArrayElemCode (et : DataType , ctx : CodegenContext ): String = {
623+ val elementToStringCode = castToStringCode(et, ctx)
624+ val funcName = ctx.freshName(" elementToString" )
625+ val elementToStringFunc = ctx.addNewFunction(funcName,
626+ s """
627+ |private UTF8String $funcName( ${ctx.javaType(et)} element) {
628+ | UTF8String elementStr = null;
629+ | ${elementToStringCode(" element" , " elementStr" , null /* resultIsNull won't be used */ )}
630+ | return elementStr;
631+ |}
632+ """ .stripMargin)
630633
631- private [this ] def codegenWriteArrayToBuffer (ar : ArrayType , ctx : CodegenContext ): String = {
632634 val loopIndex = ctx.freshName(" loopIndex" )
633635 val writeArrayToBuffer = ctx.freshName(" writeArrayToBuffer" )
634636 val arTerm = ctx.freshName(" arTerm" )
635637 val bufferClass = classOf [UTF8StringBuilder ].getName
636638 val bufferTerm = ctx.freshName(" bufferTerm" )
637- def writeElemCode (elemTerm : String ) = {
638- writeElemToBufferCode(ar.elementType, bufferTerm, elemTerm, ctx)
639- }
640- def writeToBufferCode (i : String ) = {
641- val elemTerm = ctx.freshName(" elemTerm" )
642- s """
643- | ${ctx.javaType(ar.elementType)} $elemTerm = ${ctx.getValue(arTerm, ar.elementType, i)};
644- | ${writeElemCode(elemTerm)};
645- """ .stripMargin
646- }
647639 ctx.addNewFunction(writeArrayToBuffer,
648640 s """
649641 |private void $writeArrayToBuffer(ArrayData $arTerm, $bufferClass $bufferTerm) {
650642 | $bufferTerm.append("[");
651643 | if ( $arTerm.numElements() > 0) {
652- | ${writeToBufferCode(" 0" )}
644+ | if (! $arTerm.isNullAt(0)) {
645+ | $bufferTerm.append( $elementToStringFunc( ${ctx.getValue(arTerm, et, " 0" )}));
646+ | }
653647 | for (int $loopIndex = 1; $loopIndex < $arTerm.numElements(); $loopIndex++) {
654- | $bufferTerm.append(", ");
655- | ${writeToBufferCode(loopIndex)}
648+ | $bufferTerm.append(",");
649+ | if (! $arTerm.isNullAt( $loopIndex)) {
650+ | $bufferTerm.append(" ");
651+ | $bufferTerm.append( $elementToStringFunc( ${ctx.getValue(arTerm, et, loopIndex)}));
652+ | }
656653 | }
657654 | }
658655 | $bufferTerm.append("]");
@@ -671,15 +668,15 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
671668 val tz = ctx.addReferenceObj(" timeZone" , timeZone)
672669 (c, evPrim, evNull) => s """ $evPrim = UTF8String.fromString(
673670 org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampToString( $c, $tz)); """
674- case ar : ArrayType =>
671+ case ArrayType (et, _) =>
675672 (c, evPrim, evNull) => {
676673 val bufferTerm = ctx.freshName(" bufferTerm" )
677674 val bufferClass = classOf [UTF8StringBuilder ].getName
678- val writeArrayToBuffer = codegenWriteArrayToBuffer(ar , ctx)
675+ val writeArrayElemCode = codegenWriteArrayElemCode(et , ctx)
679676 s """
680677 | $bufferClass $bufferTerm = new $bufferClass();
681- | $writeArrayToBuffer ( $c, $bufferTerm);
682- | $evPrim = $bufferTerm.toUTF8String ();
678+ | $writeArrayElemCode ( $c, $bufferTerm);
679+ | $evPrim = $bufferTerm.build ();
683680 """ .stripMargin
684681 }
685682 case _ =>
0 commit comments