Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import scala.util.control.Exception.allCatch
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.analysis.TypeCoercion
import org.apache.spark.sql.catalyst.expressions.ExprUtils
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.catalyst.util.TimestampFormatter
import org.apache.spark.sql.types._

Expand All @@ -32,7 +33,8 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable {
private val timestampParser = TimestampFormatter(
options.timestampFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)

private val decimalParser = if (options.locale == Locale.US) {
// Special handling the default locale for backward compatibility
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,10 @@ class CSVOptions(
// A language tag in IETF BCP 47 format
val locale: Locale = parameters.get("locale").map(Locale.forLanguageTag).getOrElse(Locale.US)

val dateFormat: String = parameters.getOrElse("dateFormat", "uuuu-MM-dd")
val dateFormat: String = parameters.getOrElse("dateFormat", DateFormatter.defaultPattern)

val timestampFormat: String =
parameters.getOrElse("timestampFormat", "uuuu-MM-dd'T'HH:mm:ss.SSSXXX")
parameters.getOrElse("timestampFormat", s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX")

val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import com.univocity.parsers.csv.CsvWriter

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.types._

class UnivocityGenerator(
Expand All @@ -44,11 +45,13 @@ class UnivocityGenerator(
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)
private val dateFormatter = DateFormatter(
options.dateFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)

private def makeConverter(dataType: DataType): ValueConverter = dataType match {
case DateType =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericInternalRow}
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
Expand Down Expand Up @@ -86,11 +87,13 @@ class UnivocityParser(
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)
private val dateFormatter = DateFormatter(
options.dateFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)

private val csvFilters = new CSVFilters(filters, requiredSchema)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.SIMPLE_DATE_FORMAT
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

Expand Down Expand Up @@ -622,13 +623,15 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti

@transient private lazy val formatter: Option[TimestampFormatter] = {
if (right.foldable) {
Option(right.eval()).map(format => TimestampFormatter(format.toString, zoneId))
Option(right.eval()).map { format =>
TimestampFormatter(format.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT)
}
} else None
}

override protected def nullSafeEval(timestamp: Any, format: Any): Any = {
val tf = if (formatter.isEmpty) {
TimestampFormatter(format.toString, zoneId)
TimestampFormatter(format.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT)
} else {
formatter.get
}
Expand All @@ -643,10 +646,14 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti
})
}.getOrElse {
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$")
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
defineCodeGen(ctx, ev, (timestamp, format) => {
s"""UTF8String.fromString($tf$$.MODULE$$.apply($format.toString(), $zid)
.format($timestamp))"""
s"""|UTF8String.fromString($tf$$.MODULE$$.apply(
| $format.toString(),
| $zid,
| $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT())
|.format($timestamp))""".stripMargin
})
}
}
Expand Down Expand Up @@ -688,7 +695,7 @@ case class ToUnixTimestamp(
copy(timeZoneId = Option(timeZoneId))

def this(time: Expression) = {
this(time, Literal("uuuu-MM-dd HH:mm:ss"))
this(time, Literal(TimestampFormatter.defaultPattern))
}

override def prettyName: String = "to_unix_timestamp"
Expand Down Expand Up @@ -732,7 +739,7 @@ case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Op
copy(timeZoneId = Option(timeZoneId))

def this(time: Expression) = {
this(time, Literal("uuuu-MM-dd HH:mm:ss"))
this(time, Literal(TimestampFormatter.defaultPattern))
}

def this() = {
Expand All @@ -758,7 +765,7 @@ abstract class ToTimestamp
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val formatter: TimestampFormatter =
try {
TimestampFormatter(constFormat.toString, zoneId)
TimestampFormatter(constFormat.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT)
} catch {
case NonFatal(_) => null
}
Expand Down Expand Up @@ -791,8 +798,8 @@ abstract class ToTimestamp
} else {
val formatString = f.asInstanceOf[UTF8String].toString
try {
TimestampFormatter(formatString, zoneId).parse(
t.asInstanceOf[UTF8String].toString) / downScaleFactor
TimestampFormatter(formatString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT)
.parse(t.asInstanceOf[UTF8String].toString) / downScaleFactor
} catch {
case NonFatal(_) => null
}
Expand Down Expand Up @@ -831,13 +838,16 @@ abstract class ToTimestamp
}
case StringType =>
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
val locale = ctx.addReferenceObj("locale", Locale.US)
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$")
nullSafeCodeGen(ctx, ev, (string, format) => {
s"""
try {
${ev.value} = $tf$$.MODULE$$.apply($format.toString(), $zid, $locale)
.parse($string.toString()) / $downScaleFactor;
${ev.value} = $tf$$.MODULE$$.apply(
$format.toString(),
$zid,
$ldf$$.MODULE$$.SIMPLE_DATE_FORMAT())
.parse($string.toString()) / $downScaleFactor;
} catch (java.lang.IllegalArgumentException e) {
${ev.isNull} = true;
} catch (java.text.ParseException e) {
Expand Down Expand Up @@ -908,7 +918,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
override def prettyName: String = "from_unixtime"

def this(unix: Expression) = {
this(unix, Literal("uuuu-MM-dd HH:mm:ss"))
this(unix, Literal(TimestampFormatter.defaultPattern))
}

override def dataType: DataType = StringType
Expand All @@ -922,7 +932,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val formatter: TimestampFormatter =
try {
TimestampFormatter(constFormat.toString, zoneId)
TimestampFormatter(constFormat.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT)
} catch {
case NonFatal(_) => null
}
Expand All @@ -948,8 +958,9 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
null
} else {
try {
UTF8String.fromString(TimestampFormatter(f.toString, zoneId)
.format(time.asInstanceOf[Long] * MICROS_PER_SECOND))
UTF8String.fromString(
TimestampFormatter(f.toString, zoneId, legacyFormat = SIMPLE_DATE_FORMAT)
.format(time.asInstanceOf[Long] * MICROS_PER_SECOND))
} catch {
case NonFatal(_) => null
}
Expand Down Expand Up @@ -980,13 +991,14 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
}
} else {
val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
val locale = ctx.addReferenceObj("locale", Locale.US)
val tf = TimestampFormatter.getClass.getName.stripSuffix("$")
val ldf = LegacyDateFormats.getClass.getName.stripSuffix("$")
nullSafeCodeGen(ctx, ev, (seconds, f) => {
s"""
try {
${ev.value} = UTF8String.fromString($tf$$.MODULE$$.apply($f.toString(), $zid, $locale).
format($seconds * 1000000L));
${ev.value} = UTF8String.fromString(
$tf$$.MODULE$$.apply($f.toString(), $zid, $ldf$$.MODULE$$.SIMPLE_DATE_FORMAT())
.format($seconds * 1000000L));
} catch (java.lang.IllegalArgumentException e) {
${ev.isNull} = true;
}"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ private[sql] class JSONOptions(
val zoneId: ZoneId = DateTimeUtils.getZoneId(
parameters.getOrElse(DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId))

val dateFormat: String = parameters.getOrElse("dateFormat", "uuuu-MM-dd")
val dateFormat: String = parameters.getOrElse("dateFormat", DateFormatter.defaultPattern)

val timestampFormat: String =
parameters.getOrElse("timestampFormat", "uuuu-MM-dd'T'HH:mm:ss.SSSXXX")
parameters.getOrElse("timestampFormat", s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX")

val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import com.fasterxml.jackson.core._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.SpecializedGetters
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.types._

/**
Expand Down Expand Up @@ -80,11 +81,13 @@ private[sql] class JacksonGenerator(
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)
private val dateFormatter = DateFormatter(
options.dateFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)

private def makeWriter(dataType: DataType): ValueWriter = dataType match {
case NullType =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
Expand Down Expand Up @@ -58,11 +59,13 @@ class JacksonParser(
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)
private val dateFormatter = DateFormatter(
options.dateFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)

/**
* Create a converter which converts the JSON documents held by the `JsonParser`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCoercion
import org.apache.spark.sql.catalyst.expressions.ExprUtils
import org.apache.spark.sql.catalyst.json.JacksonUtils.nextUntil
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
Expand All @@ -40,7 +41,8 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable {
private val timestampFormatter = TimestampFormatter(
options.timestampFormat,
options.zoneId,
options.locale)
options.locale,
legacyFormat = FAST_DATE_FORMAT)

/**
* Infer the type of a collection of json records in three stages:
Expand Down
Loading