@@ -1212,6 +1212,73 @@ case class ToUTCTimestamp(left: Expression, right: Expression)
}
}

/**
* This modifies a timestamp to show how the display time changes going from one timezone to
* another, for the same instant in time.
*
* We intentionally do not provide an ExpressionDescription as this is not meant to be exposed to
* users; it's only used for internal conversions.
*/
private[spark] case class TimestampTimezoneCorrection(
Contributor:
do we need a whole expression for this? can't we just reuse existing expressions? It's just simple arithmetics isn't it?

Contributor:
I guess you could use ToUTCTimestamp / FromUTCTimestamp for this, but that would be more expensive since you'd be doing the conversion twice for each value.
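For context, a rough sketch (not from the PR) of what reusing the existing expressions could look like; the object and parameter names are placeholders, and whether the two zone arguments would need to be swapped depends on the correction direction discussed in the code comments further down:

```scala
import org.apache.spark.sql.catalyst.expressions._

object ReuseExistingExpressionsSketch {
  // Each row goes through two timezone conversions instead of one. `timeExpr`,
  // `fromTzExpr`, and `toTzExpr` are placeholder children, not names from the PR.
  def correction(timeExpr: Expression, fromTzExpr: Expression, toTzExpr: Expression): Expression =
    FromUTCTimestamp(ToUTCTimestamp(timeExpr, fromTzExpr), toTzExpr)
}
```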

Contributor:
What I'm saying is the analysis rule can just determine the delta, and then just do a simple add/delete.

Contributor:
Hmm, let me see if I can figure that out.

Comment:
Unfortunately the offset depends on the actual date, so a timezone conversion can not be simplified to a simple delta.

Daylight saving time starts and ends on different days in different timezones, while some timezones don't have DST changes at all. Additionally, timezone rules have changed over time and keep changing. Both the basic timezone offset and the DST rules themselves could change (and have changed) over time.
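To make this concrete, a small standalone sketch (zone and instants chosen arbitrarily, not part of the PR) showing that the UTC offset of a DST-observing zone depends on the date, so no single delta works for all rows:

```scala
import java.util.TimeZone

// The offset of a DST-observing zone differs between a winter and a summer instant.
object OffsetVariesWithDate extends App {
  val la = TimeZone.getTimeZone("America/Los_Angeles")
  val winter = 1422057600000L // 2015-01-24 00:00:00 UTC
  val summer = 1437696000000L // 2015-07-24 00:00:00 UTC
  println(la.getOffset(winter) / 3600000.0) // -8.0 (PST)
  println(la.getOffset(summer) / 3600000.0) // -7.0 (PDT)
}
```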

Contributor:
> Additionally, timezone rules have changed over time and keep changing.

Ah, yes, that makes sense... it's also why my initial tests were failing at the DST boundaries. :-/

time: Expression,
from: Expression,
to: Expression)
extends TernaryExpression with ImplicitCastInputTypes {

// modeled on ToUTCTimestamp + Conv (as an example TernaryExpression)

// convertTz() does the *opposite* conversion we want, which is why from & to appear reversed
// in all the calls to convertTz. It's used for showing how the display time changes when we go
// from one timezone to another. We want to see how the SQLTimestamp value should change to
// ensure the display does *not* change, despite going from one TZ to another.

override def children: Seq[Expression] = Seq(time, from, to)
override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType, StringType)
override def dataType: DataType = TimestampType
override def prettyName: String = "timestamp_timezone_correction"

override def nullSafeEval(time: Any, from: Any, to: Any): Any = {
DateTimeUtils.convertTz(
time.asInstanceOf[Long],
to.asInstanceOf[UTF8String].toString(),
from.asInstanceOf[UTF8String].toString())
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
if (from.foldable && to.foldable) {
val fromTz = from.eval()
val toTz = to.eval()
if (fromTz == null || toTz == null) {
ev.copy(code = s"""
|boolean ${ev.isNull} = true;
|long ${ev.value} = 0;
""".stripMargin)
} else {
val eval = time.genCode(ctx)
ev.copy(code = s"""
|${eval.code}
|boolean ${ev.isNull} = ${eval.isNull};
|long ${ev.value} = 0;
|if (!${ev.isNull}) {
| ${ev.value} = $dtu.convertTz(${eval.value}, "$toTz", "$fromTz");
|}
""".stripMargin)
}
} else {
nullSafeCodeGen(ctx, ev, (time, from, to) =>
s"""
|${ev.value} = $dtu.convertTz(
| $time,
| $to.toString(),
| $from.toString());
""".stripMargin
)
}
}
}

/**
* Parses a column to a date based on the given format.
*/
@@ -109,6 +109,11 @@ object DateTimeUtils {
computedTimeZones.computeIfAbsent(timeZoneId, computeTimeZone)
}

lazy val validTimezones = TimeZone.getAvailableIDs().toSet
def isValidTimezone(timezoneId: String): Boolean = {
validTimezones.contains(timezoneId)
}
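As a side note, a hypothetical sketch of how a validity check such as the TimestampTableTimeZone.checkTableTz calls seen later in this diff could build on this helper; the property key, signature, and message wording below are assumptions, not the PR's actual code:

```scala
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.util.DateTimeUtils

// Hypothetical sketch only: the real TimestampTableTimeZone object is not shown in this diff.
object TimestampTableTimeZoneSketch {
  val TIMEZONE_PROPERTY: String = "table.timestamp.timeZone" // assumed key

  def checkTableTz(context: String, properties: Map[String, String]): Unit = {
    properties.get(TIMEZONE_PROPERTY).foreach { tz =>
      if (!DateTimeUtils.isValidTimezone(tz)) {
        throw new AnalysisException(s"Invalid timezone '$tz' $context")
      }
    }
  }
}
```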

def newDateFormat(formatString: String, timeZone: TimeZone): DateFormat = {
val sdf = new SimpleDateFormat(formatString, Locale.US)
sdf.setTimeZone(timeZone)
@@ -1015,6 +1020,10 @@ object DateTimeUtils {
guess
}

def convertTz(ts: SQLTimestamp, fromZone: String, toZone: String): SQLTimestamp = {
convertTz(ts, getTimeZone(fromZone), getTimeZone(toZone))
Contributor:
performance is going to suck here

Contributor:
I guess caching the value as done by the FromUTCTimestamp expression is the right way to go?

}
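Following up on the caching suggestion above, a hedged sketch (class and method names are illustrative, not the PR's code) of resolving the TimeZone objects once instead of per row, along the lines of what FromUTCTimestamp does with a foldable timezone argument:

```scala
import java.util.TimeZone

import org.apache.spark.sql.catalyst.util.DateTimeUtils

// Illustrative only: when the zone IDs are known up front (e.g. foldable literals),
// resolve them to TimeZone objects once and reuse them for every row, instead of doing
// a per-row string lookup through the String overload above.
class CachedTzCorrection(fromZoneId: String, toZoneId: String) {
  @transient private lazy val fromTz: TimeZone = DateTimeUtils.getTimeZone(fromZoneId)
  @transient private lazy val toTz: TimeZone = DateTimeUtils.getTimeZone(toZoneId)

  // Argument order mirrors the expression's nullSafeEval above (to, then from).
  def correct(ts: Long): Long = DateTimeUtils.convertTz(ts, toTz, fromTz)
}
```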

/**
* Convert the timestamp `ts` from one timezone to another.
*
@@ -741,4 +741,26 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
test("2015-07-24 00:00:00", null, null)
test(null, null, null)
}

test("parquet_timestamp_correction") {
def test(t: String, fromTz: String, toTz: String, expected: String): Unit = {
checkEvaluation(
TimestampTimezoneCorrection(
Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType),
Literal.create(fromTz, StringType),
Literal.create(toTz, StringType)),
if (expected != null) Timestamp.valueOf(expected) else null)
checkEvaluation(
TimestampTimezoneCorrection(
Literal.create(if (t != null) Timestamp.valueOf(t) else null, TimestampType),
NonFoldableLiteral.create(fromTz, StringType),
NonFoldableLiteral.create(toTz, StringType)),
if (expected != null) Timestamp.valueOf(expected) else null)
}
test("2015-07-24 00:00:00", "UTC", "PST", "2015-07-23 17:00:00")
test("2015-01-24 00:00:00", "UTC", "PST", "2015-01-23 16:00:00")
test(null, "UTC", "UTC", null)
test("2015-07-24 00:00:00", null, null, null)
test(null, null, null, null)
}
}
@@ -28,7 +28,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions}
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.execution.datasources.{DataSource, FailureSafeParser}
import org.apache.spark.sql.execution.datasources.{DataSource, FailureSafeParser, TimestampTableTimeZone}
import org.apache.spark.sql.execution.datasources.csv._
import org.apache.spark.sql.execution.datasources.jdbc._
import org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource
@@ -179,6 +179,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
throw new AnalysisException("Hive data source can only be used with tables, you can not " +
"read files of Hive data source directly.")
}
TimestampTableTimeZone.checkTableTz("", extraOptions.toMap)

sparkSession.baseRelationToDataFrame(
DataSource.apply(
13 changes: 11 additions & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.execution.SQLExecution
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, LogicalRelation}
import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, LogicalRelation, TimestampTableTimeZone}
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.StructType

@@ -215,6 +215,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
*/
def save(path: String): Unit = {
this.extraOptions += ("path" -> path)
TimestampTableTimeZone.checkTableTz(s"for path $path", extraOptions.toMap)
save()
}

@@ -266,6 +267,10 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* @since 1.4.0
*/
def insertInto(tableName: String): Unit = {
extraOptions.get(TimestampTableTimeZone.TIMEZONE_PROPERTY).foreach { tz =>
Contributor:
we don't seem to be doing this type of validity check in general; otherwise we'd need to add a lot more checks here.

Contributor:
The spec requests errors when using invalid time zones. I guess this would still fail with a different error in that case, so I can remove this if you're really against adding it.

Contributor:
Hmm. I tried a couple of things, and while it may be possible to remove some of these checks and replace them with a check in DateTimeUtils.computeTimeZone, that doesn't cover all cases. For example, you could use "ALTER TABLE" with an invalid time zone and that wouldn't trigger the check.

So given the spec I'm inclined to leave the checks as is, unless @zivanfi is open to making the spec more lax in that area. (TimeZone.getTimeZone(invalidId) actually returns the UTC time zone, as unexpected as that behavior may be, so things won't necessarily break without the checks.)
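For illustration, a tiny standalone snippet (zone ID made up) demonstrating the silent fallback described above, which is what makes a typo hard to notice without an explicit check:

```scala
import java.util.TimeZone

// Unknown zone IDs do not raise an error; they silently resolve to GMT.
object InvalidZoneFallback extends App {
  val tz = TimeZone.getTimeZone("America/Not_A_Real_City") // made-up ID
  println(tz.getID)                                               // prints "GMT"
  println(TimeZone.getAvailableIDs().contains("America/Not_A_Real_City")) // false
}
```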

Comment:
Although other table properties don't have similar checks, their effect is usually easy to see. The effect of this specific table property however is not immediately apparent: for new data it is only revealed in interoperability with other components, and for existing data it should not have any visible effect if set correctly. Therefore we decided it would be best to be very strict in checks, because otherwise a typo in the table property value could only be discovered after some data has already been written with irreversible errors. This was the reasoning behind this part of specs.

throw new AnalysisException("Cannot provide a table timezone on insert; tried to insert " +
s"$tableName with ${TimestampTableTimeZone.TIMEZONE_PROPERTY}=$tz")
}
insertInto(df.sparkSession.sessionState.sqlParser.parseTableIdentifier(tableName))
}

@@ -406,6 +411,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
} else {
CatalogTableType.MANAGED
}
val props =
extraOptions.filterKeys(key => key == TimestampTableTimeZone.TIMEZONE_PROPERTY).toMap
TimestampTableTimeZone.checkTableTz(tableIdent, props)

val tableDesc = CatalogTable(
identifier = tableIdent,
@@ -414,7 +422,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
schema = new StructType,
provider = Some(source),
partitionColumnNames = partitioningColumns.getOrElse(Nil),
bucketSpec = getBucketSpec)
bucketSpec = getBucketSpec,
properties = props)

runCommand(df.sparkSession, "saveAsTable")(CreateTable(tableDesc, mode, Some(df.logicalPlan)))
}
@@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningUtils}
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningUtils, TimestampTableTimeZone}
import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat
import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter
import org.apache.spark.sql.internal.HiveSerDe
@@ -230,6 +230,13 @@ case class AlterTableSetPropertiesCommand(
isView: Boolean)
extends RunnableCommand {

if (isView) {
properties.get(TimestampTableTimeZone.TIMEZONE_PROPERTY).foreach { _ =>
Contributor:
is there even a meaning to set properties for any views? we should either drop this check, or have a more general check.

Contributor:
HiveQL explicitly allows properties in view; I've never used them, though.

throw new AnalysisException("Timezone cannot be set for view")
}
}
TimestampTableTimeZone.checkTableTz(tableName, properties)

override def run(sparkSession: SparkSession): Seq[Row] = {
val catalog = sparkSession.sessionState.catalog
val table = catalog.getTableMetadata(tableName)
@@ -34,8 +34,8 @@ import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.catalog.CatalogTableType._
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.util.quoteIdentifier
import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils}
import org.apache.spark.sql.catalyst.util.{quoteIdentifier, DateTimeUtils}
import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils, TimestampTableTimeZone}
import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
import org.apache.spark.sql.execution.datasources.json.JsonFileFormat
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
@@ -86,7 +86,8 @@ case class CreateTableLikeCommand(
schema = sourceTableDesc.schema,
provider = newProvider,
partitionColumnNames = sourceTableDesc.partitionColumnNames,
bucketSpec = sourceTableDesc.bucketSpec)
bucketSpec = sourceTableDesc.bucketSpec,
properties = sourceTableDesc.properties)

catalog.createTable(newTableDesc, ifNotExists)
Seq.empty[Row]
@@ -126,6 +127,8 @@ case class CreateTableCommand(
sparkSession.sessionState.catalog.createTable(table, ignoreIfExists)
Seq.empty[Row]
}

TimestampTableTimeZone.checkTableTz(table.identifier, table.properties)
}


@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable
import org.apache.spark.sql.catalyst.expressions.{Alias, SubqueryExpression}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View}
import org.apache.spark.sql.execution.datasources.TimestampTableTimeZone
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.sql.util.SchemaUtils

@@ -123,6 +124,10 @@ case class CreateViewCommand(
s"It is not allowed to add database prefix `$database` for the TEMPORARY view name.")
}

properties.get(TimestampTableTimeZone.TIMEZONE_PROPERTY).foreach { _ =>
throw new AnalysisException("Timezone cannot be set for view")
}

override def run(sparkSession: SparkSession): Seq[Row] = {
// If the plan cannot be analyzed, throw an exception and don't proceed.
val qe = sparkSession.sessionState.executePlan(child)