apache · AngersZhuuuu · Nov 19, 2020 · Nov 19, 2020 · Nov 19, 2020 · Nov 19, 2020
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
@@ -65,6 +65,8 @@ license: |
 
   - In Spark 3.2, the output schema of `SHOW TBLPROPERTIES` becomes `key: string, value: string` whether you specify the table property key or not. In Spark 3.1 and earlier, the output schema of `SHOW TBLPROPERTIES` is `value: string` when you specify the table property key. To restore the old schema with the builtin catalog, you can set `spark.sql.legacy.keepCommandOutputSchema` to `true`.
 
+  - In Spark 3.2, we support typed literals in the partition spec of INSERT and ADD/DROP/RENAME PARTITION. For example, `ADD PARTITION(dt = date'2020-01-01')` adds a partition with date value `2020-01-01`. In Spark 3.1 and earlier, the partition value will be parsed as string value `date '2020-01-01'`, which is an illegal date value, and we add a partition with null value at the end.
+
 ## Upgrading from Spark SQL 3.0 to 3.1
 
   - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`.

diff --git a/docs/sql-ref-syntax-ddl-alter-table.md b/docs/sql-ref-syntax-ddl-alter-table.md
@@ -49,7 +49,7 @@ ALTER TABLE table_identifier partition_spec RENAME TO partition_spec
 
 * **partition_spec**
 
-    Partition to be renamed.
+    Partition to be renamed. Note that one can use a typed literal (e.g., date'2019-01-02') in the partition spec.
 
     **Syntax:** `PARTITION ( partition_col_name  = partition_col_val [ , ... ] )`
 
@@ -126,7 +126,7 @@ ALTER TABLE table_identifier ADD [IF NOT EXISTS]
 
 * **partition_spec**
 
-    Partition to be added.
+    Partition to be added. Note that one can use a typed literal (e.g., date'2019-01-02') in the partition spec.
 
     **Syntax:** `PARTITION ( partition_col_name  = partition_col_val [ , ... ] )`
 
@@ -152,7 +152,7 @@ ALTER TABLE table_identifier DROP [ IF EXISTS ] partition_spec [PURGE]
 
 * **partition_spec**
 
-    Partition to be dropped.
+    Partition to be dropped. Note that one can use a typed literal (e.g., date'2019-01-02') in the partition spec.
 
     **Syntax:** `PARTITION ( partition_col_name  = partition_col_val [ , ... ] )`
 
@@ -217,7 +217,7 @@ ALTER TABLE table_identifier [ partition_spec ] SET LOCATION 'new_location'
 
 * **partition_spec**
 
-    Specifies the partition on which the property has to be set.
+    Specifies the partition on which the property has to be set. Note that one can use a typed literal (e.g., date'2019-01-02') in the partition spec.
 
     **Syntax:** `PARTITION ( partition_col_name  = partition_col_val [ , ... ] )`
 

diff --git a/docs/sql-ref-syntax-dml-insert-into.md b/docs/sql-ref-syntax-dml-insert-into.md
@@ -41,7 +41,7 @@ INSERT INTO [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ]
 * **partition_spec**
 
     An optional parameter that specifies a comma-separated list of key and value pairs
-    for partitions.
+    for partitions. Note that one can use a typed literal (e.g., date'2019-01-02') in the partition spec.
 
     **Syntax:** `PARTITION ( partition_col_name  = partition_col_val [ , ... ] )`
 
@@ -206,6 +206,19 @@ SELECT * FROM students;
 +-------------+--------------------------+----------+
 ```
 
+#### Insert Using a Typed Date Literal for a Partition Column Value
+```sql
+CREATE TABLE students (name STRING, address  STRING) PARTITIONED BY (birthday DATE);
+
+INSERT INTO students PARTITION (birthday = date'2019-01-02')
+    VALUES ('Amy Smith', '123 Park Ave, San Jose');
+
+SELECT * FROM students;
++-------------+-------------------------+-----------+
+|         name|                  address|   birthday|
++-------------+-------------------------+-----------+
+|    Amy Smith|   123 Park Ave, San Jose| 2019-01-02|
++-------------+-------------------------+-----------+
 #### Insert with a column list
 
 ```sql

diff --git a/docs/sql-ref-syntax-dml-insert-overwrite-table.md b/docs/sql-ref-syntax-dml-insert-overwrite-table.md
@@ -41,7 +41,7 @@ INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ]
 * **partition_spec**
 
     An optional parameter that specifies a comma-separated list of key and value pairs
-    for partitions.
+    for partitions. Note that one can use a typed literal (e.g., date'2019-01-02') in the partition spec.
 
     **Syntax:** `PARTITION ( partition_col_name [ = partition_col_val ] [ , ... ] )`
 
@@ -179,6 +179,29 @@ SELECT * FROM students;
 +-----------+-------------------------+----------+
 ```
 
+#### Insert Using a Typed Date Literal for a Partition Column Value
+```sql
+CREATE TABLE students (name STRING, address  STRING) PARTITIONED BY (birthday DATE);
+
+INSERT INTO students PARTITION (birthday = date'2019-01-02')
+    VALUES ('Amy Smith', '123 Park Ave, San Jose');
+
+SELECT * FROM students;
++-------------+-------------------------+-----------+
+|         name|                  address|   birthday|
++-------------+-------------------------+-----------+
+|    Amy Smith|   123 Park Ave, San Jose| 2019-01-02|
++-------------+-------------------------+-----------+
+
+INSERT INTO students PARTITION (birthday = date'2019-01-02')
+    VALUES('Jason Wang', '908 Bird St, Saratoga');
+
+SELECT * FROM students;
++-----------+-------------------------+-----------+
+|       name|                  address|   birthday|
++-----------+-------------------------+-----------+
+| Jason Wang|    908 Bird St, Saratoga| 2019-01-02|
++-----------+-------------------------+-----------+
 #### Insert with a column list
 
 ```sql

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -509,10 +509,16 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
   protected def visitStringConstant(
       ctx: ConstantContext,
       legacyNullAsString: Boolean): String = withOrigin(ctx) {
-    ctx match {
-      case _: NullLiteralContext if !legacyNullAsString => null
-      case s: StringLiteralContext => createString(s)
-      case o => o.getText
+    expression(ctx) match {
+      case Literal(null, _) if !legacyNullAsString => null
+      case l @ Literal(null, _) => l.toString
+      case l: Literal =>
+        // TODO For v2 commands, we will cast the string back to its actual value,
+        //  which is a waste and can be improved in the future.
+        Cast(l, StringType, Some(SQLConf.get.sessionLocalTimeZone)).eval().toString
+      case other =>
+        throw new IllegalArgumentException(s"Only literals are allowed in the " +
+          s"partition spec, but got ${other.sql}")
     }
   }
 

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -20,14 +20,14 @@ package org.apache.spark.sql.catalyst.parser
 import java.util.Locale
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView}
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedInlineTable, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView}
 import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource}
-import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal}
+import org.apache.spark.sql.catalyst.expressions.{EqualTo, Hex, Literal}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first}
 import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
 import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType}
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
 
 class DDLParserSuite extends AnalysisTest {
   import CatalystSqlParser._
@@ -2497,4 +2497,28 @@ class DDLParserSuite extends AnalysisTest {
 
     testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
   }
+
+  test("SPARK-33474: Support typed literals as partition spec values") {
+    def insertPartitionPlan(part: String): InsertIntoStatement = {
+      InsertIntoStatement(
+        UnresolvedRelation(Seq("t")),
+        Map("part" -> Some(part)),
+        Seq.empty[String],
+        UnresolvedInlineTable(Seq("col1"), Seq(Seq(Literal("a")))),
+        overwrite = false, ifPartitionNotExists = false)
+    }
+    val binaryStr = "Spark SQL"
+    val binaryHexStr = Hex.hex(UTF8String.fromString(binaryStr).getBytes).toString
+    val dateTypeSql = "INSERT INTO t PARTITION(part = date'2019-01-02') VALUES('a')"
+    val interval = new CalendarInterval(7, 1, 1000).toString
+    val intervalTypeSql = s"INSERT INTO t PARTITION(part = interval'$interval') VALUES('a')"
+    val timestamp = "2019-01-02 11:11:11"
+    val timestampTypeSql = s"INSERT INTO t PARTITION(part = timestamp'$timestamp') VALUES('a')"
+    val binaryTypeSql = s"INSERT INTO t PARTITION(part = X'$binaryHexStr') VALUES('a')"
+
+    comparePlans(parsePlan(dateTypeSql), insertPartitionPlan("2019-01-02"))
+    comparePlans(parsePlan(intervalTypeSql), insertPartitionPlan(interval))
+    comparePlans(parsePlan(timestampTypeSql), insertPartitionPlan(timestamp))
+    comparePlans(parsePlan(binaryTypeSql), insertPartitionPlan(binaryStr))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala
@@ -18,9 +18,11 @@
 package org.apache.spark.sql
 
 import org.apache.spark.SparkConf
+import org.apache.spark.sql.catalyst.expressions.Hex
 import org.apache.spark.sql.connector.InMemoryPartitionTableCatalog
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
+import org.apache.spark.unsafe.types.UTF8String
 
 /**
  * The base trait for DML - insert syntax
@@ -209,6 +211,52 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils {
     }
   }
 
+  test("SPARK-33474: Support typed literals as partition spec values") {
+    withTable("t1") {
+      val binaryStr = "Spark SQL"
+      val binaryHexStr = Hex.hex(UTF8String.fromString(binaryStr).getBytes).toString
+      sql(
+        """
+          | CREATE TABLE t1(name STRING, part1 DATE, part2 TIMESTAMP, part3 BINARY,
+          |  part4 STRING, part5 STRING, part6 STRING, part7 STRING)
+          | USING PARQUET PARTITIONED BY (part1, part2, part3, part4, part5, part6, part7)
+         """.stripMargin)
+
+      sql(
+        s"""
+           | INSERT OVERWRITE t1 PARTITION(
+           | part1 = date'2019-01-01',
+           | part2 = timestamp'2019-01-01 11:11:11',
+           | part3 = X'$binaryHexStr',
+           | part4 = 'p1',
+           | part5 = date'2019-01-01',
+           | part6 = timestamp'2019-01-01 11:11:11',
+           | part7 = X'$binaryHexStr'
+           | ) VALUES('a')
+        """.stripMargin)
+      checkAnswer(sql(
+        """
+          | SELECT
+          |   name,
+          |   CAST(part1 AS STRING),
+          |   CAST(part2 as STRING),
+          |   CAST(part3 as STRING),
+          |   part4,
+          |   part5,
+          |   part6,
+          |   part7
+          | FROM t1
+        """.stripMargin),
+        Row("a", "2019-01-01", "2019-01-01 11:11:11", "Spark SQL", "p1",
+          "2019-01-01", "2019-01-01 11:11:11", "Spark SQL"))
+
+      val e = intercept[AnalysisException] {
+        sql("CREATE TABLE t2(name STRING, part INTERVAL) USING PARQUET PARTITIONED BY (part)")
+      }.getMessage
+      assert(e.contains("Cannot use interval"))
+    }
+  }
+
   test("SPARK-34556: " +
     "checking duplicate static partition columns should respect case sensitive conf") {
     withTable("t") {

diff --git a/...c/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/...c/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala
@@ -181,4 +181,12 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with DDLCommandTestUtils
       checkPartitions(t, Map("id" -> "1"), Map("id" -> "2"))
     }
   }
+
+  test("SPARK-33474: Support typed literals as partition spec values") {
+    withNamespaceAndTable("ns", "tbl") { t =>
+      sql(s"CREATE TABLE $t(name STRING, part DATE) USING PARQUET PARTITIONED BY (part)")
+      sql(s"ALTER TABLE $t ADD PARTITION(part = date'2020-01-01')")
+      checkPartitions(t, Map("part" ->"2020-01-01"))
+    }
+  }
 }
diff --git a/.../test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/.../test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala
@@ -230,4 +230,14 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil
       }
     }
   }
+
+  test("SPARK-33474: Support typed literals as partition spec values") {
+    withNamespaceAndTable("ns", "tbl") { t =>
+      sql(s"CREATE TABLE $t(name STRING, part DATE) USING PARQUET PARTITIONED BY (part)")
+      sql(s"ALTER TABLE $t ADD PARTITION(part = date'2020-01-01')")
+      checkPartitions(t, Map("part" -> "2020-01-01"))
+      sql(s"ALTER TABLE $t DROP PARTITION (part = date'2020-01-01')")
+      checkPartitions(t)
+    }
+  }
 }
diff --git a/...est/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/...est/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala
@@ -210,4 +210,15 @@ trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUt
       }
     }
   }
+
+  test("SPARK-33474: Support typed literals as partition spec values") {
+    withNamespaceAndTable("ns", "tbl") { t =>
+      sql(s"CREATE TABLE $t(name STRING, part DATE) USING PARQUET PARTITIONED BY (part)")
+      sql(s"ALTER TABLE $t ADD PARTITION(part = date'2020-01-01')")
+      checkPartitions(t, Map("part" -> "2020-01-01"))
+      sql(s"ALTER TABLE $t PARTITION (part = date'2020-01-01')" +
+        s" RENAME TO PARTITION (part = date'2020-01-02')")
+      checkPartitions(t, Map("part" -> "2020-01-02"))
+    }
+  }
 }