apache · AngersZhuuuu · Nov 19, 2020 · Nov 19, 2020 · Nov 19, 2020 · Nov 19, 2020
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
@@ -101,6 +101,8 @@ license: |
 
   - Since Spark 3.1, CHAR/CHARACTER and VARCHAR types are supported in the table schema. Table scan/insertion will respect the char/varchar semantic. If char/varchar is used in places other than table schema, an exception will be thrown (CAST is an exception that simply treats char/varchar as string like before). To restore the behavior before Spark 3.1, which treats them as STRING types and ignores a length parameter, e.g. `CHAR(4)`, you can set `spark.sql.legacy.charVarcharAsString` to `true`.
 
+  - In Spark 3.1, we support using corresponding typed literal of partition column value type as partition column value in SQL, such as if we have a partition table with partition column of date type, we can use typed date literal `date '2020-01-01'` as partition spec `PARTITION (dt = date '2020-01-01')`, it will be treated as partition column value `2020-01-01`. In Spark 3.0 the partition value will be treated as string value `date '2020-01-01'` and it's a illegal date type string value and will been converted to `__HIVE_DEFAULT_PARTITION__`.
+
   - In Spark 3.1, `AnalysisException` is replaced by its sub-classes that are thrown for tables from Hive external catalog in the following situations:
     * `ALTER TABLE .. ADD PARTITION` throws `PartitionsAlreadyExistException` if new partition exists already
     * `ALTER TABLE .. DROP PARTITION` throws `NoSuchPartitionsException` for not existing partitions

diff --git a/docs/sql-ref-syntax-dml-insert-into.md b/docs/sql-ref-syntax-dml-insert-into.md
@@ -40,8 +40,8 @@ INSERT INTO [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ]
 
 * **partition_spec**
 
-    An optional parameter that specifies a comma-separated list of key and value pairs
-    for partitions.
+    An optional parameter that specifies a comma separated list of key and value pairs
+    for partitions. Note that one can use a typed literal (e.g., date'2019-01-02') for a partition column value.
 
     **Syntax:** `PARTITION ( partition_col_name  = partition_col_val [ , ... ] )`
 
@@ -206,6 +206,19 @@ SELECT * FROM students;
 +-------------+--------------------------+----------+
 ```
 
+#### Insert Using a Typed Date Literal for a Partition Column Value
+```sql
+CREATE TABLE students (name STRING, address  STRING) PARTITIONED BY (birthday DATE);
+
+INSERT INTO students PARTITION (birthday = date'2019-01-02')
+    VALUES ('Amy Smith', '123 Park Ave, San Jose');
+
+SELECT * FROM students;
++-------------+-------------------------+-----------+
+|         name|                  address|   birthday|
++-------------+-------------------------+-----------+
+|    Amy Smith|   123 Park Ave, San Jose| 2019-01-02|
++-------------+-------------------------+-----------+
 #### Insert with a column list
 
 ```sql

diff --git a/docs/sql-ref-syntax-dml-insert-overwrite-table.md b/docs/sql-ref-syntax-dml-insert-overwrite-table.md
@@ -40,8 +40,8 @@ INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ]
 
 * **partition_spec**
 
-    An optional parameter that specifies a comma-separated list of key and value pairs
-    for partitions.
+    An optional parameter that specifies a comma separated list of key and value pairs
+    for partitions. Note that one can use a typed literal (e.g., date'2019-01-02') for a partition column value.
 
     **Syntax:** `PARTITION ( partition_col_name [ = partition_col_val ] [ , ... ] )`
 
@@ -179,6 +179,29 @@ SELECT * FROM students;
 +-----------+-------------------------+----------+
 ```
 
+#### Insert Using a Typed Date Literal for a Partition Column Value
+```sql
+CREATE TABLE students (name STRING, address  STRING) PARTITIONED BY (birthday DATE);
+
+INSERT INTO students PARTITION (birthday = date'2019-01-02')
+    VALUES ('Amy Smith', '123 Park Ave, San Jose');
+
+SELECT * FROM students;
++-------------+-------------------------+-----------+
+|         name|                  address|   birthday|
++-------------+-------------------------+-----------+
+|    Amy Smith|   123 Park Ave, San Jose| 2019-01-02|
++-------------+-------------------------+-----------+
+
+INSERT INTO students PARTITION (birthday = date'2019-01-02')
+    VALUES('Jason Wang', '908 Bird St, Saratoga');
+
+SELECT * FROM students;
++-----------+-------------------------+-----------+
+|       name|                  address|   birthday|
++-----------+-------------------------+-----------+
+| Jason Wang|    908 Bird St, Saratoga| 2019-01-02|
++-----------+-------------------------+-----------+
 #### Insert with a column list
 
 ```sql

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -505,10 +505,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
   protected def visitStringConstant(
       ctx: ConstantContext,
       legacyNullAsString: Boolean): String = withOrigin(ctx) {
-    ctx match {
-      case _: NullLiteralContext if !legacyNullAsString => null
-      case s: StringLiteralContext => createString(s)
-      case o => o.getText
+    expression(ctx) match {
+      case l: Literal if l.value == null & !legacyNullAsString => null
+      case l: Literal =>
+        Cast(l, StringType, Some(SQLConf.get.sessionLocalTimeZone)).eval().toString
+      case _ =>
+        throw new IllegalArgumentException("Only support convert Literal to string when visit" +
+          " partition spec value")
     }
   }
 

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -20,14 +20,14 @@ package org.apache.spark.sql.catalyst.parser
 import java.util.Locale
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView}
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedInlineTable, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView}
 import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource}
 import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first}
 import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
 import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType}
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
 
 class DDLParserSuite extends AnalysisTest {
   import CatalystSqlParser._
@@ -2500,4 +2500,27 @@ class DDLParserSuite extends AnalysisTest {
 
     testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
   }
+
+  test("SPARK-33474: Support TypeConstructed partition spec value") {
+    def insertPartitionPlan(part: String): InsertIntoStatement = {
+      InsertIntoStatement(
+        UnresolvedRelation(Seq("t")),
+        Map("part" -> Some(part)),
+        Seq.empty[String],
+        UnresolvedInlineTable(Seq("col1"), Seq(Seq(Literal("a")))),
+        overwrite = false, ifPartitionNotExists = false)
+    }
+
+    val dateTypeSql = "INSERT INTO t PARTITION(part = date'2019-01-02') VALUES('a')"
+    val interval = new CalendarInterval(7, 1, 1000).toString
+    val intervalTypeSql = s"INSERT INTO t PARTITION(part = interval'$interval') VALUES('a')"
+    val timestamp = "2019-01-02 11:11:11"
+    val timestampTypeSql = s"INSERT INTO t PARTITION(part = timestamp'$timestamp') VALUES('a')"
+    val binaryTypeSql = s"INSERT INTO t PARTITION(part = X'537061726B2053514C') VALUES('a')"
+
+    comparePlans(parsePlan(dateTypeSql), insertPartitionPlan("2019-01-02"))
+    comparePlans(parsePlan(intervalTypeSql), insertPartitionPlan(interval))
+    comparePlans(parsePlan(timestampTypeSql), insertPartitionPlan(timestamp))
+    comparePlans(parsePlan(binaryTypeSql), insertPartitionPlan("Spark SQL"))
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4021,6 +4021,27 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
       }
     }
   }
+
+  test("SPARK-33474: Support TypeConstructed partition spec value") {
+    withTable("t1", "t2", "t4") {
+      sql("CREATE TABLE t1(name STRING, part DATE) USING PARQUET PARTITIONED BY (part)")
+      sql("INSERT INTO t1 PARTITION(part = date'2019-01-02') VALUES('a')")
+      checkAnswer(sql("SELECT name, CAST(part AS STRING) FROM t1"), Row("a", "2019-01-02"))
+
+      sql("CREATE TABLE t2(name STRING, part TIMESTAMP) USING PARQUET PARTITIONED BY (part)")
+      sql("INSERT INTO t2 PARTITION(part = timestamp'2019-01-02 11:11:11') VALUES('a')")
+      checkAnswer(sql("SELECT name, CAST(part AS STRING) FROM t2"), Row("a", "2019-01-02 11:11:11"))
+
+      val e = intercept[AnalysisException] {
+        sql("CREATE TABLE t3(name STRING, part INTERVAL) USING PARQUET PARTITIONED BY (part)")
+      }.getMessage
+      assert(e.contains("Cannot use interval for partition column"))
+
+      sql("CREATE TABLE t4(name STRING, part BINARY) USING CSV PARTITIONED BY (part)")
+      sql(s"INSERT INTO t4 PARTITION(part = X'537061726B2053514C') VALUES('a')")
+      checkAnswer(sql("SELECT name, cast(part as string) FROM t4"), Row("a", "Spark SQL"))
+    }
+  }
 }
 
 case class Foo(bar: Option[String])