[SPARK-33474][SQL] Support TypeConstructed partition spec value #30421
Changes from 30 commits
Documentation (INSERT INTO SQL reference):

````diff
@@ -40,8 +40,8 @@ INSERT INTO [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ]

 * **partition_spec**

-    An optional parameter that specifies a comma-separated list of key and value pairs
-    for partitions.
+    An optional parameter that specifies a comma separated list of key and value pairs
+    for partitions. Note that one can use a typed literal (e.g., date'2019-01-02') for a partition column value.

     **Syntax:** `PARTITION ( partition_col_name = partition_col_val [ , ... ] )`

@@ -206,6 +206,19 @@ SELECT * FROM students;
 +-------------+--------------------------+----------+
 ```

+#### Insert Using a Typed Date Literal for a Partition Column Value
+
+```sql
+CREATE TABLE students (name STRING, address STRING) PARTITIONED BY (birthday DATE);
+
+INSERT INTO students PARTITION (birthday = date'2019-01-02')
+    VALUES ('Amy Smith', '123 Park Ave, San Jose');
+
+SELECT * FROM students;
++-------------+-------------------------+-----------+
+|         name|                  address|   birthday|
++-------------+-------------------------+-----------+
+|    Amy Smith|   123 Park Ave, San Jose| 2019-01-02|
++-------------+-------------------------+-----------+
+```
+
 #### Insert with a column list

 ```sql
````
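Beyond the date literal in the documentation example above, the same PARTITION clause accepts other typed literals (e.g. timestamp and binary), as the tests later in this PR exercise. The sketch below is not part of the diff; it assumes a local SparkSession, and the table name and schema are made up for illustration.

```scala
import org.apache.spark.sql.SparkSession

// Illustrative only (not from the PR): using a typed DATE literal as a partition value
// through the public SparkSession API, matching the syntax documented above.
object TypedPartitionSpecExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("typed-partition-spec")
      .getOrCreate()

    // Partition column declared as DATE; the partition value is given as a typed literal.
    spark.sql("CREATE TABLE students(name STRING, birthday DATE) USING PARQUET PARTITIONED BY (birthday)")
    spark.sql("INSERT INTO students PARTITION (birthday = date'2019-01-02') VALUES ('Amy Smith')")

    // Shows the row with birthday = 2019-01-02.
    spark.sql("SELECT name, birthday FROM students").show()

    spark.stop()
  }
}
```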
AstBuilder.scala (SQL parser):

```diff
@@ -505,10 +505,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logging
   protected def visitStringConstant(
       ctx: ConstantContext,
       legacyNullAsString: Boolean): String = withOrigin(ctx) {
-    ctx match {
-      case _: NullLiteralContext if !legacyNullAsString => null
-      case s: StringLiteralContext => createString(s)
-      case o => o.getText
+    expression(ctx) match {
+      case l: Literal if l.value == null & !legacyNullAsString => null
+      case l: Literal =>
+        Cast(l, StringType, Some(SQLConf.get.sessionLocalTimeZone)).eval().toString
+      case _ =>
+        throw new IllegalArgumentException("Only support convert Literal to string when visit" +
+          " partition spec value")
     }
   }
```
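The effect of this change is that any literal appearing as a partition value is evaluated and cast to StringType with the session-local time zone, instead of being taken as raw token text (the removed `o.getText` branch, which would have yielded e.g. `date'2019-01-02'` verbatim). A minimal sketch of that conversion using Catalyst's internal Literal and Cast expressions follows; it assumes spark-catalyst is on the classpath and uses "UTC" in place of SQLConf.get.sessionLocalTimeZone.

```scala
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.types.StringType

// Sketch of the new conversion path (assumptions: spark-catalyst on the classpath,
// "UTC" standing in for the session-local time zone used by the real code).
object PartitionValueToString {
  def main(args: Array[String]): Unit = {
    // A DATE literal, roughly what the parser produces for date'2019-01-02'.
    val dateLiteral = Literal(java.sql.Date.valueOf("2019-01-02"))

    // Evaluate a cast to StringType, mirroring the Cast(..., StringType, ...) call above.
    val partitionValue = Cast(dateLiteral, StringType, Some("UTC")).eval().toString

    println(partitionValue) // 2019-01-02, the string stored in the partition spec
  }
}
```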
DDLParserSuite.scala (parser tests):

```diff
@@ -20,14 +20,14 @@ package org.apache.spark.sql.catalyst.parser
 import java.util.Locale

 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView}
+import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedInlineTable, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView}
 import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource}
 import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first}
 import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
 import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType}
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

 class DDLParserSuite extends AnalysisTest {
   import CatalystSqlParser._

@@ -2500,4 +2500,27 @@ class DDLParserSuite extends AnalysisTest {
     testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false)
   }
+
+  test("SPARK-33474: Support TypeConstructed partition spec value") {
+    def insertPartitionPlan(part: String): InsertIntoStatement = {
+      InsertIntoStatement(
+        UnresolvedRelation(Seq("t")),
+        Map("part" -> Some(part)),
+        Seq.empty[String],
+        UnresolvedInlineTable(Seq("col1"), Seq(Seq(Literal("a")))),
+        overwrite = false, ifPartitionNotExists = false)
+    }
+
+    val dateTypeSql = "INSERT INTO t PARTITION(part = date'2019-01-02') VALUES('a')"
+    val interval = new CalendarInterval(7, 1, 1000).toString
+    val intervalTypeSql = s"INSERT INTO t PARTITION(part = interval'$interval') VALUES('a')"
+    val timestamp = "2019-01-02 11:11:11"
+    val timestampTypeSql = s"INSERT INTO t PARTITION(part = timestamp'$timestamp') VALUES('a')"
+    val binaryTypeSql = "INSERT INTO t PARTITION(part = X'537061726B2053514C') VALUES('a')"
+
+    comparePlans(parsePlan(dateTypeSql), insertPartitionPlan("2019-01-02"))
+    comparePlans(parsePlan(intervalTypeSql), insertPartitionPlan(interval))
+    comparePlans(parsePlan(timestampTypeSql), insertPartitionPlan(timestamp))
+    comparePlans(parsePlan(binaryTypeSql), insertPartitionPlan("Spark SQL"))
+  }
 }
```
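The test above asserts that each typed literal is folded into a plain string inside InsertIntoStatement.partitionSpec. One way to see the same thing interactively, assuming the Catalyst parser is on the classpath (the table name and query here are made up for illustration):

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.plans.logical.InsertIntoStatement

// Sketch (not from the PR): parse an INSERT with a typed partition literal and
// inspect the resulting partition spec map.
object InspectPartitionSpec {
  def main(args: Array[String]): Unit = {
    val plan = CatalystSqlParser.parsePlan(
      "INSERT INTO t PARTITION(part = date'2019-01-02') VALUES('a')")

    plan match {
      case insert: InsertIntoStatement =>
        // With this PR the typed literal arrives in its string form:
        // Map(part -> Some(2019-01-02))
        println(insert.partitionSpec)
      case other =>
        println(s"Unexpected plan: $other")
    }
  }
}
```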
SQLQuerySuite.scala (end-to-end tests):

```diff
@@ -4021,6 +4021,27 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper
       }
     }
   }
+
+  test("SPARK-33474: Support TypeConstructed partition spec value") {
+    withTable("t1", "t2", "t4") {
+      sql("CREATE TABLE t1(name STRING, part DATE) USING PARQUET PARTITIONED BY (part)")
+      sql("INSERT INTO t1 PARTITION(part = date'2019-01-02') VALUES('a')")
+      checkAnswer(sql("SELECT name, CAST(part AS STRING) FROM t1"), Row("a", "2019-01-02"))
+
+      sql("CREATE TABLE t2(name STRING, part TIMESTAMP) USING PARQUET PARTITIONED BY (part)")
+      sql("INSERT INTO t2 PARTITION(part = timestamp'2019-01-02 11:11:11') VALUES('a')")
+      checkAnswer(sql("SELECT name, CAST(part AS STRING) FROM t2"), Row("a", "2019-01-02 11:11:11"))
+
+      val e = intercept[AnalysisException] {
+        sql("CREATE TABLE t3(name STRING, part INTERVAL) USING PARQUET PARTITIONED BY (part)")
+      }.getMessage
+      assert(e.contains("Cannot use interval for partition column"))
+
+      sql("CREATE TABLE t4(name STRING, part BINARY) USING CSV PARTITIONED BY (part)")
+      sql("INSERT INTO t4 PARTITION(part = X'537061726B2053514C') VALUES('a')")
+      checkAnswer(sql("SELECT name, cast(part as string) FROM t4"), Row("a", "Spark SQL"))
+    }
+  }
 }

 case class Foo(bar: Option[String])
```
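One detail worth spelling out from the binary case in the test above: the hex literal X'537061726B2053514C' is simply the ASCII bytes of "Spark SQL", which is why the expected answer is that string. A quick standalone check (not part of the PR):

```scala
import java.nio.charset.StandardCharsets

// Quick check (illustrative only): the hex literal used in the binary partition test
// decodes to the ASCII string "Spark SQL".
object DecodeHexPartitionValue {
  def main(args: Array[String]): Unit = {
    val hex = "537061726B2053514C"
    // Each pair of hex digits is one byte: 53 -> 'S', 70 -> 'p', 61 -> 'a', ...
    val bytes = hex.grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
    println(new String(bytes, StandardCharsets.US_ASCII)) // Spark SQL
  }
}
```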
Review discussion on this test:

Member: The last thing that I'm concerned about is whether we already have tests corresponding to @cloud-fan's last comment.

Contributor (Author): For these three concerns:
- Now the type constructor only supports …
- It's solved since #30538.
- Seems there is auto type conversion; I will check this with a UT.

Contributor (Author): Hmm, added some UT cases. Got the concern that since we now treat the partition spec value as a string, it's not type safe. So should we still continue this PR, or start working on treating the partition value as a Literal?
Reviewer: Should be …
Reviewer: In Spark 3.2?
Contributor (Author): Hmm, yea, updated.