Commit 521397f

gengliangwang authored and HyukjinKwon committed
[SPARK-33591][SQL][FOLLOWUP] Add legacy config for recognizing null partition spec values
### What changes were proposed in this pull request?

This is a follow-up for #30538. It adds a legacy conf `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` in case users want the legacy behavior. It also adds documentation for the behavior change.

### Why are the changes needed?

In case users want the legacy behavior, they can set `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` to true.

### Does this PR introduce _any_ user-facing change?

Yes, adding a legacy configuration to restore the old behavior.

### How was this patch tested?

Unit test.

Closes #31421 from gengliangwang/legacyNullStringConstant.

Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
1 parent f66e38c · commit 521397f

File tree: 5 files changed, +31 -4 lines


docs/sql-migration-guide.md · 2 additions, 0 deletions

```diff
@@ -41,6 +41,8 @@ license: |
 
 - In Spark 3.2, the auto-generated `Cast` (such as those added by type coercion rules) will be stripped when generating column alias names. E.g., `sql("SELECT floor(1)").columns` will be `FLOOR(1)` instead of `FLOOR(CAST(1 AS DOUBLE))`.
 
+- In Spark 3.2, a null partition value is parsed as it is. In Spark 3.1 or earlier, it is parsed as a string literal of its text representation, e.g., string "null". To restore the legacy behavior, you can set `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` as true.
+
 - In Spark 3.2, table refreshing clears cached data of the table as well as of all its dependents such as views while keeping the dependents cached. The following commands perform table refreshing:
   * `ALTER TABLE .. ADD PARTITION`
   * `ALTER TABLE .. RENAME PARTITION`
```
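The documented change is easiest to see end to end. Below is a minimal sketch adapted from the test added in this commit (table and column names are illustrative, and `USING parquet` is an assumption):

```scala
// With the legacy conf enabled, the partition value in `p1 = null`
// is parsed as the four-character string "null", as in Spark 3.1.
spark.sql("SET spark.sql.legacy.parseNullPartitionSpecAsStringLiteral=true")
spark.sql("CREATE TABLE t (col1 INT, p1 STRING) USING parquet PARTITIONED BY (p1)")
spark.sql("INSERT INTO t PARTITION (p1 = null) SELECT 0")
spark.sql("SHOW PARTITIONS t").show()
// Expected under the legacy setting: a single partition row, p1=null,
// where "null" is a string value rather than a missing one.
```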

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala · 7 additions, 3 deletions

```diff
@@ -472,9 +472,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logging
    */
   override def visitPartitionSpec(
       ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) {
+    val processNullLiteral =
+      !conf.getConf(SQLConf.LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL)
     val parts = ctx.partitionVal.asScala.map { pVal =>
       val name = pVal.identifier.getText
-      val value = Option(pVal.constant).map(visitStringConstant)
+      val value = Option(pVal.constant).map(v => visitStringConstant(v, processNullLiteral))
       name -> value
     }
     // Before calling `toMap`, we check duplicated keys to avoid silently ignore partition values
@@ -500,9 +502,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logging
    * main purpose is to prevent slight differences due to back to back conversions i.e.:
    * String -> Literal -> String.
    */
-  protected def visitStringConstant(ctx: ConstantContext): String = withOrigin(ctx) {
+  protected def visitStringConstant(
+      ctx: ConstantContext,
+      processNullLiteral: Boolean): String = withOrigin(ctx) {
     ctx match {
-      case _: NullLiteralContext => null
+      case _: NullLiteralContext if processNullLiteral => null
       case s: StringLiteralContext => createString(s)
       case o => o.getText
     }
```
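The gate itself is a small pattern: the new flag decides whether a parsed `NULL` token surfaces as a real `null` or falls through to its raw text. Here is a self-contained sketch of the same logic using simplified stand-in types (the real ANTLR contexts live in the generated `SqlBaseParser` and are not reproduced here):

```scala
// Simplified stand-ins for the ANTLR constant contexts (hypothetical
// types for illustration; not the generated SqlBaseParser classes).
sealed trait Constant { def text: String }
case object NullLiteral extends Constant { val text = "null" }
case class StringLiteral(text: String) extends Constant
case class OtherConstant(text: String) extends Constant

def visitStringConstant(c: Constant, processNullLiteral: Boolean): String =
  c match {
    // Default path: a NULL token becomes a real null partition value.
    case NullLiteral if processNullLiteral => null
    case s: StringLiteral => s.text
    // With the legacy conf set, processNullLiteral is false, so NULL
    // falls through here and keeps its raw text, i.e. the string "null".
    case o => o.text
  }

assert(visitStringConstant(NullLiteral, processNullLiteral = true) == null)
assert(visitStringConstant(NullLiteral, processNullLiteral = false) == "null")
```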

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala · 10 additions, 0 deletions

```diff
@@ -2472,6 +2472,16 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
+  val LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL =
+    buildConf("spark.sql.legacy.parseNullPartitionSpecAsStringLiteral")
+      .internal()
+      .doc("If it is set to true, a null partition value is parsed as a string literal of its " +
+        "text representation, e.g., string 'null'. Otherwise, null partition values are parsed " +
+        "as they are.")
+      .version("3.2.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED =
     buildConf("spark.sql.legacy.replaceDatabricksSparkAvro.enabled")
       .internal()
```
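The entry defaults to `false`, so the new parsing applies unless a user opts in, and `.internal()` keeps it out of user-facing docs while still letting it be set like any SQL conf. A minimal sketch of reading the typed entry, as the `AstBuilder` change above does, assuming an active session (`SQLConf.get` resolves the current session's conf):

```scala
import org.apache.spark.sql.internal.SQLConf

// Typed read; false by default, true only if the user opted in
// to the legacy string-literal parsing of null partition values.
val legacyNullSpec: Boolean =
  SQLConf.get.getConf(SQLConf.LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL)
```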

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala · 1 addition, 1 deletion

```diff
@@ -319,7 +319,7 @@ class SparkSqlAstBuilder extends AstBuilder {
    * Convert a constants list into a String sequence.
    */
   override def visitConstantList(ctx: ConstantListContext): Seq[String] = withOrigin(ctx) {
-    ctx.constant.asScala.map(visitStringConstant).toSeq
+    ctx.constant.asScala.map(v => visitStringConstant(v, processNullLiteral = true)).toSeq
   }
 
   /**
```

sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala · 11 additions, 0 deletions

```diff
@@ -154,4 +154,15 @@ trait ShowPartitionsSuiteBase extends QueryTest with DDLCommandTestUtils {
       assert(partitions.first().getString(0) === "part=a")
     }
   }
+
+  test("SPARK-33591: null as string partition literal value 'null' after setting legacy conf") {
+    withSQLConf(SQLConf.LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL.key -> "true") {
+      withNamespaceAndTable("ns", "tbl") { t =>
+        sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)")
+        sql(s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0")
+        runShowPartitionsSql(s"SHOW PARTITIONS $t", Row("p1=null") :: Nil)
+        runShowPartitionsSql(s"SHOW PARTITIONS $t PARTITION (p1 = null)", Row("p1=null") :: Nil)
+      }
+    }
+  }
 }
```
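For contrast with the test above, here is a hedged sketch of the default (non-legacy) path. This is not a test from the commit, and the placeholder shown is an assumption about Hive-compatible catalogs:

```scala
// With the conf at its default (false), `p1 = null` is a true NULL.
sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)")
sql(s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0")
// Hive-style catalogs typically render a null partition with their
// default placeholder, e.g. p1=__HIVE_DEFAULT_PARTITION__, rather
// than the string rendering p1=null.
sql(s"SHOW PARTITIONS $t").show()
```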
