Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ object ExternalCatalogUtils {
}

def convertNullPartitionValues(spec: TablePartitionSpec): TablePartitionSpec = {
spec.mapValues(v => if (v == null) DEFAULT_PARTITION_NAME else v).toMap
spec.mapValues(v => if (v == null) DEFAULT_PARTITION_NAME else v).map(identity).toMap
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,15 @@ class InMemoryCatalog(
}
}

// Converts a single partition spec by delegating to
// ExternalCatalogUtils.convertNullPartitionValues.
private def toCatalogPartitionSpec = ExternalCatalogUtils.convertNullPartitionValues(_)
// Applies the single-spec conversion above to every spec in `specs`.
private def toCatalogPartitionSpecs(specs: Seq[TablePartitionSpec]): Seq[TablePartitionSpec] = {
specs.map(toCatalogPartitionSpec)
}
// Overload for whole partitions: returns copies of `parts` whose `spec` has been converted;
// the remaining fields are carried over unchanged by `copy`.
private def toCatalogPartitionSpec(
parts: Seq[CatalogTablePartition]): Seq[CatalogTablePartition] = {
parts.map(part => part.copy(spec = toCatalogPartitionSpec(part.spec)))
}

// --------------------------------------------------------------------------
// Databases
// --------------------------------------------------------------------------
Expand Down Expand Up @@ -389,10 +398,11 @@ class InMemoryCatalog(
override def createPartitions(
db: String,
table: String,
parts: Seq[CatalogTablePartition],
newParts: Seq[CatalogTablePartition],
ignoreIfExists: Boolean): Unit = synchronized {
requireTableExists(db, table)
val existingParts = catalog(db).tables(table).partitions
val parts = toCatalogPartitionSpec(newParts)
if (!ignoreIfExists) {
val dupSpecs = parts.collect { case p if existingParts.contains(p.spec) => p.spec }
if (dupSpecs.nonEmpty) {
Expand Down Expand Up @@ -428,12 +438,13 @@ class InMemoryCatalog(
override def dropPartitions(
db: String,
table: String,
partSpecs: Seq[TablePartitionSpec],
parts: Seq[TablePartitionSpec],
ignoreIfNotExists: Boolean,
purge: Boolean,
retainData: Boolean): Unit = synchronized {
requireTableExists(db, table)
val existingParts = catalog(db).tables(table).partitions
val partSpecs = toCatalogPartitionSpecs(parts)
if (!ignoreIfNotExists) {
val missingSpecs = partSpecs.collect { case s if !existingParts.contains(s) => s }
if (missingSpecs.nonEmpty) {
Expand Down Expand Up @@ -467,8 +478,10 @@ class InMemoryCatalog(
override def renamePartitions(
db: String,
table: String,
specs: Seq[TablePartitionSpec],
newSpecs: Seq[TablePartitionSpec]): Unit = synchronized {
fromSpecs: Seq[TablePartitionSpec],
toSpecs: Seq[TablePartitionSpec]): Unit = synchronized {
val specs = toCatalogPartitionSpecs(fromSpecs)
val newSpecs = toCatalogPartitionSpecs(toSpecs)
require(specs.size == newSpecs.size, "number of old and new partition specs differ")
requirePartitionsExist(db, table, specs)
requirePartitionsNotExist(db, table, newSpecs)
Expand Down Expand Up @@ -507,7 +520,8 @@ class InMemoryCatalog(
override def alterPartitions(
db: String,
table: String,
parts: Seq[CatalogTablePartition]): Unit = synchronized {
alterParts: Seq[CatalogTablePartition]): Unit = synchronized {
val parts = toCatalogPartitionSpec(alterParts)
requirePartitionsExist(db, table, parts.map(p => p.spec))
parts.foreach { p =>
catalog(db).tables(table).partitions.put(p.spec, p)
Expand All @@ -517,15 +531,17 @@ class InMemoryCatalog(
override def getPartition(
db: String,
table: String,
spec: TablePartitionSpec): CatalogTablePartition = synchronized {
partSpec: TablePartitionSpec): CatalogTablePartition = synchronized {
val spec = toCatalogPartitionSpec(partSpec)
requirePartitionsExist(db, table, Seq(spec))
catalog(db).tables(table).partitions(spec)
}

override def getPartitionOption(
db: String,
table: String,
spec: TablePartitionSpec): Option[CatalogTablePartition] = synchronized {
partSpec: TablePartitionSpec): Option[CatalogTablePartition] = synchronized {
val spec = toCatalogPartitionSpec(partSpec)
if (!partitionExists(db, table, spec)) {
None
} else {
Expand All @@ -536,9 +552,9 @@ class InMemoryCatalog(
override def listPartitionNames(
db: String,
table: String,
partialSpec: Option[TablePartitionSpec] = None): Seq[String] = synchronized {
partSpec: Option[TablePartitionSpec] = None): Seq[String] = synchronized {
val partitionColumnNames = getTable(db, table).partitionColumnNames

val partialSpec = partSpec.map(toCatalogPartitionSpec)
listPartitions(db, table, partialSpec).map { partition =>
partitionColumnNames.map { name =>
val partValue = if (partition.spec(name) == null) {
Expand All @@ -557,7 +573,7 @@ class InMemoryCatalog(
partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = synchronized {
requireTableExists(db, table)

partialSpec match {
partialSpec.map(toCatalogPartitionSpec) match {
case None => catalog(db).tables(table).partitions.values.toSeq
case Some(partial) =>
catalog(db).tables(table).partitions.toSeq.collect {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3575,15 +3575,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
}
})
}

test("SPARK-33591: null as a partition value") {
val t = "part_table"
withTable(t) {
sql(s"CREATE TABLE $t (col1 INT, p1 STRING) USING PARQUET PARTITIONED BY (p1)")
sql(s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0")
checkAnswer(sql(s"SELECT * FROM $t"), Row(0, null))
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A simpler way is to test DROP PARTITION here.

Copy link
Member Author

@MaxGekk MaxGekk Jan 25, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new test covers both the v1 In-Memory and Hive external catalogs because it runs as part of InMemoryCatalogedDDLSuite and HiveCatalogedDDLSuite.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, then please forward port the new test to branch-3.1, as I used this smaller change while backporting.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here it is: #31331

}
}

case class Foo(bar: Option[String])
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.connector.catalog.CatalogManager
import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER
import org.apache.spark.sql.execution.datasources.PartitioningUtils
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
Expand Down Expand Up @@ -1734,9 +1735,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {

// null partition values
createTablePartition(catalog, Map("a" -> null, "b" -> null), tableIdent)
val nullPartValue = if (isUsingHiveMetastore) "__HIVE_DEFAULT_PARTITION__" else null
assert(catalog.listPartitions(tableIdent).map(_.spec).toSet ==
Set(Map("a" -> nullPartValue, "b" -> nullPartValue)))
Set(Map("a" -> "__HIVE_DEFAULT_PARTITION__", "b" -> "__HIVE_DEFAULT_PARTITION__")))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now the In-Memory catalog behaves similarly to the Hive external catalog, so we don't need to distinguish them in tests.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we do it in the master branch as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In master, we already have common settings in unified tests:

override protected def nullPartitionValue: String = "__HIVE_DEFAULT_PARTITION__"

sql("ALTER TABLE tab1 DROP PARTITION (a = null, b = null)")
assert(catalog.listPartitions(tableIdent).isEmpty)
}
Expand Down Expand Up @@ -3091,6 +3091,35 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
assert(sql(s"SHOW TABLE EXTENDED LIKE '$t' PARTITION(a = 1)").count() === 1)
}
}

// Checks that a partition written with a null value can be listed and then dropped
// via `DROP PARTITION (p1 = null)`, for both a static and a dynamic partition insert.
test("SPARK-33591, SPARK-34203: insert and drop partitions with null values") {
// Runs SHOW PARTITIONS on `t`, parses the first column of every row with
// PartitioningUtils.parsePathFragment, and compares the resulting set with `expected`.
def checkPartitions(t: String, expected: Map[String, String]*): Unit = {
val partitions = sql(s"SHOW PARTITIONS $t")
.collect()
.toSet
.map((row: Row) => row.getString(0))
.map(PartitioningUtils.parsePathFragment)
assert(partitions === expected.toSet)
}
// Table provider clause: "hive" when isUsingHiveMetastore is true, otherwise "parquet".
val defaultUsing = "USING " + (if (isUsingHiveMetastore) "hive" else "parquet")
// Creates table `t` partitioned by p1, runs `insertCmd`, expects exactly one partition
// whose p1 equals ExternalCatalogUtils.DEFAULT_PARTITION_NAME, then drops the partition
// with p1 = null and expects no partitions to remain.
def insertAndDropNullPart(t: String, insertCmd: String): Unit = {
sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)")
sql(insertCmd)
checkPartitions(t, Map("p1" -> ExternalCatalogUtils.DEFAULT_PARTITION_NAME))
sql(s"ALTER TABLE $t DROP PARTITION (p1 = null)")
checkPartitions(t)
}

// Case 1: the null partition value is given explicitly in the PARTITION clause.
withTable("tbl") {
insertAndDropNullPart("tbl", s"INSERT INTO TABLE tbl PARTITION (p1 = null) SELECT 0")
}

// Case 2: the null partition value comes from the inserted row; this run sets
// hive.exec.dynamic.partition.mode to "nonstrict" for its duration.
withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
withTable("tbl") {
insertAndDropNullPart("tbl", s"INSERT OVERWRITE TABLE tbl VALUES (0, null)")
}
}
}
}

object FakeLocalFsFileSystem {
Expand Down