Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ object ExternalCatalogUtils {
}

def convertNullPartitionValues(spec: TablePartitionSpec): TablePartitionSpec = {
spec.mapValues(v => if (v == null) DEFAULT_PARTITION_NAME else v).toMap
spec.mapValues(v => if (v == null) DEFAULT_PARTITION_NAME else v).map(identity).toMap
Copy link
Member Author

@MaxGekk MaxGekk Jan 25, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

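Otherwise I got the "Task not serializable" exception: `mapValues` returns a lazy view that is not serializable (at least on Scala 2.12), and `.map(identity)` forces it into a strict, serializable Map.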

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,15 @@ class InMemoryCatalog(
}
}

// Single-spec conversion: delegates to `ExternalCatalogUtils.convertNullPartitionValues`.
// Declared without a parameter list, so it evaluates to a function value that can be applied
// to one partition spec or passed to `map`. Note the name is overloaded with the
// `Seq[CatalogTablePartition]` variant below.
private def toCatalogPartitionSpec = ExternalCatalogUtils.convertNullPartitionValues(_)
// Applies the single-spec conversion to every element of `specs`, keeping the original order.
private def toCatalogPartitionSpecs(specs: Seq[TablePartitionSpec]): Seq[TablePartitionSpec] = {
  for (spec <- specs) yield toCatalogPartitionSpec(spec)
}
// Returns copies of the given partitions in which each `spec` has been run through the
// single-spec conversion; all other partition fields are carried over by `copy`.
private def toCatalogPartitionSpec(
    parts: Seq[CatalogTablePartition]): Seq[CatalogTablePartition] = {
  for (partition <- parts) yield {
    val convertedSpec = toCatalogPartitionSpec(partition.spec)
    partition.copy(spec = convertedSpec)
  }
}

// --------------------------------------------------------------------------
// Databases
// --------------------------------------------------------------------------
Expand Down Expand Up @@ -389,10 +398,11 @@ class InMemoryCatalog(
override def createPartitions(
db: String,
table: String,
parts: Seq[CatalogTablePartition],
newParts: Seq[CatalogTablePartition],
ignoreIfExists: Boolean): Unit = synchronized {
requireTableExists(db, table)
val existingParts = catalog(db).tables(table).partitions
val parts = toCatalogPartitionSpec(newParts)
if (!ignoreIfExists) {
val dupSpecs = parts.collect { case p if existingParts.contains(p.spec) => p.spec }
if (dupSpecs.nonEmpty) {
Expand Down Expand Up @@ -428,12 +438,13 @@ class InMemoryCatalog(
override def dropPartitions(
db: String,
table: String,
partSpecs: Seq[TablePartitionSpec],
parts: Seq[TablePartitionSpec],
ignoreIfNotExists: Boolean,
purge: Boolean,
retainData: Boolean): Unit = synchronized {
requireTableExists(db, table)
val existingParts = catalog(db).tables(table).partitions
val partSpecs = toCatalogPartitionSpecs(parts)
if (!ignoreIfNotExists) {
val missingSpecs = partSpecs.collect { case s if !existingParts.contains(s) => s }
if (missingSpecs.nonEmpty) {
Expand Down Expand Up @@ -467,8 +478,10 @@ class InMemoryCatalog(
override def renamePartitions(
db: String,
table: String,
specs: Seq[TablePartitionSpec],
newSpecs: Seq[TablePartitionSpec]): Unit = synchronized {
fromSpecs: Seq[TablePartitionSpec],
toSpecs: Seq[TablePartitionSpec]): Unit = synchronized {
val specs = toCatalogPartitionSpecs(fromSpecs)
val newSpecs = toCatalogPartitionSpecs(toSpecs)
require(specs.size == newSpecs.size, "number of old and new partition specs differ")
requirePartitionsExist(db, table, specs)
requirePartitionsNotExist(db, table, newSpecs)
Expand Down Expand Up @@ -507,7 +520,8 @@ class InMemoryCatalog(
override def alterPartitions(
db: String,
table: String,
parts: Seq[CatalogTablePartition]): Unit = synchronized {
alterParts: Seq[CatalogTablePartition]): Unit = synchronized {
val parts = toCatalogPartitionSpec(alterParts)
requirePartitionsExist(db, table, parts.map(p => p.spec))
parts.foreach { p =>
catalog(db).tables(table).partitions.put(p.spec, p)
Expand All @@ -517,15 +531,17 @@ class InMemoryCatalog(
override def getPartition(
db: String,
table: String,
spec: TablePartitionSpec): CatalogTablePartition = synchronized {
partSpec: TablePartitionSpec): CatalogTablePartition = synchronized {
val spec = toCatalogPartitionSpec(partSpec)
requirePartitionsExist(db, table, Seq(spec))
catalog(db).tables(table).partitions(spec)
}

override def getPartitionOption(
db: String,
table: String,
spec: TablePartitionSpec): Option[CatalogTablePartition] = synchronized {
partSpec: TablePartitionSpec): Option[CatalogTablePartition] = synchronized {
val spec = toCatalogPartitionSpec(partSpec)
if (!partitionExists(db, table, spec)) {
None
} else {
Expand All @@ -536,9 +552,9 @@ class InMemoryCatalog(
override def listPartitionNames(
db: String,
table: String,
partialSpec: Option[TablePartitionSpec] = None): Seq[String] = synchronized {
partSpec: Option[TablePartitionSpec] = None): Seq[String] = synchronized {
val partitionColumnNames = getTable(db, table).partitionColumnNames

val partialSpec = partSpec.map(toCatalogPartitionSpec)
listPartitions(db, table, partialSpec).map { partition =>
partitionColumnNames.map { name =>
val partValue = if (partition.spec(name) == null) {
Expand All @@ -557,7 +573,7 @@ class InMemoryCatalog(
partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = synchronized {
requireTableExists(db, table)

partialSpec match {
partialSpec.map(toCatalogPartitionSpec) match {
case None => catalog(db).tables(table).partitions.values.toSeq
case Some(partial) =>
catalog(db).tables(table).partitions.toSeq.collect {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3868,15 +3868,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark

assert(unions.size == 1)
}

test("SPARK-33591: null as a partition value") {
  val t = "part_table"
  withTable(t) {
    // Set-up statements, executed in order: create a table partitioned by the string
    // column `p1`, then insert one row into the partition whose value is null.
    val createTable = s"CREATE TABLE $t (col1 INT, p1 STRING) USING PARQUET PARTITIONED BY (p1)"
    val insertNullPart = s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0"
    Seq(createTable, insertNullPart).foreach(stmt => sql(stmt))

    // The inserted row is expected back with a null value in the partition column.
    val expected = Row(0, null)
    checkAnswer(sql(s"SELECT * FROM $t"), expected)
  }
}
}

// Case class with a single optional string field; its usages are outside this excerpt.
case class Foo(bar: Option[String])
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,26 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil
}
}

test("SPARK-33591, SPARK-34203: insert and drop partitions with null values") {
  // Creates table `t` partitioned by `p1`, runs `insertCmd`, checks the partitions against
  // the single spec `p1 -> nullPartitionValue`, drops the `p1 = null` partition and finally
  // calls `checkPartitions` with no expected specs.
  def verifyNullPartRoundTrip(t: String, insertCmd: String): Unit = {
    sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)")
    sql(insertCmd)
    checkPartitions(t, Map("p1" -> nullPartitionValue))
    sql(s"ALTER TABLE $t DROP PARTITION (p1 = null)")
    checkPartitions(t)
  }

  // Case 1: the null partition value is given explicitly in the PARTITION clause.
  withNamespaceAndTable("ns", "tbl") { t =>
    val staticInsert = s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0"
    verifyNullPartRoundTrip(t, staticInsert)
  }

  // Case 2: the null partition value comes from the inserted row itself, with
  // "hive.exec.dynamic.partition.mode" set to "nonstrict" for the duration of the check.
  withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
    withNamespaceAndTable("ns", "tbl") { t =>
      val dynamicInsert = s"INSERT OVERWRITE TABLE $t VALUES (0, null)"
      verifyNullPartRoundTrip(t, dynamicInsert)
    }
  }
}

test("SPARK-34161, SPARK-34138, SPARK-34099: keep dependents cached after table altering") {
withNamespaceAndTable("ns", "tbl") { t =>
sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)")
Expand Down