Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,6 @@ class JacksonParser(
case udt: UserDefinedType[_] =>
makeConverter(udt.sqlType)

case _: NullType if options.dropFieldIfAllNull =>
(parser: JsonParser) => parseJsonToken[Null](parser, dataType) {
case _ => null
}

case _ =>
(parser: JsonParser) =>
// Here, we pass empty `PartialFunction` so that this case can be
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,44 +176,33 @@ private[sql] object JsonInferSchema {
}

/**
* Canonicalize inferred types, e.g., convert NullType to StringType and remove StructTypes
* with no fields.
* Recursively canonicalizes inferred types, e.g., removes StructTypes with no fields,
* drops NullTypes or converts them to StringType based on provided options.
*/
private def canonicalizeType(tpe: DataType, options: JSONOptions): Option[DataType] = tpe match {
case at @ ArrayType(elementType, _) =>
val canonicalizeArrayOption = for {
canonicalType <- canonicalizeType(elementType, options)
} yield {
at.copy(canonicalType)
}

canonicalizeArrayOption.map { array =>
if (options.dropFieldIfAllNull && array.elementType == NullType) {
NullType
} else {
array
}
}
case at: ArrayType =>
canonicalizeType(at.elementType, options)
.map(t => at.copy(elementType = t))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need this copy?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this might change ArrayType(ArrayType(NullType)) to ArrayType(ArrayType(StringType))

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aha, ok. Thanks!


case StructType(fields) =>
val canonicalFields: Array[StructField] = for {
field <- fields
if field.name.length > 0
canonicalType <- canonicalizeType(field.dataType, options)
} yield {
field.copy(dataType = canonicalType)
val canonicalFields = fields.filter(_.name.nonEmpty).flatMap { f =>
canonicalizeType(f.dataType, options)
.map(t => f.copy(dataType = t))
}

if (canonicalFields.length > 0) {
Some(StructType(canonicalFields))
} else if (options.dropFieldIfAllNull) {
Some(NullType)
// SPARK-8093: empty structs should be deleted
if (canonicalFields.isEmpty) {
None
} else {
// per SPARK-8093: empty structs should be deleted
Some(StructType(canonicalFields))
}

case NullType =>
if (options.dropFieldIfAllNull) {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only place we check dropFieldIfAllNull.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

None
} else {
Some(StringType)
}

case NullType if !options.dropFieldIfAllNull => Some(StringType)
case other => Some(other)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2423,10 +2423,9 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
.option("dropFieldIfAllNull", true)
.load(path)
var expectedSchema = new StructType()
.add("a", NullType).add("b", LongType).add("c", StringType)
.add("b", LongType).add("c", StringType)
assert(df.schema === expectedSchema)
checkAnswer(df, Row(null, 1, "3.0") :: Row(null, null, "string") :: Row(null, null, null)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, thanks. I misunderstood the behaviour.

:: Nil)
checkAnswer(df, Row(1, "3.0") :: Row(null, "string") :: Row(null, null) :: Nil)

// arrays
Seq(
Expand All @@ -2438,17 +2437,14 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
.option("dropFieldIfAllNull", true)
.load(path)
expectedSchema = new StructType()
.add("a", ArrayType(LongType)).add("b", NullType).add("c", NullType).add("d", NullType)
.add("e", NullType)
.add("a", ArrayType(LongType))
assert(df.schema === expectedSchema)
checkAnswer(df, Row(Array(2, 1), null, null, null, null) ::
Row(Array(null), null, null, null, null) ::
Row(null, null, null, null, null) :: Nil)
checkAnswer(df, Row(Array(2, 1)) :: Row(Array(null)) :: Row(null) :: Nil)

// structs
Seq(
"""{"a":{"a1": 1, "a2":"string"}, "b":{}}""",
"""{"a":{"a1": 2, "a2":null}, "b":{}}""",
"""{"a":{"a1": 2, "a2":null}, "b":{"b1":[null]}}""",
"""{"a":null, "b":null}""")
.toDS().write.mode("overwrite").text(path)
df = spark.read.format("json")
Expand All @@ -2457,10 +2453,8 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
expectedSchema = new StructType()
.add("a", StructType(StructField("a1", LongType) :: StructField("a2", StringType)
:: Nil))
.add("b", NullType)
assert(df.schema === expectedSchema)
checkAnswer(df, Row(Row(1, "string"), null) :: Row(Row(2, null), null) ::
Row(null, null) :: Nil)
checkAnswer(df, Row(Row(1, "string")) :: Row(Row(2, null)) :: Row(null) :: Nil)
}
}
}