Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,8 @@ case class JsonTuple(children: Seq[Expression])
while (parser.nextToken() != JsonToken.END_OBJECT) {
if (parser.getCurrentToken == JsonToken.FIELD_NAME) {
// check to see if this field is desired in the output
val idx = fieldNames.indexOf(parser.getCurrentName)
val jsonField = parser.getCurrentName
var idx = fieldNames.indexOf(jsonField)
if (idx >= 0) {
// it is, copy the child tree to the correct location in the output row
val output = new ByteArrayOutputStream()
Expand All @@ -447,7 +448,14 @@ case class JsonTuple(children: Seq[Expression])
generator => copyCurrentStructure(generator, parser)
}

row(idx) = UTF8String.fromBytes(output.toByteArray)
val jsonValue = UTF8String.fromBytes(output.toByteArray)

// SPARK-21804: json_tuple returns null values within repeated columns
// except the first one; so that we need to check the remaining fields.
do {
row(idx) = jsonValue
idx = fieldNames.indexOf(jsonField, idx + 1)
} while (idx >= 0)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,16 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
InternalRow(UTF8String.fromString("1"), null, UTF8String.fromString("2")))
}

test("SPARK-21804: json_tuple returns null values within repeated columns except the first one") {
checkJsonTuple(
JsonTuple(Literal("""{"f1": 1, "f2": 2}""") ::
NonFoldableLiteral("f1") ::
NonFoldableLiteral("cast(NULL AS STRING)") ::
NonFoldableLiteral("f1") ::
Nil),
InternalRow(UTF8String.fromString("1"), null, UTF8String.fromString("1")))
}

val gmtId = Option(DateTimeUtils.TimeZoneGMT.getID)

test("from_json") {
Expand Down