@@ -68,8 +68,15 @@ private[sql] object JsonRDD extends Logging {
6868 val (topLevel, structLike) = values.partition(_.size == 1 )
6969 val topLevelFields = topLevel.filter {
7070 name => resolved.get(prefix ++ name).get match {
71- case ArrayType (StructType (Nil ), _) => false
72- case ArrayType (_, _) => true
71+ case ArrayType (elementType, _) => {
72+ def hasInnerStruct (t : DataType ): Boolean = t match {
73+ case s : StructType => false
74+ case ArrayType (t1, _) => hasInnerStruct(t1)
75+ case o => true
76+ }
77+
78+ hasInnerStruct(elementType)
79+ }
7380 case struct : StructType => false
7481 case _ => true
7582 }
@@ -84,7 +91,18 @@ private[sql] object JsonRDD extends Logging {
8491 val dataType = resolved.get(prefix :+ name).get
8592 dataType match {
8693 case array : ArrayType =>
87- Some (StructField (name, ArrayType (structType, array.containsNull), nullable = true ))
94+ // The pattern of this array is ArrayType(...(ArrayType(StructType))).
95+ // Since the inner struct of the array is a placeholder (StructType(Nil)),
96+ // we need to replace this placeholder with the actual StructType (structType).
97+ def getActualArrayType (
98+ innerStruct : StructType ,
99+ currentArray : ArrayType ): ArrayType = currentArray match {
100+ case ArrayType (s : StructType , containsNull) =>
101+ ArrayType (innerStruct, containsNull)
102+ case ArrayType (a : ArrayType , containsNull) =>
103+ ArrayType (getActualArrayType(innerStruct, a), containsNull)
104+ }
105+ Some (StructField (name, getActualArrayType(structType, array), nullable = true ))
88106 case struct : StructType => Some (StructField (name, structType, nullable = true ))
89107 // dataType is StringType means that we have resolved type conflicts involving
90108 // primitive types and complex types. So, the type of name has been relaxed to
@@ -168,8 +186,7 @@ private[sql] object JsonRDD extends Logging {
168186 /**
169187 * Returns the element type of a JSON array. We go through all elements of this array
170188 * to detect any possible type conflict. We use [[compatibleType]] to resolve
171- * type conflicts. Right now, when the element of an array is another array, we
172- * treat the element as String.
189+ * type conflicts.
173190 */
174191 private def typeOfArray (l : Seq [Any ]): ArrayType = {
175192 val containsNull = l.exists(v => v == null )
@@ -216,18 +233,24 @@ private[sql] object JsonRDD extends Logging {
216233 }
217234 case (key : String , array : Seq [_]) => {
218235 // The value associated with the key is an array.
219- typeOfArray(array) match {
236+ // Handle inner structs of an array.
237+ def buildKeyPathForInnerStructs (v : Any , t : DataType ): Seq [(String , DataType )] = t match {
220238 case ArrayType (StructType (Nil ), containsNull) => {
221239 // The elements of this array are structs.
222- array .asInstanceOf [Seq [Map [String , Any ]]].flatMap {
240+ v .asInstanceOf [Seq [Map [String , Any ]]].flatMap {
223241 element => allKeysWithValueTypes(element)
224242 }.map {
225- case (k, dataType ) => (s " $key. $k" , dataType )
226- } :+ (key, ArrayType ( StructType ( Nil ), containsNull))
243+ case (k, t ) => (s " $key. $k" , t )
244+ }
227245 }
228- case ArrayType (elementType, containsNull) =>
229- (key, ArrayType (elementType, containsNull)) :: Nil
246+ case ArrayType (t1, containsNull) =>
247+ v.asInstanceOf [Seq [Any ]].flatMap {
248+ element => buildKeyPathForInnerStructs(element, t1)
249+ }
250+ case other => Nil
230251 }
252+ val elementType = typeOfArray(array)
253+ buildKeyPathForInnerStructs(array, elementType) :+ (key, elementType)
231254 }
232255 case (key : String , value) => (key, typeOfPrimitiveValue(value)) :: Nil
233256 }
@@ -339,15 +362,17 @@ private[sql] object JsonRDD extends Logging {
339362 null
340363 } else {
341364 desiredType match {
342- case ArrayType (elementType, _) =>
343- value.asInstanceOf [Seq [Any ]].map(enforceCorrectType(_, elementType))
344365 case StringType => toString(value)
345366 case IntegerType => value.asInstanceOf [IntegerType .JvmType ]
346367 case LongType => toLong(value)
347368 case DoubleType => toDouble(value)
348369 case DecimalType => toDecimal(value)
349370 case BooleanType => value.asInstanceOf [BooleanType .JvmType ]
350371 case NullType => null
372+
373+ case ArrayType (elementType, _) =>
374+ value.asInstanceOf [Seq [Any ]].map(enforceCorrectType(_, elementType))
375+ case struct : StructType => asRow(value.asInstanceOf [Map [String , Any ]], struct)
351376 }
352377 }
353378 }
@@ -356,22 +381,9 @@ private[sql] object JsonRDD extends Logging {
356381 // TODO: Reuse the row instead of creating a new one for every record.
357382 val row = new GenericMutableRow (schema.fields.length)
358383 schema.fields.zipWithIndex.foreach {
359- // StructType
360- case (StructField (name, fields : StructType , _), i) =>
361- row.update(i, json.get(name).flatMap(v => Option (v)).map(
362- v => asRow(v.asInstanceOf [Map [String , Any ]], fields)).orNull)
363-
364- // ArrayType(StructType)
365- case (StructField (name, ArrayType (structType : StructType , _), _), i) =>
366- row.update(i,
367- json.get(name).flatMap(v => Option (v)).map(
368- v => v.asInstanceOf [Seq [Any ]].map(
369- e => asRow(e.asInstanceOf [Map [String , Any ]], structType))).orNull)
370-
371- // Other cases
372384 case (StructField (name, dataType, _), i) =>
373385 row.update(i, json.get(name).flatMap(v => Option (v)).map(
374- enforceCorrectType(_, dataType)).getOrElse( null ) )
386+ enforceCorrectType(_, dataType)).orNull )
375387 }
376388
377389 row
0 commit comments