@@ -117,14 +117,18 @@ private[parquet] object CatalystReadSupport {
117117 // Only clips array types with nested type as element type.
118118 clipParquetListType(parquetType.asGroupType(), t.elementType)
119119
120- case t : MapType if ! isPrimitiveCatalystType(t.valueType) =>
121- // Only clips map types with nested type as value type.
120+ case t : MapType
121+ if ! isPrimitiveCatalystType(t.keyType) ||
122+ ! isPrimitiveCatalystType(t.valueType) =>
123+ // Only clips map types with nested key type or value type
122124 clipParquetMapType(parquetType.asGroupType(), t.keyType, t.valueType)
123125
124126 case t : StructType =>
125127 clipParquetGroup(parquetType.asGroupType(), t)
126128
127129 case _ =>
130+ // UDTs and primitive types are not clipped. For UDTs, a clipped version might not be able
131+ // to be mapped to desired user-space types. So UDTs shouldn't participate in schema merging.
128132 parquetType
129133 }
130134 }
@@ -204,14 +208,14 @@ private[parquet] object CatalystReadSupport {
204208 }
205209
206210 /**
207- * Clips a Parquet [[GroupType ]] which corresponds to a Catalyst [[MapType ]]. The value type
208- * of the [[MapType ]] should also be a nested type, namely an [[ArrayType ]], a [[MapType ]], or a
209- * [[StructType ]]. Note that key type of any [[ MapType ]] is always a primitive type .
211+ * Clips a Parquet [[GroupType]] which corresponds to a Catalyst [[MapType]]. Either key type or
212+ * value type of the [[MapType]] must be a nested type, namely an [[ArrayType]], a [[MapType]], or
213+ * a [[StructType]].
210214 */
211215 private def clipParquetMapType (
212216 parquetMap : GroupType , keyType : DataType , valueType : DataType ): GroupType = {
213- // Precondition of this method, should only be called for maps with nested value types.
214- assert(! isPrimitiveCatalystType(valueType))
217+ // Precondition of this method: it only handles maps with a nested key type or value type.
218+ assert(! isPrimitiveCatalystType(keyType) || ! isPrimitiveCatalystType( valueType))
215219
216220 val repeatedGroup = parquetMap.getType(0 ).asGroupType()
217221 val parquetKeyType = repeatedGroup.getType(0 )
@@ -221,7 +225,7 @@ private[parquet] object CatalystReadSupport {
221225 Types
222226 .repeatedGroup()
223227 .as(repeatedGroup.getOriginalType)
224- .addField(parquetKeyType)
228+ .addField(clipParquetType( parquetKeyType, keyType) )
225229 .addField(clipParquetType(parquetValueType, valueType))
226230 .named(repeatedGroup.getName)
227231
0 commit comments