Skip to content

Commit 4398e77

Browse files
committed
Address comments.
1 parent 2515d78 commit 4398e77

File tree

3 files changed

+41
-21
lines changed

3 files changed

+41
-21
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,18 @@ object ResolveUnion extends Rule[LogicalPlan] {
4444
val resolver = SQLConf.get.resolver
4545
val missingFields =
4646
StructType.findMissingFields(col.dataType.asInstanceOf[StructType], target, resolver)
47-
if (missingFields.length == 0) {
47+
if (missingFields.isEmpty) {
4848
None
4949
} else {
50-
Some(addFieldsInto(col, "", missingFields.fields))
50+
missingFields.map(s => addFieldsInto(col, "", s.fields))
5151
}
5252
}
5353

5454
/**
55-
* Adds missing fields recursively into given `col` expression. The missing fields are given
56-
* in `fields`. For example, given `col` as "a struct<a:int, b:int>, b int", and `fields` is
57-
* "a struct<c:long>, c string". This method will add a nested `a.c` field and a top-level
58-
* `c` field to `col` and fill null values for them.
55+
* Adds missing fields recursively into given `col` expression. The missing fields are given
56+
* in `fields`. For example, given `col` as "a struct<a:int, b:int>, b int", and `fields` is
57+
* "a struct<c:long>, c string". This method will add a nested `a.c` field and a top-level
58+
* `c` field to `col` and fill null values for them.
5959
*/
6060
private def addFieldsInto(col: Expression, base: String, fields: Seq[StructField]): Expression = {
6161
fields.foldLeft(col) { case (currCol, field) =>
@@ -134,6 +134,7 @@ object ResolveUnion extends Rule[LogicalPlan] {
134134
// Builds a project list for `right` based on `left` output names
135135
val (rightProjectList, aliased) = compareAndAddFields(left, right, allowMissingCol)
136136

137+
137138
// Delegates failure checks to `CheckAnalysis`
138139
val notFoundAttrs = rightOutputAttrs.diff(rightProjectList ++ aliased)
139140
val rightChild = Project(rightProjectList ++ notFoundAttrs, right)

sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,10 @@ object StructType extends AbstractDataType {
646646
* Returns a `StructType` that contains missing fields recursively from `source` to `target`.
647647
* Note that this doesn't support looking into array type and map type recursively.
648648
*/
649-
def findMissingFields(source: StructType, target: StructType, resolver: Resolver): StructType = {
649+
def findMissingFields(
650+
source: StructType,
651+
target: StructType,
652+
resolver: Resolver): Option[StructType] = {
650653
def bothStructType(dt1: DataType, dt2: DataType): Boolean =
651654
dt1.isInstanceOf[StructType] && dt2.isInstanceOf[StructType]
652655

@@ -660,11 +663,17 @@ object StructType extends AbstractDataType {
660663
} else if (bothStructType(found.get.dataType, field.dataType) &&
661664
!found.get.dataType.sameType(field.dataType)) {
662665
// Found a field with same name, but different data type.
663-
newFields += found.get.copy(dataType =
664-
findMissingFields(found.get.dataType.asInstanceOf[StructType],
665-
field.dataType.asInstanceOf[StructType], resolver))
666+
findMissingFields(found.get.dataType.asInstanceOf[StructType],
667+
field.dataType.asInstanceOf[StructType], resolver).map { missingType =>
668+
newFields += found.get.copy(dataType = missingType)
669+
}
666670
}
667671
}
668-
StructType(newFields)
672+
673+
if (newFields.isEmpty) {
674+
None
675+
} else {
676+
Some(StructType(newFields))
677+
}
669678
}
670679
}

sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,61 +113,71 @@ class StructTypeSuite extends SparkFunSuite {
113113
val source1 = StructType.fromDDL("c1 INT")
114114
val missing1 = StructType.fromDDL(
115115
"c2 STRUCT<c3: INT, c4: STRUCT<c5: INT, c6: INT>>")
116-
assert(StructType.findMissingFields(source1, schema, resolver).sameType(missing1))
116+
assert(StructType.findMissingFields(source1, schema, resolver)
117+
.map(_.sameType(missing1)).getOrElse(false))
117118

118119
val source2 = StructType.fromDDL("c1 INT, c3 STRING")
119120
val missing2 = StructType.fromDDL(
120121
"c2 STRUCT<c3: INT, c4: STRUCT<c5: INT, c6: INT>>")
121-
assert(StructType.findMissingFields(source2, schema, resolver).sameType(missing2))
122+
assert(StructType.findMissingFields(source2, schema, resolver)
123+
.map(_.sameType(missing2)).getOrElse(false))
122124

123125
val source3 = StructType.fromDDL("c1 INT, c2 STRUCT<c3: INT>")
124126
val missing3 = StructType.fromDDL(
125127
"c2 STRUCT<c4: STRUCT<c5: INT, c6: INT>>")
126-
assert(StructType.findMissingFields(source3, schema, resolver).sameType(missing3))
128+
assert(StructType.findMissingFields(source3, schema, resolver)
129+
.map(_.sameType(missing3)).getOrElse(false))
127130

128131
val source4 = StructType.fromDDL("c1 INT, c2 STRUCT<c3: INT, c4: STRUCT<c6: INT>>")
129132
val missing4 = StructType.fromDDL(
130133
"c2 STRUCT<c4: STRUCT<c5: INT>>")
131-
assert(StructType.findMissingFields(source4, schema, resolver).sameType(missing4))
134+
assert(StructType.findMissingFields(source4, schema, resolver)
135+
.map(_.sameType(missing4)).getOrElse(false))
132136

133137
val schemaWithArray = StructType.fromDDL(
134138
"c1 INT, c2 ARRAY<STRUCT<c3: INT, c4: LONG>>")
135139
val source5 = StructType.fromDDL(
136140
"c1 INT")
137141
val missing5 = StructType.fromDDL(
138142
"c2 ARRAY<STRUCT<c3: INT, c4: LONG>>")
139-
assert(StructType.findMissingFields(source5, schemaWithArray, resolver).sameType(missing5))
143+
assert(
144+
StructType.findMissingFields(source5, schemaWithArray, resolver)
145+
.map(_.sameType(missing5)).getOrElse(false))
140146

141147
val schemaWithMap1 = StructType.fromDDL(
142148
"c1 INT, c2 MAP<STRUCT<c3: INT, c4: LONG>, STRING>, c3 LONG")
143149
val source6 = StructType.fromDDL(
144150
"c1 INT, c3 LONG")
145151
val missing6 = StructType.fromDDL(
146152
"c2 MAP<STRUCT<c3: INT, c4: LONG>, STRING>")
147-
assert(StructType.findMissingFields(source6, schemaWithMap1, resolver).sameType(missing6))
153+
assert(
154+
StructType.findMissingFields(source6, schemaWithMap1, resolver)
155+
.map(_.sameType(missing6)).getOrElse(false))
148156

149157
val schemaWithMap2 = StructType.fromDDL(
150158
"c1 INT, c2 MAP<STRING, STRUCT<c3: INT, c4: LONG>>, c3 STRING")
151159
val source7 = StructType.fromDDL(
152160
"c1 INT, c3 STRING")
153161
val missing7 = StructType.fromDDL(
154162
"c2 MAP<STRING, STRUCT<c3: INT, c4: LONG>>")
155-
assert(StructType.findMissingFields(source7, schemaWithMap2, resolver).sameType(missing7))
163+
assert(
164+
StructType.findMissingFields(source7, schemaWithMap2, resolver)
165+
.map(_.sameType(missing7)).getOrElse(false))
156166

157167
// Unsupported: nested struct in array, map
158168
val source8 = StructType.fromDDL(
159169
"c1 INT, c2 ARRAY<STRUCT<c3: INT>>")
160170
// `findMissingFields` doesn't support looking into nested struct in array type.
161-
assert(StructType.findMissingFields(source8, schemaWithArray, resolver).length == 0)
171+
assert(StructType.findMissingFields(source8, schemaWithArray, resolver).isEmpty)
162172

163173
val source9 = StructType.fromDDL(
164174
"c1 INT, c2 MAP<STRUCT<c3: INT>, STRING>, c3 LONG")
165175
// `findMissingFields` doesn't support looking into nested struct in map type.
166-
assert(StructType.findMissingFields(source9, schemaWithMap1, resolver).length == 0)
176+
assert(StructType.findMissingFields(source9, schemaWithMap1, resolver).isEmpty)
167177

168178
val source10 = StructType.fromDDL(
169179
"c1 INT, c2 MAP<STRING, STRUCT<c3: INT>>, c3 STRING")
170180
// `findMissingFields` doesn't support looking into nested struct in map type.
171-
assert(StructType.findMissingFields(source10, schemaWithMap2, resolver).length == 0)
181+
assert(StructType.findMissingFields(source10, schemaWithMap2, resolver).isEmpty)
172182
}
173183
}

0 commit comments

Comments (0)