V2WriteCommand.scala
@@ -45,7 +45,7 @@ trait V2WriteCommand extends Command {
        case (inAttr, outAttr) =>
          // names and types must match, nullability must be compatible
          inAttr.name == outAttr.name &&
-           DataType.equalsIgnoreCompatibleNullability(outAttr.dataType, inAttr.dataType) &&
+           DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) &&

@dongjoon-hyun (Member) commented on Oct 14, 2020:
Hi, @HeartSaVioR. The original code looks the same as in branch-2.4, but the issue is reported against 3.0.0+. Could you confirm whether this is a 3.0.0-only issue or not?

  override lazy val resolved: Boolean = {
    table.resolved && query.resolved && query.output.size == table.output.size &&
        query.output.zip(table.output).forall {
          case (inAttr, outAttr) =>
            // names and types must match, nullability must be compatible
            inAttr.name == outAttr.name &&
                DataType.equalsIgnoreCompatibleNullability(outAttr.dataType, inAttr.dataType) &&
                (outAttr.nullable || !inAttr.nullable)
        }
  }

@HeartSaVioR (Contributor, Author) replied:
Ah, thanks for noticing. Nice finding. I found it in V2WriteCommand, so I thought it was added later. I will check the code path in branch-2.4 and test it.

@HeartSaVioR (Contributor, Author) replied on Oct 14, 2020:
The usage of AppendData was reverted in b6e4aca for branch-2.4 and shipped in Spark 2.4.0. So while the code in AppendData for branch-2.4 is broken as well, it is dead code.

We seem to have three options: 1) revert the remaining part of AppendData in branch-2.4, 2) fix the code but leave it as dead code, 3) leave it as it is. What's our preference?

cc. @cloud-fan @HyukjinKwon

A member replied:
I prefer (2) because there is a downstream project using it in their fork.

            (outAttr.nullable || !inAttr.nullable)
      }
  }
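
For orientation, the sketch below is an illustration added here rather than code from the PR: it spells out the per-attribute check that outputResolved performs with the corrected argument order. The names inType, outType, inAttr and outAttr are invented for the example, and since equalsIgnoreCompatibleNullability is private[sql], a snippet like this only compiles inside the org.apache.spark.sql package tree.

  import org.apache.spark.sql.catalyst.expressions.AttributeReference
  import org.apache.spark.sql.types.{DataType, StringType, StructField, StructType}

  // inAttr models what the query produces, outAttr what the table declares.
  val inType = StructType(Array(StructField("s", StringType, nullable = false)))
  val outType = StructType(Array(StructField("s", StringType, nullable = true)))
  val inAttr = AttributeReference("col", inType, nullable = false)()
  val outAttr = AttributeReference("col", outType, nullable = true)()

  val compatible =
    inAttr.name == outAttr.name &&
      // fixed order: from = query output type, to = table column type
      DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) &&
      (outAttr.nullable || !inAttr.nullable)
  // compatible is true here; with the pre-fix (swapped) argument order the DataType call
  // returns false, so this safe write was treated as unresolved.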

DataFrameWriterV2Suite.scala
@@ -23,16 +23,15 @@ import scala.collection.JavaConverters._

import org.scalatest.BeforeAndAfter

-import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException}
-import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic}
+import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamedRelation, NoSuchTableException, TableAlreadyExistsException}
+import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, V2WriteCommand}
import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog}
import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}
import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform}
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType}
-import org.apache.spark.sql.types.TimestampType
+import org.apache.spark.sql.types.{ArrayType, DataType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType}
import org.apache.spark.sql.util.QueryExecutionListener
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.Utils
@@ -101,6 +100,86 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with BeforeAndAfter
    assert(v2.catalog.exists(_ == catalogPlugin))
  }

  case class FakeV2WriteCommand(table: NamedRelation, query: LogicalPlan) extends V2WriteCommand

  test("SPARK-33136 output resolved on complex types for V2 write commands") {

A Contributor commented:
Does this bug only affect complex types?

@HeartSaVioR (Contributor, Author) replied:
Yes, according to the implementation of equalsIgnoreCompatibleNullability.

  private[sql] def equalsIgnoreCompatibleNullability(from: DataType, to: DataType): Boolean = {
    (from, to) match {
      case (ArrayType(fromElement, fn), ArrayType(toElement, tn)) =>
        (tn || !fn) && equalsIgnoreCompatibleNullability(fromElement, toElement)

      case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) =>
        (tn || !fn) &&
          equalsIgnoreCompatibleNullability(fromKey, toKey) &&
          equalsIgnoreCompatibleNullability(fromValue, toValue)

      case (StructType(fromFields), StructType(toFields)) =>
        fromFields.length == toFields.length &&
          fromFields.zip(toFields).forall { case (fromField, toField) =>
            fromField.name == toField.name &&
              (toField.nullable || !fromField.nullable) &&
              equalsIgnoreCompatibleNullability(fromField.dataType, toField.dataType)
          }

      case (fromDataType, toDataType) => fromDataType == toDataType
    }
  }

For primitive types the argument order doesn't affect the result. outputResolved itself does the right comparison; it just passes the parameters in the swapped order.
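
A small sketch of that difference, added here for illustration and not part of the PR (the variable names are invented; the same private[sql] access caveat applies):

  import org.apache.spark.sql.types.{ArrayType, DataType, LongType}

  // Primitive type: the final `fromDataType == toDataType` case is symmetric,
  // so argument order is irrelevant.
  DataType.equalsIgnoreCompatibleNullability(LongType, LongType)           // true either way

  // Complex type: non-nullable elements may be written into a nullable-element column...
  val nonNullElems = ArrayType(LongType, containsNull = false)
  val nullableElems = ArrayType(LongType, containsNull = true)
  DataType.equalsIgnoreCompatibleNullability(nonNullElems, nullableElems)  // true
  // ...but the swapped call, which the pre-fix outputResolved effectively made, rejects it.
  DataType.equalsIgnoreCompatibleNullability(nullableElems, nonNullElems)  // false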

    val tableCatalog = catalog("testcat")

    def assertTypeCompatibility(name: String, fromType: DataType, toType: DataType): Unit = {
      val fromTableName = s"from_table_$name"
      tableCatalog.createTable(
        Identifier.of(Array(), fromTableName),
        StructType(Array(StructField("col", fromType))),
        Array.empty,
        new java.util.HashMap[String, String]())

      val toTable = tableCatalog.createTable(
        Identifier.of(Array(), s"to_table_$name"),
        StructType(Array(StructField("col", toType))),
        Array.empty,
        new java.util.HashMap[String, String]())

      val df = spark.table(s"testcat.$fromTableName")

      val relation = DataSourceV2Relation.create(toTable, Some(tableCatalog), None)
      val writeCommand = FakeV2WriteCommand(relation, df.queryExecution.analyzed)

      assert(writeCommand.outputResolved, s"Unable to write from type $fromType to type $toType.")
    }

    // The major difference between `from` and `to` is that `from` is a complex type
    // containing non-nullable fields or elements, whereas `to` is the same type with
    // the nullability flipped to nullable.

    // nested struct type
    val fromStructType = StructType(Array(
      StructField("s", StringType),
      StructField("i_nonnull", IntegerType, nullable = false),
      StructField("st", StructType(Array(
        StructField("l", LongType),
        StructField("s_nonnull", StringType, nullable = false))))))

    val toStructType = StructType(Array(
      StructField("s", StringType),
      StructField("i_nonnull", IntegerType),
      StructField("st", StructType(Array(
        StructField("l", LongType),
        StructField("s_nonnull", StringType))))))

    assertTypeCompatibility("struct", fromStructType, toStructType)

    // array type
    assertTypeCompatibility("array", ArrayType(LongType, containsNull = false),
      ArrayType(LongType, containsNull = true))

    // array type with struct type
    val fromArrayWithStructType = ArrayType(
      StructType(Array(StructField("s", StringType, nullable = false))),
      containsNull = false)

    val toArrayWithStructType = ArrayType(
      StructType(Array(StructField("s", StringType))),
      containsNull = true)

    assertTypeCompatibility("array_struct", fromArrayWithStructType, toArrayWithStructType)

    // map type
    assertTypeCompatibility("map", MapType(IntegerType, StringType, valueContainsNull = false),
      MapType(IntegerType, StringType, valueContainsNull = true))

    // map type with struct type
    val fromMapWithStructType = MapType(
      IntegerType,
      StructType(Array(StructField("s", StringType, nullable = false))),
      valueContainsNull = false)

    val toMapWithStructType = MapType(
      IntegerType,
      StructType(Array(StructField("s", StringType))),
      valueContainsNull = true)

    assertTypeCompatibility("map_struct", fromMapWithStructType, toMapWithStructType)
  }

test("Append: basic append") {
spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo")
