diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9e325d0c2e4e1..9c99acaa994b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -337,7 +337,8 @@ trait CheckAnalysis extends PredicateHelper { def ordinalNumber(i: Int): String = i match { case 0 => "first" case 1 => "second" - case i => s"${i}th" + case 2 => "third" + case i => s"${i + 1}th" } val ref = dataTypes(operator.children.head) operator.children.tail.zipWithIndex.foreach { case (child, ti) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 189152374b0d1..c15ec49e14282 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -831,4 +831,57 @@ class AnalysisSuite extends AnalysisTest with Matchers { } } } + + test("SPARK-32131: Fix wrong column index when we have more than two columns" + + " during union and set operations" ) { + val firstTable = LocalRelation( + AttributeReference("a", StringType)(), + AttributeReference("b", DoubleType)(), + AttributeReference("c", IntegerType)(), + AttributeReference("d", FloatType)()) + + val secondTable = LocalRelation( + AttributeReference("a", StringType)(), + AttributeReference("b", TimestampType)(), + AttributeReference("c", IntegerType)(), + AttributeReference("d", FloatType)()) + + val thirdTable = LocalRelation( + AttributeReference("a", StringType)(), + AttributeReference("b", DoubleType)(), + AttributeReference("c", TimestampType)(), + AttributeReference("d", FloatType)()) + + val fourthTable = LocalRelation( + AttributeReference("a", StringType)(), + AttributeReference("b", DoubleType)(), + AttributeReference("c", IntegerType)(), + AttributeReference("d", TimestampType)()) + + val r1 = Union(firstTable, secondTable) + val r2 = Union(firstTable, thirdTable) + val r3 = Union(firstTable, fourthTable) + val r4 = Except(firstTable, secondTable, isAll = false) + val r5 = Intersect(firstTable, secondTable, isAll = false) + + assertAnalysisError(r1, + Seq("Union can only be performed on tables with the compatible column types. " + + "timestamp <> double at the second column of the second table")) + + assertAnalysisError(r2, + Seq("Union can only be performed on tables with the compatible column types. " + + "timestamp <> int at the third column of the second table")) + + assertAnalysisError(r3, + Seq("Union can only be performed on tables with the compatible column types. " + + "timestamp <> float at the 4th column of the second table")) + + assertAnalysisError(r4, + Seq("Except can only be performed on tables with the compatible column types. " + + "timestamp <> double at the second column of the second table")) + + assertAnalysisError(r5, + Seq("Intersect can only be performed on tables with the compatible column types. " + + "timestamp <> double at the second column of the second table")) + } }