core/src/main/resources/error/error-classes.json
@@ -190,6 +190,16 @@
"The <functionName> does not support ordering on type <dataType>."
]
},
"IN_SUBQUERY_DATA_TYPE_MISMATCH" : {
"message" : [
"The data type of one or more elements in the left hand side of an IN subquery is not compatible with the data type of the output of the subquery. Mismatched columns: [<mismatchedColumns>], left side: [<leftType>], right side: [<rightType>]."
]
},
"IN_SUBQUERY_LENGTH_MISMATCH" : {
"message" : [
"The number of columns in the left hand side of an IN subquery does not match the number of columns in the output of subquery. Left hand side columns(length: <leftLength>): [<leftColumns>], right hand side columns(length: <rightLength>): [<rightColumns>]."
]
},
"MAP_CONCAT_DIFF_TYPES" : {
"message" : [
"The <functionName> should all be of type map, but it's <dataType>."
@@ -19,10 +19,12 @@ package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion}
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.trees.TernaryLike
import org.apache.spark.sql.catalyst.trees.TreePattern.{CASE_WHEN, IF, TreePattern}
import org.apache.spark.sql.catalyst.util.TypeUtils.{toSQLExpr, toSQLId, toSQLType}
import org.apache.spark.sql.types._

// scalastyle:off line.size.limit
@@ -60,12 +62,24 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi

override def checkInputDataTypes(): TypeCheckResult = {
if (predicate.dataType != BooleanType) {
TypeCheckResult.TypeCheckFailure(
"type of predicate expression in If should be boolean, " +
s"not ${predicate.dataType.catalogString}")
DataTypeMismatch(
errorSubClass = "UNEXPECTED_INPUT_TYPE",
messageParameters = Map(
"paramIndex" -> "1",
"requiredType" -> toSQLType(BooleanType),
"inputSql" -> toSQLExpr(predicate),
"inputType" -> toSQLType(predicate.dataType)
)
)
} else if (!TypeCoercion.haveSameType(inputTypesForMerging)) {
TypeCheckResult.TypeCheckFailure(s"differing types in '$sql' " +
s"(${trueValue.dataType.catalogString} and ${falseValue.dataType.catalogString}).")
DataTypeMismatch(
errorSubClass = "DATA_DIFF_TYPES",
messageParameters = Map(
"functionName" -> toSQLId(prettyName),
"dataType" -> Seq(trueValue.dataType,
falseValue.dataType).map(toSQLType).mkString("[", ", ", "]")
)
)
} else {
TypeCheckResult.TypeCheckSuccess
}
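
As a sanity check on the new `If` behavior, here is an editorial spark-shell sketch (not part of the PR). The expected error classes follow from the hunk above; `spark` is the shell's SparkSession.

```scala
// spark-shell sketch: each query should now fail analysis with a structured
// error class instead of a free-form TypeCheckFailure message.
import org.apache.spark.sql.AnalysisException

def errorClassOf(query: String): String =
  try { spark.sql(query); "no error" }
  catch { case e: AnalysisException => e.getErrorClass }

errorClassOf("SELECT if(1, 'a', 'b')")
// expected: DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE (predicate is "INT", not "BOOLEAN")
errorClassOf("SELECT if(true, 1, false)")
// expected: DATATYPE_MISMATCH.DATA_DIFF_TYPES (branch types "INT" and "BOOLEAN")
```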
@@ -172,17 +186,24 @@ case class CaseWhen(
TypeCheckResult.TypeCheckSuccess
} else {
val index = branches.indexWhere(_._1.dataType != BooleanType)
TypeCheckResult.TypeCheckFailure(
s"WHEN expressions in CaseWhen should all be boolean type, " +
s"but the ${index + 1}th when expression's type is ${branches(index)._1}")
DataTypeMismatch(
errorSubClass = "UNEXPECTED_INPUT_TYPE",
messageParameters = Map(
"paramIndex" -> (index + 1).toString,
"requiredType" -> toSQLType(BooleanType),
"inputSql" -> toSQLExpr(branches(index)._1),
"inputType" -> toSQLType(branches(index)._1.dataType)
)
)
}
} else {
val branchesStr = branches.map(_._2.dataType).map(dt => s"WHEN ... THEN ${dt.catalogString}")
.mkString(" ")
val elseStr = elseValue.map(expr => s" ELSE ${expr.dataType.catalogString}").getOrElse("")
TypeCheckResult.TypeCheckFailure(
"THEN and ELSE expressions should all be same type or coercible to a common type," +
s" got CASE $branchesStr$elseStr END")
DataTypeMismatch(
errorSubClass = "DATA_DIFF_TYPES",
messageParameters = Map(
"functionName" -> toSQLId(prettyName),
"dataType" -> inputTypesForMerging.map(toSQLType).mkString("[", ", ", "]")
)
)
}
}
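
`CaseWhen` has the same two failure modes. This editorial sketch drives `checkInputDataTypes` directly and mirrors the suite changes later in this diff; it runs in spark-shell, where the catalyst classes are on the classpath.

```scala
import org.apache.spark.sql.catalyst.expressions.{CaseWhen, Literal}

// Second WHEN condition is an INT literal rather than BOOLEAN:
CaseWhen(Seq((Literal(true), Literal(1)), (Literal(2), Literal(3)))).checkInputDataTypes()
// expected: DataTypeMismatch(UNEXPECTED_INPUT_TYPE) with paramIndex "2"

// THEN branches disagree on type (INT vs STRING):
CaseWhen(Seq((Literal(true), Literal(1)), (Literal(true), Literal("a")))).checkInputDataTypes()
// expected: DataTypeMismatch(DATA_DIFF_TYPES) with dataType ["INT", "STRING"]
```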

@@ -22,7 +22,9 @@ import scala.collection.immutable.TreeSet
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference
import org.apache.spark.sql.catalyst.expressions.Cast._
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LeafNode, LogicalPlan, Project}
@@ -31,7 +33,6 @@ import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._


/**
* A base class for generated/interpreted predicate
*/
@@ -375,16 +376,15 @@ case class InSubquery(values: Seq[Expression], query: ListQuery)

override def checkInputDataTypes(): TypeCheckResult = {
if (values.length != query.childOutputs.length) {
TypeCheckResult.TypeCheckFailure(
s"""
|The number of columns in the left hand side of an IN subquery does not match the
|number of columns in the output of subquery.
|#columns in left hand side: ${values.length}.
|#columns in right hand side: ${query.childOutputs.length}.
|Left side columns:
|[${values.map(_.sql).mkString(", ")}].
|Right side columns:
|[${query.childOutputs.map(_.sql).mkString(", ")}].""".stripMargin)
DataTypeMismatch(
errorSubClass = "IN_SUBQUERY_LENGTH_MISMATCH",
messageParameters = Map(
"leftLength" -> values.length.toString,
"rightLength" -> query.childOutputs.length.toString,
"leftColumns" -> values.map(toSQLExpr(_)).mkString(", "),
"rightColumns" -> query.childOutputs.map(toSQLExpr(_)).mkString(", ")
)
)
} else if (!DataType.equalsStructurally(
query.dataType, value.dataType, ignoreNullability = true)) {

@@ -393,16 +393,14 @@ case class InSubquery(values: Seq[Expression], query: ListQuery)
Seq(s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})")
case _ => None
}
TypeCheckResult.TypeCheckFailure(
s"""
|The data type of one or more elements in the left hand side of an IN subquery
|is not compatible with the data type of the output of the subquery
|Mismatched columns:
|[${mismatchedColumns.mkString(", ")}]
|Left side:
|[${values.map(_.dataType.catalogString).mkString(", ")}].
|Right side:
|[${query.childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin)
DataTypeMismatch(
errorSubClass = "IN_SUBQUERY_DATA_TYPE_MISMATCH",
messageParameters = Map(
"mismatchedColumns" -> mismatchedColumns.mkString(", "),
"leftType" -> values.map(left => toSQLType(left.dataType)).mkString(", "),
"rightType" -> query.childOutputs.map(right => toSQLType(right.dataType)).mkString(", ")
)
)
} else {
TypeUtils.checkForOrderingExpr(value.dataType, prettyName)
}
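
Both new IN-subquery error classes are reachable from plain SQL. An editorial spark-shell sketch with illustrative view names follows (run the failing statements one at a time, since each throws during analysis):

```scala
// spark-shell sketch; toDF comes from spark.implicits._, already in scope.
Seq((1, 2)).toDF("a1", "b1").createOrReplaceTempView("tab_a")
Seq(true).toDF("a2").createOrReplaceTempView("tab_b")

// Two columns on the left, one subquery output column:
spark.sql("SELECT * FROM tab_a WHERE (a1, b1) IN (SELECT a2 FROM tab_b)")
// expected: DATATYPE_MISMATCH.IN_SUBQUERY_LENGTH_MISMATCH

// Lengths match, but INT vs BOOLEAN has no common type to coerce to:
spark.sql("SELECT * FROM tab_a WHERE a1 IN (SELECT a2 FROM tab_b)")
// expected: DATATYPE_MISMATCH.IN_SUBQUERY_DATA_TYPE_MISMATCH
```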
@@ -450,8 +448,13 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate {
val mismatchOpt = list.find(l => !DataType.equalsStructurally(l.dataType, value.dataType,
ignoreNullability = true))
if (mismatchOpt.isDefined) {
TypeCheckResult.TypeCheckFailure(s"Arguments must be same type but were: " +
s"${value.dataType.catalogString} != ${mismatchOpt.get.dataType.catalogString}")
DataTypeMismatch(
errorSubClass = "DATA_DIFF_TYPES",
messageParameters = Map(
"functionName" -> toSQLId(prettyName),
"dataType" -> children.map(child => toSQLType(child.dataType)).mkString("[", ", ", "]")
)
)
} else {
TypeUtils.checkForOrderingExpr(value.dataType, prettyName)
}
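
The Dataset API reaches the same `In` check through `isin`; a short editorial sketch mirroring the Java suite change further down (column name and literals are illustrative):

```scala
// spark-shell sketch; toDF and $ come from spark.implicits._, in scope in the shell.
val df = Seq(1).toDF("a")
df.filter($"a".isin(true, "x"))
// expected: AnalysisException with error class DATATYPE_MISMATCH.DATA_DIFF_TYPES
// and dataType ["INT", "BOOLEAN", "STRING"] (no common type across the arguments)
```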
@@ -329,7 +329,13 @@ class AnalysisSuite extends AnalysisTest with Matchers {
val plan = Project(Alias(In(Literal(null), Seq(Literal(true), Literal(1))), "a")() :: Nil,
LocalRelation()
)
assertAnalysisError(plan, Seq("data type mismatch: Arguments must be same type"))
assertAnalysisErrorClass(
plan,
"DATATYPE_MISMATCH.DATA_DIFF_TYPES",
Map(
"functionName" -> "`in`",
"dataType" -> "[\"VOID\", \"BOOLEAN\", \"INT\"]",
"sqlExpr" -> "\"(NULL IN (true, 1))\""))
}

test("SPARK-11725: correctly handle null inputs for ScalaUDF") {
@@ -351,23 +351,62 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper with Quer
)
)

assertError(If($"intField", $"stringField", $"stringField"),
"type of predicate expression in If should be boolean")
assertError(If($"booleanField", $"intField", $"booleanField"),
"data type mismatch")
assert(If(Literal(1), Literal("a"), Literal("b")).checkInputDataTypes() ==
DataTypeMismatch(
errorSubClass = "UNEXPECTED_INPUT_TYPE",
messageParameters = Map(
"paramIndex" -> "1",
"requiredType" -> toSQLType(BooleanType),
"inputSql" -> "\"1\"",
"inputType" -> "\"INT\""
)
)
)

assertError(
CaseWhen(Seq(($"booleanField".attr, $"intField".attr),
($"booleanField".attr, $"mapField".attr))),
"THEN and ELSE expressions should all be same type or coercible to a common type")
assertError(
CaseKeyWhen($"intField", Seq($"intField", $"stringField",
$"intField", $"mapField")),
"THEN and ELSE expressions should all be same type or coercible to a common type")
assertError(
CaseWhen(Seq(($"booleanField".attr, $"intField".attr),
($"intField".attr, $"intField".attr))),
"WHEN expressions in CaseWhen should all be boolean type")
assert(If(Literal(true), Literal(1), Literal(false)).checkInputDataTypes() ==
DataTypeMismatch(
errorSubClass = "DATA_DIFF_TYPES",
messageParameters = Map(
"functionName" -> "`if`",
"dataType" -> "[\"INT\", \"BOOLEAN\"]"
)
)
)

assert(CaseWhen(Seq((Literal(true), Literal(1)),
(Literal(true), Literal("a")))).checkInputDataTypes() ==
DataTypeMismatch(
errorSubClass = "DATA_DIFF_TYPES",
messageParameters = Map(
"functionName" -> "`casewhen`",
"dataType" -> "[\"INT\", \"STRING\"]"
)
)
)

assert(CaseKeyWhen(Literal(1), Seq(Literal(1), Literal("a"),
Literal(2), Literal(3))).checkInputDataTypes() ==
DataTypeMismatch(
errorSubClass = "DATA_DIFF_TYPES",
messageParameters = Map(
"functionName" -> "`casewhen`",
"dataType" -> "[\"STRING\", \"INT\"]"
)
)
)

assert(CaseWhen(Seq((Literal(true), Literal(1)),
(Literal(2), Literal(3)))).checkInputDataTypes() ==
DataTypeMismatch(
errorSubClass = "UNEXPECTED_INPUT_TYPE",
messageParameters = Map(
"paramIndex" -> "2",
"requiredType" -> "\"BOOLEAN\"",
"inputSql" -> "\"2\"",
"inputType" -> "\"INT\""
)
)
)
}

test("check types for aggregates") {
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext
import org.apache.spark.sql.types._
@@ -238,16 +238,19 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper

val checkResult1 = CaseWhen(Seq((Literal.FalseLiteral, caseVal1),
(Literal.FalseLiteral, caseVal2))).checkInputDataTypes()
assert(checkResult1.isInstanceOf[TypeCheckResult.TypeCheckFailure])
assert(checkResult1.asInstanceOf[TypeCheckResult.TypeCheckFailure].message
.contains("CASE WHEN ... THEN struct<x:int> WHEN ... THEN struct<y:int> END"))
assert(checkResult1 == DataTypeMismatch(
errorSubClass = "DATA_DIFF_TYPES",
messageParameters = Map(
"functionName" -> "`casewhen`",
"dataType" -> "[\"STRUCT<x: INT>\", \"STRUCT<y: INT>\"]")))

val checkResult2 = CaseWhen(Seq((Literal.FalseLiteral, caseVal1),
(Literal.FalseLiteral, caseVal2)), Some(elseVal)).checkInputDataTypes()
assert(checkResult2.isInstanceOf[TypeCheckResult.TypeCheckFailure])
assert(checkResult2.asInstanceOf[TypeCheckResult.TypeCheckFailure].message
.contains("CASE WHEN ... THEN struct<x:int> WHEN ... THEN struct<y:int> " +
"ELSE struct<z:int> END"))
assert(checkResult2 == DataTypeMismatch(
errorSubClass = "DATA_DIFF_TYPES",
messageParameters = Map(
"functionName" -> "`casewhen`",
"dataType" -> "[\"STRUCT<x: INT>\", \"STRUCT<y: INT>\", \"STRUCT<z: INT>\"]")))
}

test("SPARK-27917 test semantic equals of CaseWhen") {
@@ -17,19 +17,23 @@

package test.org.apache.spark.sql;

import java.util.*;

import com.google.common.collect.Maps;

import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.test.TestSparkSession;
import org.apache.spark.sql.types.StructType;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.*;

import static org.apache.spark.sql.types.DataTypes.*;

@@ -79,12 +83,13 @@ public void isInCollectionCheckExceptionMessage() {
createStructField("a", IntegerType, false),
createStructField("b", createArrayType(IntegerType, false), false)));
Dataset<Row> df = spark.createDataFrame(rows, schema);
Exception e = Assert.assertThrows(Exception.class,
AnalysisException e = Assert.assertThrows(AnalysisException.class,
() -> df.filter(df.col("a").isInCollection(Arrays.asList(new Column("b")))));
Arrays.asList("cannot resolve",
"due to data type mismatch: Arguments must be same type but were")
.forEach(s ->
Assert.assertTrue(e.getMessage().toLowerCase(Locale.ROOT)
.contains(s.toLowerCase(Locale.ROOT))));
Assert.assertTrue(e.getErrorClass().equals("DATATYPE_MISMATCH.DATA_DIFF_TYPES"));
Map<String, String> messageParameters = new HashMap<>();
messageParameters.put("functionName", "`in`");
messageParameters.put("dataType", "[\"INT\", \"ARRAY<INT>\"]");
messageParameters.put("sqlExpr", "\"(a IN (b))\"");
Assert.assertTrue(Maps.difference(e.getMessageParameters(), messageParameters).areEqual());
}
}
@@ -39,11 +39,13 @@ struct<>
-- !query output
org.apache.spark.sql.AnalysisException
{
"errorClass" : "_LEGACY_ERROR_TEMP_2315",
"errorClass" : "DATATYPE_MISMATCH.IN_SUBQUERY_LENGTH_MISMATCH",
"messageParameters" : {
"hint" : "",
"msg" : "\nThe number of columns in the left hand side of an IN subquery does not match the\nnumber of columns in the output of subquery.\n#columns in left hand side: 2.\n#columns in right hand side: 1.\nLeft side columns:\n[tab_a.a1, tab_a.b1].\nRight side columns:\n[`named_struct(a2, a2, b2, b2)`].",
"sqlExpr" : "(named_struct('a1', tab_a.a1, 'b1', tab_a.b1) IN (listquery()))"
"leftColumns" : "\"a1\", \"b1\"",
"leftLength" : "2",
"rightColumns" : "\"named_struct(a2, a2, b2, b2)\"",
"rightLength" : "1",
"sqlExpr" : "\"(named_struct('a1', a1, 'b1', b1) IN (listquery()))\""
},
"queryContext" : [ {
"objectType" : "",