-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-25388][Test][SQL] Detect incorrect nullable of DataType in the result #22375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
11379e0
fc987aa
884fd80
33e589d
9ef335d
edc3d7c
5f84e80
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,11 +69,22 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
|
|
||
| /** | ||
| * Check the equality between result of expression and expected value, it will handle | ||
| * Array[Byte], Spread[Double], MapData and Row. | ||
| * Array[Byte], Spread[Double], MapData and Row. Also check whether nullable in expression is | ||
| * true if result is null | ||
| */ | ||
| protected def checkResult(result: Any, expected: Any, exprDataType: DataType): Boolean = { | ||
| protected def checkResult(result: Any, expected: Any, expression: Expression): Boolean = { | ||
| checkResult(result, expected, expression.dataType, expression.nullable) | ||
| } | ||
|
|
||
| protected def checkResult( | ||
| result: Any, | ||
| expected: Any, | ||
| exprDataType: DataType, | ||
| exprNullable: Boolean): Boolean = { | ||
| val dataType = UserDefinedType.sqlType(exprDataType) | ||
|
|
||
| // The result is null for a non-nullable expression | ||
| assert(result != null || exprNullable, "exprNullable should be true if result is null") | ||
| (result, expected) match { | ||
| case (result: Array[Byte], expected: Array[Byte]) => | ||
| java.util.Arrays.equals(result, expected) | ||
|
|
@@ -83,24 +94,24 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| val st = dataType.asInstanceOf[StructType] | ||
| assert(result.numFields == st.length && expected.numFields == st.length) | ||
| st.zipWithIndex.forall { case (f, i) => | ||
| checkResult(result.get(i, f.dataType), expected.get(i, f.dataType), f.dataType) | ||
| checkResult( | ||
| result.get(i, f.dataType), expected.get(i, f.dataType), f.dataType, f.nullable) | ||
| } | ||
| case (result: ArrayData, expected: ArrayData) => | ||
| result.numElements == expected.numElements && { | ||
| val et = dataType.asInstanceOf[ArrayType].elementType | ||
| val ArrayType(et, cn) = dataType.asInstanceOf[ArrayType] | ||
| var isSame = true | ||
| var i = 0 | ||
| while (isSame && i < result.numElements) { | ||
| isSame = checkResult(result.get(i, et), expected.get(i, et), et) | ||
| isSame = checkResult(result.get(i, et), expected.get(i, et), et, cn) | ||
| i += 1 | ||
| } | ||
| isSame | ||
| } | ||
| case (result: MapData, expected: MapData) => | ||
| val kt = dataType.asInstanceOf[MapType].keyType | ||
| val vt = dataType.asInstanceOf[MapType].valueType | ||
| checkResult(result.keyArray, expected.keyArray, ArrayType(kt)) && | ||
| checkResult(result.valueArray, expected.valueArray, ArrayType(vt)) | ||
| val MapType(kt, vt, vcn) = dataType.asInstanceOf[MapType] | ||
| checkResult(result.keyArray, expected.keyArray, ArrayType(kt, false), false) && | ||
| checkResult(result.valueArray, expected.valueArray, ArrayType(vt, vcn), false) | ||
| case (result: Double, expected: Double) => | ||
| if (expected.isNaN) result.isNaN else expected == result | ||
| case (result: Float, expected: Float) => | ||
|
|
@@ -175,7 +186,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| val actual = try evaluateWithoutCodegen(expression, inputRow) catch { | ||
| case e: Exception => fail(s"Exception evaluating $expression", e) | ||
| } | ||
| if (!checkResult(actual, expected, expression.dataType)) { | ||
| if (!checkResult(actual, expected, expression)) { | ||
| val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" | ||
| fail(s"Incorrect evaluation (codegen off): $expression, " + | ||
| s"actual: $actual, " + | ||
|
|
@@ -191,7 +202,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| for (fallbackMode <- modes) { | ||
| withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> fallbackMode.toString) { | ||
| val actual = evaluateWithMutableProjection(expression, inputRow) | ||
| if (!checkResult(actual, expected, expression.dataType)) { | ||
| if (!checkResult(actual, expected, expression)) { | ||
| val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" | ||
| fail(s"Incorrect evaluation (fallback mode = $fallbackMode): $expression, " + | ||
| s"actual: $actual, expected: $expected$input") | ||
|
|
@@ -221,6 +232,12 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| val unsafeRow = evaluateWithUnsafeProjection(expression, inputRow) | ||
| val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" | ||
|
|
||
| // Compare column 0 of the unsafe row with the expected value, handing checkResult both the | ||
| // expression's data type and its nullable flag. | ||
| val dataType = expression.dataType | ||
| if (!checkResult(unsafeRow.get(0, dataType), expected, dataType, expression.nullable)) { | ||
| // Every fragment needs the `s` interpolator; without it "$fallbackMode" is printed literally. | ||
| fail(s"Incorrect evaluation in unsafe mode (fallback mode = $fallbackMode): " + | ||
| s"$expression, actual: $unsafeRow, expected: $expected, " + | ||
| s"dataType: $dataType, nullable: ${expression.nullable}") | ||
| } | ||
| if (expected == null) { | ||
| if (!unsafeRow.isNullAt(0)) { | ||
| val expectedRow = InternalRow(expected, expected) | ||
|
|
@@ -229,8 +246,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| } | ||
| } else { | ||
| val lit = InternalRow(expected, expected) | ||
|
||
| val expectedRow = | ||
| UnsafeProjection.create(Array(expression.dataType, expression.dataType)).apply(lit) | ||
| val expectedRow = UnsafeProjection.create(Array(dataType, dataType)).apply(lit) | ||
| if (unsafeRow != expectedRow) { | ||
| fail(s"Incorrect evaluation in unsafe mode (fallback mode = $fallbackMode): " + | ||
| s"$expression, actual: $unsafeRow, expected: $expectedRow$input") | ||
|
|
@@ -280,15 +296,15 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| expression) | ||
| plan.initialize(0) | ||
| var actual = plan(inputRow).get(0, expression.dataType) | ||
| assert(checkResult(actual, expected, expression.dataType)) | ||
| assert(checkResult(actual, expected, expression)) | ||
|
|
||
| plan = generateProject( | ||
| GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), | ||
| expression) | ||
| plan.initialize(0) | ||
| actual = FromUnsafeProjection(expression.dataType :: Nil)( | ||
| plan(inputRow)).get(0, expression.dataType) | ||
| assert(checkResult(actual, expected, expression.dataType)) | ||
| assert(checkResult(actual, expected, expression)) | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,9 +19,10 @@ package org.apache.spark.sql.catalyst.expressions | |
|
|
||
| import org.apache.spark.SparkFunSuite | ||
| import org.apache.spark.sql.catalyst.InternalRow | ||
| import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} | ||
| import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode} | ||
| import org.apache.spark.sql.catalyst.expressions.codegen.Block._ | ||
| import org.apache.spark.sql.types.{DataType, IntegerType} | ||
| import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} | ||
| import org.apache.spark.sql.types.{DataType, IntegerType, MapType} | ||
|
|
||
| /** | ||
| * A test suite for testing [[ExpressionEvalHelper]]. | ||
|
|
@@ -35,6 +36,13 @@ class ExpressionEvalHelperSuite extends SparkFunSuite with ExpressionEvalHelper | |
| val e = intercept[RuntimeException] { checkEvaluation(BadCodegenExpression(), 10) } | ||
| assert(e.getMessage.contains("some_variable")) | ||
| } | ||
|
|
||
| test("SPARK-25388: checkEvaluation should fail if nullable in DataType is incorrect") { | ||
| val e = intercept[RuntimeException] { | ||
| checkEvaluation(MapIncorrectDataTypeExpression(), Map(3 -> 7, 6 -> null)) | ||
|
||
| } | ||
| assert(e.getMessage.contains("and exprNullable was")) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -53,3 +61,18 @@ case class BadCodegenExpression() extends LeafExpression { | |
| } | ||
| override def dataType: DataType = IntegerType | ||
| } | ||
|
|
||
| /** | ||
| * A leaf expression whose declared type is deliberately inconsistent with the value it | ||
| * produces: `dataType` is a MapType with valueContainsNull = false, while `eval` returns a | ||
| * map (keys 3 and 6) whose second value is null. The expression itself is declared | ||
| * non-nullable and `eval` always returns a non-null map, so only the nested value nullability | ||
| * is wrong. | ||
| */ | ||
| case class MapIncorrectDataTypeExpression() extends LeafExpression with CodegenFallback { | ||
| override def nullable: Boolean = false | ||
| override def eval(input: InternalRow): Any = { | ||
| val keys = new GenericArrayData(Array(3, 6)) | ||
| val values = new GenericArrayData(Array(7, null)) | ||
| new ArrayBasedMapData(keys, values) | ||
| } | ||
| // Intentionally wrong: values includes null, so a correct type would set valueContainsNull = true | ||
| override def dataType: DataType = MapType(IntegerType, IntegerType, valueContainsNull = false) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why did you add this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is because this statement checks consistency between `expression` and its `nullable`, as you proposed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mmmh, I am not sure about this. Do we then still need the code below? Seems to me we are checking the same thing twice, please correct me if I am wrong.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We check different properties in these two `if` statements. 1. `nullable` in `expected` 2. `expected` and `expression`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, I just meant that here we are checking the result and we are doing the same after too. Shouldn't we just add an assert for `unsafeRow.get(0, dataType) != null || expression.nullable` here instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`expected`. 2. sees `expected` and `expression`. Thus, we are doing different things. At 1, as we discussed, we need to check the consistency recursively. IIUC, `unsafeRow.get(0, dataType) != null || expression.nullable` does not perform checks recursively. Am I misunderstanding?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good point, I was not considering it. Then, do we need the check at https://github.com/apache/spark/pull/22375/files/9ef335d6e43a6ef7d253d0ed3564f95bd0278f71#diff-41747ec3f56901eb7bfb95d2a217e94dL231? Isn't it performed in `checkResult`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think `checkResult` already validates `expression` according to the nullability of the given expression at 1. Thus, if the `expected` is not correct, 2. will detect the incorrect point.