-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-25388][Test][SQL] Detect incorrect nullable of DataType in the result #22375
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
11379e0
fc987aa
884fd80
33e589d
9ef335d
edc3d7c
5f84e80
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,11 +69,17 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
|
|
||
| /** | ||
| * Check the equality between result of expression and expected value, it will handle | ||
| * Array[Byte], Spread[Double], MapData and Row. | ||
| * Array[Byte], Spread[Double], MapData and Row. Also check whether exprNullable is true | ||
|
||
| * if result of expression is null | ||
| */ | ||
| protected def checkResult(result: Any, expected: Any, exprDataType: DataType): Boolean = { | ||
| protected def checkResult( | ||
| result: Any, | ||
| expected: Any, | ||
| exprDataType: DataType, | ||
| exprNullable: Boolean): Boolean = { | ||
| val dataType = UserDefinedType.sqlType(exprDataType) | ||
|
|
||
| assert(result != null || exprNullable) | ||
|
||
| (result, expected) match { | ||
| case (result: Array[Byte], expected: Array[Byte]) => | ||
| java.util.Arrays.equals(result, expected) | ||
|
|
@@ -83,24 +89,24 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| val st = dataType.asInstanceOf[StructType] | ||
| assert(result.numFields == st.length && expected.numFields == st.length) | ||
| st.zipWithIndex.forall { case (f, i) => | ||
| checkResult(result.get(i, f.dataType), expected.get(i, f.dataType), f.dataType) | ||
| checkResult( | ||
| result.get(i, f.dataType), expected.get(i, f.dataType), f.dataType, f.nullable) | ||
| } | ||
| case (result: ArrayData, expected: ArrayData) => | ||
| result.numElements == expected.numElements && { | ||
| val et = dataType.asInstanceOf[ArrayType].elementType | ||
| val ArrayType(et, cn) = dataType.asInstanceOf[ArrayType] | ||
| var isSame = true | ||
| var i = 0 | ||
| while (isSame && i < result.numElements) { | ||
| isSame = checkResult(result.get(i, et), expected.get(i, et), et) | ||
| isSame = checkResult(result.get(i, et), expected.get(i, et), et, cn) | ||
| i += 1 | ||
| } | ||
| isSame | ||
| } | ||
| case (result: MapData, expected: MapData) => | ||
| val kt = dataType.asInstanceOf[MapType].keyType | ||
| val vt = dataType.asInstanceOf[MapType].valueType | ||
| checkResult(result.keyArray, expected.keyArray, ArrayType(kt)) && | ||
| checkResult(result.valueArray, expected.valueArray, ArrayType(vt)) | ||
| val MapType(kt, vt, vcn) = dataType.asInstanceOf[MapType] | ||
| checkResult(result.keyArray, expected.keyArray, ArrayType(kt, false), false) && | ||
| checkResult(result.valueArray, expected.valueArray, ArrayType(vt, vcn), false) | ||
| case (result: Double, expected: Double) => | ||
| if (expected.isNaN) result.isNaN else expected == result | ||
| case (result: Float, expected: Float) => | ||
|
|
@@ -175,7 +181,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| val actual = try evaluateWithoutCodegen(expression, inputRow) catch { | ||
| case e: Exception => fail(s"Exception evaluating $expression", e) | ||
| } | ||
| if (!checkResult(actual, expected, expression.dataType)) { | ||
| if (!checkResult(actual, expected, expression.dataType, expression.nullable)) { | ||
| val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" | ||
| fail(s"Incorrect evaluation (codegen off): $expression, " + | ||
| s"actual: $actual, " + | ||
|
|
@@ -191,7 +197,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| for (fallbackMode <- modes) { | ||
| withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> fallbackMode.toString) { | ||
| val actual = evaluateWithMutableProjection(expression, inputRow) | ||
| if (!checkResult(actual, expected, expression.dataType)) { | ||
| if (!checkResult(actual, expected, expression.dataType, expression.nullable)) { | ||
| val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" | ||
| fail(s"Incorrect evaluation (fallback mode = $fallbackMode): $expression, " + | ||
| s"actual: $actual, expected: $expected$input") | ||
|
|
@@ -221,6 +227,12 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| val unsafeRow = evaluateWithUnsafeProjection(expression, inputRow) | ||
| val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" | ||
|
|
||
| val dataType = expression.dataType | ||
| if (!checkResult(unsafeRow.get(0, dataType), expected, dataType, expression.nullable)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why did you add this?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is because this statement checks consistency between
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mmmh, I am not sure about this. Do we then still need the code below? Seems to me we are checking the same thing twice, please correct me if I am wrong.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We check different properties in these two
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, I just meant that here we are checking the result and we are doing the same after too. Shouldn't we just add an assert for
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
At 1, as we discussed, we need to check the consistency recursively. IIUC,
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
good point, I was not considering it. Then, do we need the check at https://github.com/apache/spark/pull/22375/files/9ef335d6e43a6ef7d253d0ed3564f95bd0278f71#diff-41747ec3f56901eb7bfb95d2a217e94dL231? Isn't it performed in
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think |
||
| fail("Incorrect evaluation in unsafe mode (fallback mode = $fallbackMode): " + | ||
| s"$expression, actual: $unsafeRow, expected: $expected, " + | ||
| s"dataType: $dataType, nullable: ${expression.nullable}") | ||
| } | ||
| if (expected == null) { | ||
| if (!unsafeRow.isNullAt(0)) { | ||
| val expectedRow = InternalRow(expected, expected) | ||
|
|
@@ -229,8 +241,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| } | ||
| } else { | ||
| val lit = InternalRow(expected, expected) | ||
|
||
| val expectedRow = | ||
| UnsafeProjection.create(Array(expression.dataType, expression.dataType)).apply(lit) | ||
| val expectedRow = UnsafeProjection.create(Array(dataType, dataType)).apply(lit) | ||
| if (unsafeRow != expectedRow) { | ||
| fail(s"Incorrect evaluation in unsafe mode (fallback mode = $fallbackMode): " + | ||
| s"$expression, actual: $unsafeRow, expected: $expectedRow$input") | ||
|
|
@@ -280,15 +291,15 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa | |
| expression) | ||
| plan.initialize(0) | ||
| var actual = plan(inputRow).get(0, expression.dataType) | ||
| assert(checkResult(actual, expected, expression.dataType)) | ||
| assert(checkResult(actual, expected, expression.dataType, expression.nullable)) | ||
|
|
||
| plan = generateProject( | ||
| GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), | ||
| expression) | ||
| plan.initialize(0) | ||
| actual = FromUnsafeProjection(expression.dataType :: Nil)( | ||
| plan(inputRow)).get(0, expression.dataType) | ||
| assert(checkResult(actual, expected, expression.dataType)) | ||
| assert(checkResult(actual, expected, expression.dataType, expression.nullable)) | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,9 +19,10 @@ package org.apache.spark.sql.catalyst.expressions | |
|
|
||
| import org.apache.spark.SparkFunSuite | ||
| import org.apache.spark.sql.catalyst.InternalRow | ||
| import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} | ||
| import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode} | ||
| import org.apache.spark.sql.catalyst.expressions.codegen.Block._ | ||
| import org.apache.spark.sql.types.{DataType, IntegerType} | ||
| import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} | ||
| import org.apache.spark.sql.types.{DataType, IntegerType, MapType} | ||
|
|
||
| /** | ||
| * A test suite for testing [[ExpressionEvalHelper]]. | ||
|
|
@@ -35,6 +36,13 @@ class ExpressionEvalHelperSuite extends SparkFunSuite with ExpressionEvalHelper | |
| val e = intercept[RuntimeException] { checkEvaluation(BadCodegenExpression(), 10) } | ||
| assert(e.getMessage.contains("some_variable")) | ||
| } | ||
|
|
||
| test("SPARK-25388: checkEvaluation should fail if nullable in DataType is incorrect") { | ||
| val e = intercept[RuntimeException] { | ||
| checkEvaluation(MapIncorrectDataTypeExpression(), Map(3 -> 7, 6 -> null)) | ||
|
||
| } | ||
| assert(e.getMessage.contains("and exprNullable was")) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -53,3 +61,18 @@ case class BadCodegenExpression() extends LeafExpression { | |
| } | ||
| override def dataType: DataType = IntegerType | ||
| } | ||
|
|
||
| /** | ||
| * An expression that returns a MapData with incorrect DataType whose valueContainsNull is false | ||
| * while its value includes null | ||
| */ | ||
| case class MapIncorrectDataTypeExpression() extends LeafExpression with CodegenFallback { | ||
| override def nullable: Boolean = false | ||
| override def eval(input: InternalRow): Any = { | ||
| val keys = new GenericArrayData(Array(3, 6)) | ||
| val values = new GenericArrayData(Array(7, null)) | ||
| new ArrayBasedMapData(keys, values) | ||
| } | ||
| // since values includes null, valueContainsNull must be true | ||
| override def dataType: DataType = MapType(IntegerType, IntegerType, valueContainsNull = false) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a little weird to ask the caller to provide both `expected` and `exprNullable`, and then use `exprNullable` to validate `expected`. Can we set a default value for `exprNullable` in `checkResult`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is another option that I considered. On the other hand, setting a default risks overlooking a possible inconsistency between the value and `nullable` at the top level of `expected`. Do we use the default value at all of the callers of `checkResult`?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should provide an overload of `checkResult` that takes `Expression`, which provides `dataType` and `nullable`.