-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-21304][SQL] remove unnecessary isNull variable for collection related encoder expressions #18529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-21304][SQL] remove unnecessary isNull variable for collection related encoder expressions #18529
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -465,7 +465,11 @@ object MapObjects { | |
| customCollectionCls: Option[Class[_]] = None): MapObjects = { | ||
| val id = curId.getAndIncrement() | ||
| val loopValue = s"MapObjects_loopValue$id" | ||
| val loopIsNull = s"MapObjects_loopIsNull$id" | ||
| val loopIsNull = if (elementNullable) { | ||
| s"MapObjects_loopIsNull$id" | ||
| } else { | ||
| "false" | ||
| } | ||
| val loopVar = LambdaVariable(loopValue, loopIsNull, elementType, elementNullable) | ||
| MapObjects( | ||
| loopValue, loopIsNull, elementType, function(loopVar), inputData, customCollectionCls) | ||
|
|
@@ -517,7 +521,6 @@ case class MapObjects private( | |
|
|
||
| override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { | ||
| val elementJavaType = ctx.javaType(loopVarDataType) | ||
| ctx.addMutableState("boolean", loopIsNull, "") | ||
| ctx.addMutableState(elementJavaType, loopValue, "") | ||
| val genInputData = inputData.genCode(ctx) | ||
| val genFunction = lambdaFunction.genCode(ctx) | ||
|
|
@@ -588,12 +591,14 @@ case class MapObjects private( | |
| case _ => genFunction.value | ||
| } | ||
|
|
||
| val loopNullCheck = inputDataType match { | ||
| case _: ArrayType => s"$loopIsNull = ${genInputData.value}.isNullAt($loopIndex);" | ||
| // The element of primitive array will never be null. | ||
| case ObjectType(cls) if cls.isArray && cls.getComponentType.isPrimitive => | ||
| s"$loopIsNull = false" | ||
| case _ => s"$loopIsNull = $loopValue == null;" | ||
| val loopNullCheck = if (loopIsNull != "false") { | ||
| ctx.addMutableState("boolean", loopIsNull, "") | ||
| inputDataType match { | ||
| case _: ArrayType => s"$loopIsNull = ${genInputData.value}.isNullAt($loopIndex);" | ||
| case _ => s"$loopIsNull = $loopValue == null;" | ||
| } | ||
| } else { | ||
| "" | ||
| } | ||
|
|
||
| val (initCollection, addElement, getResult): (String, String => String, String) = | ||
|
|
@@ -667,11 +672,11 @@ case class MapObjects private( | |
| } | ||
| } | ||
|
|
||
| object CollectObjectsToMap { | ||
| object CatalystToExternalMap { | ||
|
||
| private val curId = new java.util.concurrent.atomic.AtomicInteger() | ||
|
|
||
| /** | ||
|
||
| * Construct an instance of CollectObjectsToMap case class. | ||
| * Construct an instance of CatalystToExternalMap case class. | ||
| * | ||
| * @param keyFunction The function applied on the key collection elements. | ||
| * @param valueFunction The function applied on the value collection elements. | ||
|
|
@@ -682,15 +687,19 @@ object CollectObjectsToMap { | |
| keyFunction: Expression => Expression, | ||
| valueFunction: Expression => Expression, | ||
| inputData: Expression, | ||
| collClass: Class[_]): CollectObjectsToMap = { | ||
| collClass: Class[_]): CatalystToExternalMap = { | ||
| val id = curId.getAndIncrement() | ||
| val keyLoopValue = s"CollectObjectsToMap_keyLoopValue$id" | ||
| val keyLoopValue = s"CatalystToExternalMap_keyLoopValue$id" | ||
| val mapType = inputData.dataType.asInstanceOf[MapType] | ||
| val keyLoopVar = LambdaVariable(keyLoopValue, "", mapType.keyType, nullable = false) | ||
| val valueLoopValue = s"CollectObjectsToMap_valueLoopValue$id" | ||
| val valueLoopIsNull = s"CollectObjectsToMap_valueLoopIsNull$id" | ||
| val valueLoopValue = s"CatalystToExternalMap_valueLoopValue$id" | ||
| val valueLoopIsNull = if (mapType.valueContainsNull) { | ||
| s"CatalystToExternalMap_valueLoopIsNull$id" | ||
| } else { | ||
| "false" | ||
| } | ||
| val valueLoopVar = LambdaVariable(valueLoopValue, valueLoopIsNull, mapType.valueType) | ||
| CollectObjectsToMap( | ||
| CatalystToExternalMap( | ||
| keyLoopValue, keyFunction(keyLoopVar), | ||
| valueLoopValue, valueLoopIsNull, valueFunction(valueLoopVar), | ||
| inputData, collClass) | ||
|
|
@@ -716,7 +725,7 @@ object CollectObjectsToMap { | |
| * @param inputData An expression that when evaluated returns a map object. | ||
| * @param collClass The type of the resulting collection. | ||
| */ | ||
| case class CollectObjectsToMap private( | ||
| case class CatalystToExternalMap private( | ||
| keyLoopValue: String, | ||
| keyLambdaFunction: Expression, | ||
| valueLoopValue: String, | ||
|
|
@@ -748,7 +757,6 @@ case class CollectObjectsToMap private( | |
| ctx.addMutableState(keyElementJavaType, keyLoopValue, "") | ||
| val genKeyFunction = keyLambdaFunction.genCode(ctx) | ||
| val valueElementJavaType = ctx.javaType(mapType.valueType) | ||
| ctx.addMutableState("boolean", valueLoopIsNull, "") | ||
| ctx.addMutableState(valueElementJavaType, valueLoopValue, "") | ||
| val genValueFunction = valueLambdaFunction.genCode(ctx) | ||
| val genInputData = inputData.genCode(ctx) | ||
|
|
@@ -781,7 +789,12 @@ case class CollectObjectsToMap private( | |
| val genKeyFunctionValue = genFunctionValue(keyLambdaFunction, genKeyFunction) | ||
| val genValueFunctionValue = genFunctionValue(valueLambdaFunction, genValueFunction) | ||
|
|
||
| val valueLoopNullCheck = s"$valueLoopIsNull = $valueArray.isNullAt($loopIndex);" | ||
| val valueLoopNullCheck = if (valueLoopIsNull != "false") { | ||
| ctx.addMutableState("boolean", valueLoopIsNull, "") | ||
| s"$valueLoopIsNull = $valueArray.isNullAt($loopIndex);" | ||
| } else { | ||
| "" | ||
| } | ||
|
|
||
| val builderClass = classOf[Builder[_, _]].getName | ||
| val constructBuilder = s""" | ||
|
|
@@ -847,9 +860,17 @@ object ExternalMapToCatalyst { | |
| valueNullable: Boolean): ExternalMapToCatalyst = { | ||
| val id = curId.getAndIncrement() | ||
| val keyName = "ExternalMapToCatalyst_key" + id | ||
| val keyIsNull = "ExternalMapToCatalyst_key_isNull" + id | ||
| val keyIsNull = if (keyNullable) { | ||
| "ExternalMapToCatalyst_key_isNull" + id | ||
| } else { | ||
| "false" | ||
| } | ||
| val valueName = "ExternalMapToCatalyst_value" + id | ||
| val valueIsNull = "ExternalMapToCatalyst_value_isNull" + id | ||
| val valueIsNull = if (valueNullable) { | ||
| "ExternalMapToCatalyst_value_isNull" + id | ||
| } else { | ||
| "false" | ||
| } | ||
|
|
||
| ExternalMapToCatalyst( | ||
| keyName, | ||
|
|
@@ -919,9 +940,7 @@ case class ExternalMapToCatalyst private( | |
|
|
||
| val keyElementJavaType = ctx.javaType(keyType) | ||
| val valueElementJavaType = ctx.javaType(valueType) | ||
| ctx.addMutableState("boolean", keyIsNull, "") | ||
| ctx.addMutableState(keyElementJavaType, key, "") | ||
| ctx.addMutableState("boolean", valueIsNull, "") | ||
| ctx.addMutableState(valueElementJavaType, value, "") | ||
|
|
||
| val (defineEntries, defineKeyValue) = child.dataType match { | ||
|
|
@@ -957,16 +976,18 @@ case class ExternalMapToCatalyst private( | |
| defineEntries -> defineKeyValue | ||
| } | ||
|
|
||
| val keyNullCheck = if (ctx.isPrimitiveType(keyType)) { | ||
| s"$keyIsNull = false;" | ||
| } else { | ||
| val keyNullCheck = if (keyIsNull != "false") { | ||
| ctx.addMutableState("boolean", keyIsNull, "") | ||
| s"$keyIsNull = $key == null;" | ||
| } else { | ||
| "" | ||
| } | ||
|
|
||
| val valueNullCheck = if (ctx.isPrimitiveType(valueType)) { | ||
| s"$valueIsNull = false;" | ||
| } else { | ||
| val valueNullCheck = if (valueIsNull != "false") { | ||
| ctx.addMutableState("boolean", valueIsNull, "") | ||
| s"$valueIsNull = $value == null;" | ||
| } else { | ||
| "" | ||
| } | ||
|
|
||
| val arrayCls = classOf[GenericArrayData].getName | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you remove lines 594-595 in the original code? Can we always ensure primitive type array sets
"false"intoloopIsNull?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess we can't remove these lines because it might cause a Janino compile error when creating
MapObjectslike:In this case,
$loopValue == nulloccurs the compile error because$loopValueisint.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For this case,
loopIsNullwill befalseand we won't hit this branch.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, I chose the example intentionally to be
loopIsNull != "false"(becauseelementNullableistruehere), and we can hit the branch.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, it depends on if we correctly set
elementNullablewhen callingMapObjects.applyfor all the cases.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
for
Array[Int], because we calculate theelementNullablebased on type, soelementNullablewill be false.