-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-32914][SQL] Avoid constructing dataType multiple times #29790
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1048,6 +1048,11 @@ trait ComplexTypeMergingExpression extends Expression { | |
| @transient | ||
| lazy val inputTypesForMerging: Seq[DataType] = children.map(_.dataType) | ||
|
|
||
| private lazy val internalDataType: DataType = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we put it right before the line of |
||
| dataTypeCheck | ||
| inputTypesForMerging.reduceLeft(TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(_, _).get) | ||
| } | ||
|
|
||
| def dataTypeCheck: Unit = { | ||
| require( | ||
| inputTypesForMerging.nonEmpty, | ||
|
|
@@ -1058,10 +1063,7 @@ trait ComplexTypeMergingExpression extends Expression { | |
| s" The input types found are\n\t${inputTypesForMerging.mkString("\n\t")}") | ||
| } | ||
|
|
||
| override def dataType: DataType = { | ||
| dataTypeCheck | ||
| inputTypesForMerging.reduceLeft(TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(_, _).get) | ||
| } | ||
| override def dataType: DataType = internalDataType | ||
| } | ||
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -368,7 +368,7 @@ case class MapEntries(child: Expression) | |
|
|
||
| @transient private lazy val childDataType: MapType = child.dataType.asInstanceOf[MapType] | ||
|
|
||
| override def dataType: DataType = { | ||
| private lazy val internalDataType: DataType = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it expensive? it just creates a few objects.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| ArrayType( | ||
| StructType( | ||
| StructField("key", childDataType.keyType, false) :: | ||
|
|
@@ -377,6 +377,8 @@ case class MapEntries(child: Expression) | |
| false) | ||
| } | ||
|
|
||
| override def dataType: DataType = internalDataType | ||
|
|
||
| override protected def nullSafeEval(input: Any): Any = { | ||
| val childMap = input.asInstanceOf[MapData] | ||
| val keys = childMap.keyArray() | ||
|
|
@@ -3498,13 +3500,16 @@ object ArrayUnion { | |
| since = "2.4.0") | ||
| case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBinaryLike | ||
| with ComplexTypeMergingExpression { | ||
| override def dataType: DataType = { | ||
|
|
||
| private lazy val internalDataType: DataType = { | ||
| dataTypeCheck | ||
|
wangyum marked this conversation as resolved.
wangyum marked this conversation as resolved.
|
||
| ArrayType(elementType, | ||
| left.dataType.asInstanceOf[ArrayType].containsNull && | ||
| right.dataType.asInstanceOf[ArrayType].containsNull) | ||
| } | ||
|
|
||
| override def dataType: DataType = internalDataType | ||
|
|
||
| @transient lazy val evalIntersect: (ArrayData, ArrayData) => ArrayData = { | ||
| if (TypeUtils.typeWithProperEquals(elementType)) { | ||
| (array1, array2) => | ||
|
|
@@ -3741,11 +3746,13 @@ case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBina | |
| case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryLike | ||
| with ComplexTypeMergingExpression { | ||
|
|
||
| override def dataType: DataType = { | ||
| private lazy val internalDataType: DataType = { | ||
| dataTypeCheck | ||
|
wangyum marked this conversation as resolved.
|
||
| left.dataType | ||
| } | ||
|
|
||
| override def dataType: DataType = internalDataType | ||
|
|
||
| @transient lazy val evalExcept: (ArrayData, ArrayData) => ArrayData = { | ||
| if (TypeUtils.typeWithProperEquals(elementType)) { | ||
| (array1, array2) => | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -225,10 +225,13 @@ case class ScalarSubquery( | |
| children: Seq[Expression] = Seq.empty, | ||
| exprId: ExprId = NamedExpression.newExprId) | ||
| extends SubqueryExpression(plan, children, exprId) with Unevaluable { | ||
| override def dataType: DataType = { | ||
|
|
||
| private lazy val internalDataType: DataType = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this need to be a lazy val? seems a very cheap method.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I reverted this change because I did not find an expression to call this method many times. |
||
| assert(plan.schema.fields.nonEmpty, "Scalar subquery should have only one column") | ||
| plan.schema.fields.head.dataType | ||
| } | ||
|
|
||
| override def dataType: DataType = internalDataType | ||
| override def nullable: Boolean = true | ||
| override def withNewPlan(plan: LogicalPlan): ScalarSubquery = copy(plan = plan) | ||
| override def toString: String = s"scalar-subquery#${exprId.id} $conditionString" | ||
|
|
||

Uh oh!
There was an error while loading. Please reload this page.