-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-15764][SQL] Replace N^2 loop in BindReferences #13505
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
6216e94
b1a7646
38e8a99
0b412b0
bc17587
e7c4150
210dbd3
dd94e29
b933fe0
4efd3ee
5504b6c
bdb68ad
99197b7
5e9c258
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,8 @@ | |
|
|
||
| package org.apache.spark.sql.catalyst | ||
|
|
||
| import com.google.common.collect.Maps | ||
|
|
||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.types.{StructField, StructType} | ||
|
|
||
|
|
@@ -86,11 +88,42 @@ package object expressions { | |
/**
 * Helper functions for working with `Seq[Attribute]`.
 */
implicit class AttributeSeq(val attrs: Seq[Attribute]) {

  /** Creates a StructType with a schema matching this `Seq[Attribute]`. */
  def toStructType: StructType =
    StructType(attrs.map(a => StructField(a.name, a.dataType, a.nullable)))

  // `attrs` may be a linked list, which would make positional access O(n) per lookup and
  // turn ordinal-heavy callers into O(n^2) loops. Materialize it once as an array so that
  // access by ordinal is O(1).
  private lazy val attrsArray = attrs.toArray

  // Lazily-built index from expression id to the ordinal of the first attribute carrying it.
  // Duplicated expression ids keep their earliest ordinal ("first match wins").
  private lazy val exprIdToOrdinal = {
    val ordinals = Maps.newHashMapWithExpectedSize[ExprId, Int](attrsArray.length)
    var i = 0
    while (i < attrsArray.length) {
      val id = attrsArray(i).exprId
      // Only record the first occurrence of each expression id.
      if (!ordinals.containsKey(id)) {
        ordinals.put(id, i)
      }
      i += 1
    }
    ordinals
  }

  /** Returns the attribute at the given index. */
  def apply(ordinal: Int): Attribute = attrsArray(ordinal)

  /**
   * Returns the index of the first attribute with a matching expression id,
   * or -1 if no match exists.
   */
  def getOrdinalWithExprId(exprId: ExprId): Int = {
    // The Java map's `get` yields null for a missing key; Option(...) maps that to None.
    Option(exprIdToOrdinal.get(exprId)).getOrElse(-1)
  }
}
|
|
||
| /** | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -296,7 +296,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT | |
| /** | ||
| * All the attributes that are used for this plan. | ||
| */ | ||
| lazy val allAttributes: Seq[Attribute] = children.flatMap(_.output) | ||
| lazy val allAttributes: AttributeSeq = children.flatMap(_.output) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ericl and I found another layer of polynomial looping: in QueryPlan.cleanArgs we take every expression in the query plan and bind its references against
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably construct the AttributeSeq outside of the loop in the various projection operators, too, although that doesn't appear to be as serious a bottleneck yet. |
||
|
|
||
| private def cleanExpression(e: Expression): Expression = e match { | ||
| case a: Alias => | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was vaguely-related yet unused code that I stumbled across while looking for similar occurrences of this pattern, so I decided to remove it.