-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-34079][SQL] Merge non-correlated scalar subqueries #32298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 69 commits
e0e39d5
0a7e0e2
e35cdc1
0cff7b2
22e833d
42add09
e63111d
c84f0ee
ee8f12a
17fd666
6134fa9
2828345
1f2f75c
a3e84a4
0fe66dc
100cb9c
9d8dd6b
f83f22b
41c0f0a
d10a8be
db34640
d081885
e98754a
bb623cf
ae1d84e
2eb14f1
060e4b7
d86d2c4
0a97c8b
61f2b34
532d05e
c488377
e0a7610
63c3709
dabbea4
4d97de5
cc8690e
83c78ca
3130913
3e8f7fa
252c9b1
fa5e786
e292732
963c423
9efaf2a
96a502d
5b91d61
8bcf515
87ba289
6d5a124
851ca29
96d0cab
a57ed32
0b34d83
4985d43
de9b312
13a2fad
92ce6e5
67ffae6
a32a85c
1bc8a45
224edef
a7fd1c5
a5eb5df
8457148
1ff64e4
13a1cdb
4da3fe6
96ed6fd
dbe81e2
ba299d5
65f3425
dc5e9b9
c64373b
3993eab
f93283d
8c5c9ac
3b7ad2c
c268580
169fd6b
19128ff
1c4d14b
2590edf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.analysis.{AnsiTypeCoercion, MultiInstanceRe | |
| import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} | ||
| import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_STORING_ANALYZED_PLAN | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression | ||
| import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, TypedImperativeAggregate} | ||
| import org.apache.spark.sql.catalyst.plans._ | ||
| import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning, SinglePartition} | ||
| import org.apache.spark.sql.catalyst.trees.TreeNodeTag | ||
|
|
@@ -671,23 +671,27 @@ case class CTERelationDef(child: LogicalPlan, id: Long = CTERelationDef.newId) e | |
| } | ||
|
|
||
| object CTERelationDef { | ||
| private val curId = new java.util.concurrent.atomic.AtomicLong() | ||
| private[sql] val curId = new java.util.concurrent.atomic.AtomicLong() | ||
| def newId: Long = curId.getAndIncrement() | ||
| } | ||
|
|
||
| /** | ||
| * Represents the relation of a CTE reference. | ||
| * @param cteId The ID of the corresponding CTE definition. | ||
| * @param _resolved Whether this reference is resolved. | ||
| * @param output The output attributes of this CTE reference, which can be different from | ||
| * the output of its corresponding CTE definition after attribute de-duplication. | ||
| * @param statsOpt The optional statistics inferred from the corresponding CTE definition. | ||
| * @param cteId The ID of the corresponding CTE definition. | ||
| * @param _resolved Whether this reference is resolved. | ||
| * @param output The output attributes of this CTE reference, which can be different | ||
| * from the output of its corresponding CTE definition after attribute | ||
| * de-duplication. | ||
| * @param statsOpt The optional statistics inferred from the corresponding CTE | ||
| * definition. | ||
| * @param mergedScalarSubquery If this reference points to a merged scalar subquery. | ||
| */ | ||
| case class CTERelationRef( | ||
| cteId: Long, | ||
| _resolved: Boolean, | ||
| override val output: Seq[Attribute], | ||
| statsOpt: Option[Statistics] = None) extends LeafNode with MultiInstanceRelation { | ||
| statsOpt: Option[Statistics] = None, | ||
| mergedScalarSubquery: Boolean = false) extends LeafNode with MultiInstanceRelation { | ||
|
|
||
| final override val nodePatterns: Seq[TreePattern] = Seq(CTE) | ||
|
|
||
|
|
@@ -1007,6 +1011,24 @@ case class Aggregate( | |
| } | ||
| } | ||
|
|
||
| object Aggregate { | ||
| def supportsAggregationBufferSchema(schema: StructType): Boolean = { | ||
|
||
| schema.forall(f => UnsafeRow.isMutable(f.dataType)) | ||
| } | ||
|
|
||
| def supportsHashAggregate(aggregateBufferAttributes: Seq[Attribute]): Boolean = { | ||
| val aggregationBufferSchema = StructType.fromAttributes(aggregateBufferAttributes) | ||
| supportsAggregationBufferSchema(aggregationBufferSchema) | ||
| } | ||
|
|
||
| def supportsObjectHashAggregate(aggregateExpressions: Seq[AggregateExpression]): Boolean = { | ||
| aggregateExpressions.map(_.aggregateFunction).exists { | ||
| case _: TypedImperativeAggregate[_] => true | ||
| case _ => false | ||
| } | ||
| } | ||
| } | ||
|
|
||
| case class Window( | ||
| windowExpressions: Seq[NamedExpression], | ||
| partitionSpec: Seq[Expression], | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is because with this PR some bloom filter aggregate subqueries can be merged. E.g.
=>