-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-7215] made coalesce and repartition a part of the query plan #5762
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
686c90b
f2e6af1
2c349b5
fa4509f
5807e35
b1e76dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,8 @@ | |
|
|
||
| package org.apache.spark.sql.catalyst.optimizer | ||
|
|
||
| import org.apache.spark.sql.catalyst.expressions | ||
|
||
|
|
||
| import scala.collection.immutable.HashSet | ||
| import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
|
|
@@ -234,7 +236,7 @@ object NullPropagation extends Rule[LogicalPlan] { | |
| case e @ Count(expr) if !expr.nullable => Count(Literal(1)) | ||
|
|
||
| // For Coalesce, remove null literals. | ||
| case e @ Coalesce(children) => | ||
| case e @ expressions.Coalesce(children) => | ||
| val newChildren = children.filter { | ||
| case Literal(null, _) => false | ||
| case _ => true | ||
|
|
@@ -244,7 +246,7 @@ object NullPropagation extends Rule[LogicalPlan] { | |
| } else if (newChildren.length == 1) { | ||
| newChildren(0) | ||
| } else { | ||
| Coalesce(newChildren) | ||
| expressions.Coalesce(newChildren) | ||
| } | ||
|
|
||
| case e @ Substring(Literal(null, _), _, _) => Literal.create(null, e.dataType) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -310,6 +310,10 @@ case class Distinct(child: LogicalPlan) extends UnaryNode { | |
| override def output: Seq[Attribute] = child.output | ||
| } | ||
|
|
||
| case class Coalesce(numPartitions: Int, shuffle: Boolean, child: LogicalPlan) extends UnaryNode { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd consider calling this CoalescePartitions since coalesce is such a common SQL concept.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yup repartition ...
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apparently Repartition also exists :( I'm reverting to CoalescePartitions.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's going to be super confusing with existing repartition and CoalescePartitions. How about Repartition vs RepartitionByExpression? |
||
| override def output: Seq[Attribute] = child.output | ||
| } | ||
|
|
||
| /** | ||
| * A relation with one row. This is used in "SELECT ..." without a from clause. | ||
| */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -245,6 +245,19 @@ case class Distinct(partial: Boolean, child: SparkPlan) extends UnaryNode { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * :: DeveloperApi :: | ||
| * Return a new RDD that has exactly `numPartitions` partitions. | ||
| */ | ||
| @DeveloperApi | ||
| case class Coalesce(numPartitions: Int, shuffle: Boolean, child: SparkPlan) extends UnaryNode { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rxin Should I rename this to
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea repartition sounds better.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, or repartition sounds even better than CoalescePartitions |
||
| override def output: Seq[Attribute] = child.output | ||
|
|
||
| override def execute(): RDD[Row] = { | ||
| child.execute().map(_.copy()).coalesce(numPartitions, shuffle) | ||
| } | ||
| } | ||
|
|
||
|
|
||
| /** | ||
| * :: DeveloperApi :: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
wrap the whole case as is done below