@@ -56,26 +56,24 @@ trait ShuffleExchangeLike extends Exchange {
5656 */
5757 def numPartitions : Int
5858
59- def partitioningFlexibility : PartitioningFlexibility .Value
59+ def shuffleOrigin : ShuffleOrigin .Value
6060
6161 /**
6262 * Returns whether the shuffle partition number can be changed.
6363 */
6464 final def canChangeNumPartitions : Boolean = {
6565 // If users specify the num partitions via APIs like `repartition(5, col)`, we shouldn't change
6666 // it. For `SinglePartition`, it requires exactly one partition and we can't change it either.
67- partitioningFlexibility != PartitioningFlexibility .STRICT &&
68- outputPartitioning != SinglePartition
67+ shuffleOrigin != ShuffleOrigin .REPARTITION_WITH_NUM && outputPartitioning != SinglePartition
6968 }
7069
7170 /**
72- * Returns whether the shuffle output clustering can be changed.
71+ * Returns whether the shuffle output data partitioning can be changed.
7372 */
74- final def canChangeClustering : Boolean = {
73+ final def canChangePartitioning : Boolean = {
7574 // If users specify the partitioning via APIs like `repartition(col)`, we shouldn't change it.
7675 // For `SinglePartition`, itself is a special partitioning and we can't change it either.
77- partitioningFlexibility == PartitioningFlexibility .UNSPECIFIED &&
78- outputPartitioning != SinglePartition
76+ shuffleOrigin == ShuffleOrigin .ENSURE_REQUIREMENTS && outputPartitioning != SinglePartition
7977 }
8078
8179 /**
@@ -94,16 +92,19 @@ trait ShuffleExchangeLike extends Exchange {
9492 def runtimeStatistics : Statistics
9593}
9694
97- object PartitioningFlexibility extends Enumeration {
98- type PartitioningFlexibility = Value
99- // STRICT means we can't change the partitioning at all, including the partition number, even if
100- // we lose performance improvement opportunity.
101- val STRICT = Value
102- // PRESERVE_CLUSTERING means we must preserve the data clustering even if it's useless to the
103- // downstream operators. Shuffle partition number can be changed.
104- val PRESERVE_CLUSTERING = Value
105- // UNSPECIFIED means the partitioning can be changed as long as it doesn't break query semantic.
106- val UNSPECIFIED = Value
95+ // Describes where the shuffle operator comes from.
96+ object ShuffleOrigin extends Enumeration {
97+ type ShuffleOrigin = Value
98+ // Indicates that the shuffle operator was added by the internal `EnsureRequirements` rule. It
99+ // means that the shuffle operator is used to ensure internal data partitioning requirements and
100+ // Spark is free to optimize it as long as the requirements are still ensured.
101+ val ENSURE_REQUIREMENTS = Value
102+ // Indicates that the shuffle operator was added by the user-specified repartition operator. Spark
103+ // can still optimize it via changing shuffle partition number, as data partitioning won't change.
104+ val REPARTITION = Value
105+ // Indicates that the shuffle operator was added by the user-specified repartition operator with
106+ // a certain partition number. Spark can't optimize it.
107+ val REPARTITION_WITH_NUM = Value
107108}
108109
109110/**
@@ -112,7 +113,7 @@ object PartitioningFlexibility extends Enumeration {
112113case class ShuffleExchangeExec (
113114 override val outputPartitioning : Partitioning ,
114115 child : SparkPlan ,
115- partitioningFlexibility : PartitioningFlexibility .Value = PartitioningFlexibility . UNSPECIFIED )
116+ shuffleOrigin : ShuffleOrigin .Value = ShuffleOrigin . ENSURE_REQUIREMENTS )
116117 extends ShuffleExchangeLike {
117118
118119 private lazy val writeMetrics =
0 commit comments