Skip to content

Commit 01cf8a4

Browse files
c21 authored and cloud-fan committed
[SPARK-32383][SQL] Preserve hash join (BHJ and SHJ) stream side ordering
### What changes were proposed in this pull request? Currently `BroadcastHashJoinExec` and `ShuffledHashJoinExec` do not preserve children output ordering information (inherit from `SparkPlan.outputOrdering`, which is Nil). This can add unnecessary sort in complex queries involved multiple joins. Example: ``` withSQLConf( SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50") { val df1 = spark.range(100).select($"id".as("k1")) val df2 = spark.range(100).select($"id".as("k2")) val df3 = spark.range(3).select($"id".as("k3")) val df4 = spark.range(100).select($"id".as("k4")) val plan = df1.join(df2, $"k1" === $"k2") .join(df3, $"k1" === $"k3") .join(df4, $"k1" === $"k4") .queryExecution .executedPlan } ``` Current physical plan (extra sort on `k1` before top sort merge join): ``` *(9) SortMergeJoin [k1#220L], [k4#232L], Inner :- *(6) Sort [k1#220L ASC NULLS FIRST], false, 0 : +- *(6) BroadcastHashJoin [k1#220L], [k3#228L], Inner, BuildRight : :- *(6) SortMergeJoin [k1#220L], [k2#224L], Inner : : :- *(2) Sort [k1#220L ASC NULLS FIRST], false, 0 : : : +- Exchange hashpartitioning(k1#220L, 5), true, [id=#128] : : : +- *(1) Project [id#218L AS k1#220L] : : : +- *(1) Range (0, 100, step=1, splits=2) : : +- *(4) Sort [k2#224L ASC NULLS FIRST], false, 0 : : +- Exchange hashpartitioning(k2#224L, 5), true, [id=#134] : : +- *(3) Project [id#222L AS k2#224L] : : +- *(3) Range (0, 100, step=1, splits=2) : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, false])), [id=#141] : +- *(5) Project [id#226L AS k3#228L] : +- *(5) Range (0, 3, step=1, splits=2) +- *(8) Sort [k4#232L ASC NULLS FIRST], false, 0 +- Exchange hashpartitioning(k4#232L, 5), true, [id=#148] +- *(7) Project [id#230L AS k4#232L] +- *(7) Range (0, 100, step=1, splits=2) ``` Ideal physical plan (no extra sort on `k1` before top sort merge join): ``` *(9) SortMergeJoin [k1#220L], [k4#232L], Inner :- *(6) BroadcastHashJoin [k1#220L], [k3#228L], Inner, BuildRight : :- *(6) SortMergeJoin [k1#220L], 
[k2#224L], Inner : : :- *(2) Sort [k1#220L ASC NULLS FIRST], false, 0 : : : +- Exchange hashpartitioning(k1#220L, 5), true, [id=#127] : : : +- *(1) Project [id#218L AS k1#220L] : : : +- *(1) Range (0, 100, step=1, splits=2) : : +- *(4) Sort [k2#224L ASC NULLS FIRST], false, 0 : : +- Exchange hashpartitioning(k2#224L, 5), true, [id=#133] : : +- *(3) Project [id#222L AS k2#224L] : : +- *(3) Range (0, 100, step=1, splits=2) : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, false])), [id=#140] : +- *(5) Project [id#226L AS k3#228L] : +- *(5) Range (0, 3, step=1, splits=2) +- *(8) Sort [k4#232L ASC NULLS FIRST], false, 0 +- Exchange hashpartitioning(k4#232L, 5), true, [id=#146] +- *(7) Project [id#230L AS k4#232L] +- *(7) Range (0, 100, step=1, splits=2) ``` ### Why are the changes needed? To avoid unnecessary sort in query, and it has most impact when users read sorted bucketed table. Though the unnecessary sort is operating on already sorted data, it would have obvious negative impact on IO and query run time if the data is large and external sorting happens. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit test in `JoinSuite`. Closes #29181 from c21/ordering. Authored-by: Cheng Su <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent 13c64c2 commit 01cf8a4

File tree

2 files changed

+78
-1
lines changed

2 files changed

+78
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,41 @@ trait HashJoin extends BaseJoinExec {
5252
}
5353
}
5454

// Preserve the streamed side's partitioning: a hash join only probes the
// build-side hashed relation, so stream-side rows keep their distribution.
// Reject join types that are invalid for the given build side.
override def outputPartitioning: Partitioning = (buildSide, joinType) match {
  case (BuildLeft, _: InnerLike | RightOuter) =>
    right.outputPartitioning
  case (BuildLeft, x) =>
    throw new IllegalArgumentException(
      s"HashJoin should not take $x as the JoinType with building left side")
  case (BuildRight, _: InnerLike | LeftOuter | LeftSemi | LeftAnti | _: ExistenceJoin) =>
    left.outputPartitioning
  case (BuildRight, x) =>
    throw new IllegalArgumentException(
      s"HashJoin should not take $x as the JoinType with building right side")
}

// Preserve the streamed side's output ordering for the same reason: probing
// the hashed relation emits stream-side rows in their incoming order, so no
// downstream Sort is needed on already-sorted stream-side data (SPARK-32383).
override def outputOrdering: Seq[SortOrder] = (buildSide, joinType) match {
  case (BuildLeft, _: InnerLike | RightOuter) =>
    right.outputOrdering
  case (BuildLeft, x) =>
    throw new IllegalArgumentException(
      s"HashJoin should not take $x as the JoinType with building left side")
  case (BuildRight, _: InnerLike | LeftOuter | LeftSemi | LeftAnti | _: ExistenceJoin) =>
    left.outputOrdering
  case (BuildRight, x) =>
    throw new IllegalArgumentException(
      s"HashJoin should not take $x as the JoinType with building right side")
}
5690

5791
protected lazy val (buildPlan, streamedPlan) = buildSide match {
5892
case BuildLeft => (left, right)

sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,4 +1104,47 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
11041104
})
11051105
}
11061106
}
1107+
1108+
// SPARK-32383: hash joins (BHJ and SHJ) must propagate the stream side's
// ordering so the planner does not insert a redundant Sort before a
// downstream sort merge join on the same key.
test("SPARK-32383: Preserve hash join (BHJ and SHJ) stream side ordering") {
  val df1 = spark.range(100).select($"id".as("k1"))
  val df2 = spark.range(100).select($"id".as("k2"))
  val df3 = spark.range(3).select($"id".as("k3"))
  val df4 = spark.range(100).select($"id".as("k4"))

  // Broadcast hash join: df3 is small enough to fall under the threshold.
  withSQLConf(
    SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50") {
    for (joinType <- Seq("inner", "left_outer")) {
      val plan = df1.join(df2, $"k1" === $"k2", joinType)
        .join(df3, $"k1" === $"k3", joinType)
        .join(df4, $"k1" === $"k4", joinType)
        .queryExecution
        .executedPlan
      assert(plan.collect { case j: SortMergeJoinExec => j }.size === 2)
      assert(plan.collect { case j: BroadcastHashJoinExec => j }.size === 1)
      // Only the shuffle-side sorts remain — no extra sort on k1 before the
      // top-level sort merge join.
      assert(plan.collect { case s: SortExec => s }.size === 3)
    }
  }

  // Shuffled hash join: disable the sort-merge preference and keep the
  // build side small relative to the threshold.
  withSQLConf(
    SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50",
    SQLConf.SHUFFLE_PARTITIONS.key -> "2",
    SQLConf.PREFER_SORTMERGEJOIN.key -> "false") {
    val df3 = spark.range(10).select($"id".as("k3"))

    for (joinType <- Seq("inner", "left_outer")) {
      val plan = df1.join(df2, $"k1" === $"k2", joinType)
        .join(df3, $"k1" === $"k3", joinType)
        .join(df4, $"k1" === $"k4", joinType)
        .queryExecution
        .executedPlan
      assert(plan.collect { case j: SortMergeJoinExec => j }.size === 2)
      assert(plan.collect { case j: ShuffledHashJoinExec => j }.size === 1)
      // No extra sort before the last sort merge join.
      assert(plan.collect { case s: SortExec => s }.size === 3)
    }
  }
}
11071150
}

0 commit comments

Comments
 (0)