Closed
Changes from all commits (52 commits)
b7a4a3e
Secondary sort Sort Merge Join optimization. Not finished yet.
Mar 22, 2018
fd81613
SortMergeJoin secondary sort optimization
Mar 22, 2018
f1efa9b
Sort-Merge "inner range join" (secondary sort) - code generation
Apr 4, 2018
f533f65
Sort-Merge "inner range join" (secondary sort) - two bug fixes - work…
Apr 5, 2018
2ff492c
Code simplification
Apr 9, 2018
3ff654a
Bug fix
Apr 10, 2018
85039bb
Scalastyle fixes
Apr 13, 2018
bbcb400
Scalastyle fixes
Apr 13, 2018
640aa6d
SMJ range join unit tests
Apr 13, 2018
2548b7d
Scalastyle
Apr 13, 2018
069bc01
Scalastyle
Apr 13, 2018
3bc71c5
Scalastyle
Apr 13, 2018
5c62f98
Fix generated code - dequeue method missing
Apr 13, 2018
16e3e1b
Bug fix: include other binary comparisons in range conditions match
Apr 13, 2018
e7f7bdf
Test fix: sortWithinPartitions; Bug Fix: check references in rangeCon…
Apr 16, 2018
41cde27
Test fix
Apr 16, 2018
080ab0d
Test fix
Apr 17, 2018
8628216
Fix required child ordering for inner range queries
Apr 18, 2018
7bd6732
Parameter for turning off inner range optimization
Apr 19, 2018
094f66b
Scala style
Apr 19, 2018
efd595e
Bug fix - NPE when inner range optimization turned off
Apr 19, 2018
a8372e3
Adding test case when inner range optimization is turned off
Apr 19, 2018
4396985
Scala style
Apr 19, 2018
9c14368
Scala style
Apr 19, 2018
6cbf9fe
Remove range condition extraction when inner range join optimization …
Apr 19, 2018
82943b8
Scala style
Apr 19, 2018
bbddf7a
Unit test fix
Apr 19, 2018
c4060d7
Unit test fix
Apr 19, 2018
5b0f2b5
- Turning off inner range optimization when whole stage code generati…
Apr 27, 2018
68e00c0
Switch off inner range optimization when whole stage codegen is off.
Apr 27, 2018
f5b9ca8
SMJ inner range optimization benchmarks
Apr 27, 2018
7457ab3
Removing "expensive function" from the SMJ inner range optimization b…
Apr 28, 2018
c47c8cd
SMJ inner range optimization with wholeStage codegen turned off - cod…
May 10, 2018
3fbedfc
Unit test fix. Benchmark results update.
May 10, 2018
b8e1ee4
Scalastyle for comments
May 10, 2018
2710957
Code changes based on review comments.
May 15, 2018
52f2b70
Code review changes
Jun 7, 2018
169bd70
Removing exception when numRowsInMemoryBufferThreshold is reached in …
Jun 7, 2018
89169de
Scala style
Jun 7, 2018
eeaf048
Unneeded import
Jun 13, 2018
75ce55d
A dot
Jun 13, 2018
77dd2a8
A dot
Jun 14, 2018
eac81b4
A dot
Jun 15, 2018
1abde55
A dot
Jun 18, 2018
dfb4c0f
A dot
Jun 19, 2018
6d4c031
Fixes for some rebase issues.
ctslater Jun 28, 2018
6d9cd12
Merge with upstream
zecevicp Jun 29, 2018
7742c10
Merge with upstream
zecevicp Jun 29, 2018
3a717ee
SMJ inner range spill over implementation and tests
zecevicp Aug 10, 2018
64437e5
External unsafe row dequeue test extension
zecevicp Aug 10, 2018
0a5c8de
A dot
zecevicp Aug 11, 2018
07ff4d3
Merge branch 'master' into branch-pz-smj
zecevicp Dec 10, 2018
@@ -41,7 +41,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging {
*/
def isWatermarkInJoinKeys(plan: LogicalPlan): Boolean = {
plan match {
- case ExtractEquiJoinKeys(_, leftKeys, rightKeys, _, _, _) =>
+ case ExtractEquiJoinKeys(_, leftKeys, rightKeys, _, _, _, _) =>
(leftKeys ++ rightKeys).exists {
case a: AttributeReference => a.metadata.contains(EventTimeWatermark.delayKey)
case _ => false
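Note on the pattern change above: ExtractEquiJoinKeys now returns a 7-tuple instead of a 6-tuple, so every match site gains one position. A minimal sketch of the mechanical update callers need (a hypothetical helper, not code from this PR; assumes the usual catalyst imports):

// rangeConditions (a Seq[BinaryComparison]) is the new 4th element of ReturnType;
// callers that do not use it simply ignore it with an extra `_`.
def isEquiJoin(plan: LogicalPlan): Boolean = plan match {
  case ExtractEquiJoinKeys(_, leftKeys, rightKeys, _, _, _, _) =>
    leftKeys.nonEmpty && rightKeys.nonEmpty
  case _ => false
}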
@@ -18,9 +18,8 @@
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
- import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID}
+ import org.apache.spark.sql.catalyst.expressions.{Attribute, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
- import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
@@ -17,12 +17,15 @@

package org.apache.spark.sql.catalyst.planning

import scala.collection.mutable

import org.apache.spark.internal.Logging
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf

/**
* A pattern that matches any number of project or filter operations on top of another relational
@@ -98,9 +101,10 @@ object PhysicalOperation extends PredicateHelper {
* value).
*/
object ExtractEquiJoinKeys extends Logging with PredicateHelper {
- /** (joinType, leftKeys, rightKeys, condition, leftChild, rightChild) */
+ /** (joinType, leftKeys, rightKeys, rangeConditions, condition, leftChild, rightChild) */
type ReturnType =
- (JoinType, Seq[Expression], Seq[Expression], Option[Expression], LogicalPlan, LogicalPlan)
+ (JoinType, Seq[Expression], Seq[Expression], Seq[BinaryComparison],
+ Option[Expression], LogicalPlan, LogicalPlan)

def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
case join @ Join(left, right, joinType, condition) =>
@@ -132,13 +136,97 @@ object ExtractEquiJoinKeys extends Logging with PredicateHelper {

if (joinKeys.nonEmpty) {
val (leftKeys, rightKeys) = joinKeys.unzip
logDebug(s"leftKeys:$leftKeys | rightKeys:$rightKeys")
- Some((joinType, leftKeys, rightKeys, otherPredicates.reduceOption(And), left, right))
// Find any simple range expressions between two columns
// (and involving only those two columns) of the two tables being joined,
// which are not used in the equijoin expressions,
// and which can be used for secondary sort optimizations.
// rangePreds will contain the original expressions to be filtered out later.
val rangePreds = mutable.Set.empty[Expression]
var rangeConditions: Seq[BinaryComparison] =
if (SQLConf.get.useSmjInnerRangeOptimization) {
otherPredicates.flatMap {
case p@LessThan(l, r) => checkRangeConditions(l, r, left, right, joinKeys).map {
case true => rangePreds.add(p); GreaterThan(r, l)
case false => rangePreds.add(p); p
}
case p@LessThanOrEqual(l, r) =>
checkRangeConditions(l, r, left, right, joinKeys).map {
case true => rangePreds.add(p); GreaterThanOrEqual(r, l)
case false => rangePreds.add(p); p
}
case p@GreaterThan(l, r) => checkRangeConditions(l, r, left, right, joinKeys).map {
case true => rangePreds.add(p); LessThan(r, l)
case false => rangePreds.add(p); p
}
case p@GreaterThanOrEqual(l, r) =>
checkRangeConditions(l, r, left, right, joinKeys).map {
case true => rangePreds.add(p); LessThanOrEqual(r, l)
case false => rangePreds.add(p); p
}
case _ => None
}
} else {
Nil
}

// The secondary sort optimization is only used when both lower and upper range conditions
// are specified (e.g. t1.a < t2.b + x and t1.a > t2.b - x)
if (rangeConditions.size != 2 ||
// Looking for one < and one > comparison:
rangeConditions.forall(x => !x.isInstanceOf[LessThan] &&
!x.isInstanceOf[LessThanOrEqual]) ||
rangeConditions.forall(x => !x.isInstanceOf[GreaterThan] &&
!x.isInstanceOf[GreaterThanOrEqual]) ||
// Check if both comparisons reference the same columns:
rangeConditions.flatMap(c => c.left.references.toSeq.distinct).distinct.size != 1 ||
rangeConditions.flatMap(c => c.right.references.toSeq.distinct).distinct.size != 1) {
logDebug("Inner range optimization conditions not met. Clearing range conditions")
rangeConditions = Nil
rangePreds.clear()
}

Some((joinType, leftKeys, rightKeys, rangeConditions,
otherPredicates.filterNot(rangePreds.contains(_)).reduceOption(And), left, right))
} else {
None
}
case _ => None
}

/**
* Checks whether l and r form a valid range condition:
* - l and r should each reference exactly one column
* - the referenced columns should not be part of joinKeys
* If these conditions are not met, the function returns None.
*
* Otherwise, the function checks whether l can be evaluated using the left plan's output and r
* using the right plan's output, or vice versa. If the expressions need to be switched (l belongs
* to the right plan and r to the left), the function returns Some(true), and Some(false) otherwise.
*/
private def checkRangeConditions(l : Expression, r : Expression,
left : LogicalPlan, right : LogicalPlan,
joinKeys : Seq[(Expression, Expression)]): Option[Boolean] = {
val (lattrs, rattrs) = (l.references.toSeq, r.references.toSeq)
if (lattrs.size != 1 || rattrs.size != 1) {
None
} else if (canEvaluate(l, left) && canEvaluate(r, right)) {
if (joinKeys.exists { case (ljk : Expression, rjk : Expression) =>
ljk.references.toSeq.contains(lattrs(0)) && rjk.references.toSeq.contains(rattrs(0)) }) {
None
} else {
Some(false)
}
} else if (canEvaluate(l, right) && canEvaluate(r, left)) {
if (joinKeys.exists{ case (ljk : Expression, rjk : Expression) =>
rjk.references.toSeq.contains(lattrs(0)) && ljk.references.toSeq.contains(rattrs(0)) }) {
None
} else {
Some(true)
}
} else {
None
}
}
}

/**
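To make the new extraction concrete, here is a hedged worked example (the tables and column names are illustrative assumptions, not part of this PR):

// Join: SELECT * FROM t1 JOIN t2
//       ON t1.x = t2.x AND t1.y > t2.y - 5 AND t1.y < t2.y + 5
//
// With the optimization enabled, ExtractEquiJoinKeys should yield roughly:
//   leftKeys        = Seq(t1.x)
//   rightKeys       = Seq(t2.x)
//   rangeConditions = Seq(GreaterThan(t1.y, t2.y - 5), LessThan(t1.y, t2.y + 5))
//   condition       = None  // both range predicates moved into rangeConditions
//
// A predicate written the other way around, e.g. t2.y - 5 < t1.y, is flipped
// (checkRangeConditions returns Some(true)) so that the left plan's column
// always ends up on the left-hand side of each normalized comparison.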
@@ -56,7 +56,7 @@ case class JoinEstimation(join: Join) extends Logging {
case _ if !rowCountsExist(join.left, join.right) =>
None

- case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, _, _, _) =>
+ case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, _, _, _, _) =>
// 1. Compute join selectivity
val joinKeyPairs = extractJoinKeysWithColStats(leftKeys, rightKeys)
val (numInnerJoinedRows, keyStatsAfterJoin) = computeCardinalityAndStats(joinKeyPairs)
@@ -1492,6 +1492,19 @@ object SQLConf {
.intConf
.createWithDefault(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)

[Review thread]
Member: Yes, at best make this internal. Are there conditions where you would not want to apply this? Is it just a safety valve?
Author: It's just a safety valve. In case there are some queries that I don't foresee now where this could get in the way.

val USE_SMJ_INNER_RANGE_OPTIMIZATION =
buildConf("spark.sql.join.smj.useInnerRangeOptimization")
.internal()
.doc("Sort-merge join 'inner range optimization' is applicable in cases where the join " +
"condition includes equality expressions on pairs of columns and a range expression " +
"involving two other columns, (e.g. t1.x = t2.x AND t1.y BETWEEN t2.y - d AND t2.y + d)." +
" If the inner range optimization is enabled, the number of rows considered for each " +
"match of equality conditions can be reduced considerably because a moving window, " +
"corresponding to the range conditions, will be used for iterating over matched rows " +
"in the right relation.")
.booleanConf
.createWithDefault(true)

object Deprecated {
val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
}
@@ -1818,6 +1831,8 @@ class SQLConf extends Serializable with Logging {

def topKSortFallbackThreshold: Int = getConf(TOP_K_SORT_FALLBACK_THRESHOLD)

def useSmjInnerRangeOptimization: Boolean = getConf(USE_SMJ_INNER_RANGE_OPTIMIZATION)

def fastHashAggregateRowMaxCapacityBit: Int = getConf(FAST_HASH_AGGREGATE_MAX_ROWS_CAPACITY_BIT)

/**
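Because the flag is internal but still settable, disabling the optimization is a one-line session change. A minimal sketch, assuming a live SparkSession named spark and illustrative tables t1 and t2:

// Safety valve: switch the inner range optimization off for this session.
spark.conf.set("spark.sql.join.smj.useInnerRangeOptimization", "false")

// Shape of query the optimization targets when enabled:
// an equality on x plus a two-sided range on y.
val joined = spark.sql(
  """SELECT *
    |FROM t1 JOIN t2
    |ON t1.x = t2.x AND t1.y BETWEEN t2.y - 5 AND t2.y + 5""".stripMargin)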
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution

import java.util.ConcurrentModificationException

- import scala.collection.mutable.ArrayBuffer
+ import scala.collection.mutable.{ArrayBuffer, Queue}

import org.apache.spark.{SparkEnv, TaskContext}
import org.apache.spark.internal.Logging
@@ -41,12 +41,16 @@ import org.apache.spark.util.collection.unsafe.sort.{UnsafeExternalSorter, Unsaf
* - If [[numRowsSpillThreshold]] is too low, data will be spilled frequently and lead to
* excessive disk writes. This may lead to a performance regression compared to the normal case
* of using an [[ArrayBuffer]] or [[Array]].
*
* If [[asQueue]] is set to true, the class will function as a queue, supporting peek() and
* dequeue() operations.
*/
private[sql] class ExternalAppendOnlyUnsafeRowArray(
taskMemoryManager: TaskMemoryManager,
blockManager: BlockManager,
serializerManager: SerializerManager,
taskContext: TaskContext,
asQueue: Boolean,
initialSize: Int,
pageSizeBytes: Long,
numRowsInMemoryBufferThreshold: Int,
@@ -58,6 +62,20 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray(
SparkEnv.get.blockManager,
SparkEnv.get.serializerManager,
TaskContext.get(),
false,
1024,
SparkEnv.get.memoryManager.pageSizeBytes,
numRowsInMemoryBufferThreshold,
numRowsSpillThreshold)
}

def this(numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int, asQueue: Boolean) {
this(
TaskContext.get().taskMemoryManager(),
SparkEnv.get.blockManager,
SparkEnv.get.serializerManager,
TaskContext.get(),
asQueue,
1024,
SparkEnv.get.memoryManager.pageSizeBytes,
numRowsInMemoryBufferThreshold,
@@ -67,7 +85,13 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray(
private val initialSizeOfInMemoryBuffer =
Math.min(DefaultInitialSizeOfInMemoryBuffer, numRowsInMemoryBufferThreshold)

- private val inMemoryBuffer = if (initialSizeOfInMemoryBuffer > 0) {
+ private val inMemoryQueue = if (asQueue && initialSizeOfInMemoryBuffer > 0) {
+ new Queue[UnsafeRow]()
+ } else {
+ null
+ }
+
+ private val inMemoryBuffer = if (!asQueue && initialSizeOfInMemoryBuffer > 0) {
new ArrayBuffer[UnsafeRow](initialSizeOfInMemoryBuffer)
} else {
null
@@ -76,6 +100,9 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray(
private var spillableArray: UnsafeExternalSorter = _
private var numRows = 0

// Used when functioning as a queue to allow skipping 'dequeued' items
private var spillableArrayOffset = 0

// A counter to keep track of total modifications done to this array since its creation.
// This helps to invalidate iterators when there are changes done to the backing array.
private var modificationsCount: Long = 0
@@ -95,17 +122,60 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray(
// inside `UnsafeExternalSorter`
spillableArray.cleanupResources()
spillableArray = null
spillableArrayOffset = 0
} else if (inMemoryBuffer != null) {
inMemoryBuffer.clear()
} else if (inMemoryQueue != null) {
inMemoryQueue.clear()
}
numFieldsPerRow = 0
numRows = 0
modificationsCount += 1
}

def dequeue(): Option[UnsafeRow] = {
if (!asQueue) {
throw new IllegalStateException("Not instantiated as a queue!")
}
if (numRows == 0) {
None
} else if (spillableArray != null) {
// Rows have been spilled: read the current head via an iterator, then advance
// spillableArrayOffset so later reads skip the dequeued row (the sorter itself
// does not support removal).
val retval = Some(generateIterator().next())
numRows -= 1
modificationsCount += 1
spillableArrayOffset += 1
retval
} else {
numRows -= 1
modificationsCount += 1
Some(inMemoryQueue.dequeue())
}
}

def peek(): Option[UnsafeRow] = {
if (!asQueue) {
throw new IllegalStateException("Not instantiated as a queue!")
}
if (numRows == 0) {
None
} else if (spillableArray != null) {
// Read the current head without advancing spillableArrayOffset.
Some(generateIterator().next())
} else {
Some(inMemoryQueue(0))
}
}

def add(unsafeRow: UnsafeRow): Unit = {
- if (numRows < numRowsInMemoryBufferThreshold) {
- inMemoryBuffer += unsafeRow.copy()
+ if (spillableArray == null && numRows < numRowsInMemoryBufferThreshold) {
+ if (asQueue) {
+ inMemoryQueue += unsafeRow.copy()
+ } else {
+ inMemoryBuffer += unsafeRow.copy()
+ }
} else {
if (spillableArray == null) {
logInfo(s"Reached spill threshold of $numRowsInMemoryBufferThreshold rows, switching to " +
@@ -124,8 +194,21 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray(
numRowsSpillThreshold,
false)

spillableArrayOffset = 0

// populate with existing in-memory buffered rows
- if (inMemoryBuffer != null) {
+ if (asQueue && inMemoryQueue != null) {
+ inMemoryQueue.foreach(existingUnsafeRow =>
+ spillableArray.insertRecord(
+ existingUnsafeRow.getBaseObject,
+ existingUnsafeRow.getBaseOffset,
+ existingUnsafeRow.getSizeInBytes,
+ 0,
+ false)
+ )
+ inMemoryQueue.clear()
+ }
+ if (!asQueue && inMemoryBuffer != null) {
inMemoryBuffer.foreach(existingUnsafeRow =>
spillableArray.insertRecord(
existingUnsafeRow.getBaseObject,
@@ -168,7 +251,8 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray(
if (spillableArray == null) {
new InMemoryBufferIterator(startIndex)
} else {
- new SpillableArrayIterator(spillableArray.getIterator(startIndex), numFieldsPerRow)
+ new SpillableArrayIterator(spillableArray.getIterator(startIndex + spillableArrayOffset),
+ numFieldsPerRow)
}
}

@@ -198,7 +282,7 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray(

override def next(): UnsafeRow = {
throwExceptionIfModified()
- val result = inMemoryBuffer(currentIndex)
+ val result = if (asQueue) inMemoryQueue(currentIndex) else inMemoryBuffer(currentIndex)
currentIndex += 1
result
}
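For reference, a minimal usage sketch of the new queue mode (thresholds and variable names are arbitrary assumptions; the three-argument constructor must run inside a task, since it reads TaskContext.get() and SparkEnv.get):

// rows: Iterator[UnsafeRow] produced by the surrounding operator.
val queue = new ExternalAppendOnlyUnsafeRowArray(
  4096,  // numRowsInMemoryBufferThreshold: rows kept in the in-memory Queue
  65536, // numRowsSpillThreshold: beyond this, UnsafeExternalSorter spills to disk
  true)  // asQueue = true enables peek() and dequeue()

rows.foreach(queue.add)     // each row is copied on add
val head = queue.peek()     // Option[UnsafeRow]: inspect the head without consuming it
val first = queue.dequeue() // Option[UnsafeRow]: consume the head
// After a spill, dequeue() advances spillableArrayOffset rather than physically
// removing the row; generateIterator() then starts past all dequeued rows.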