apache · maropu · Sep 13, 2016 · Sep 22, 2016 · Mar 3, 2017 · kiszk
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -103,6 +103,12 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val PREFER_SORTAGGREGATE = buildConf("spark.sql.aggregate.preferSortAggregate")
+    .internal()
+    .doc("When true, prefer sort aggregate over hash aggregate and object hash aggregate.")
+    .booleanConf
+    .createWithDefault(false)
+
   val RADIX_SORT_ENABLED = buildConf("spark.sql.sort.enableRadixSort")
     .internal()
     .doc("When true, enable use of radix sort when possible. Radix sort is much faster but " +
@@ -853,6 +859,8 @@ class SQLConf extends Serializable with Logging {
 
   def preferSortMergeJoin: Boolean = getConf(PREFER_SORTMERGEJOIN)
 
+  def preferSortAggregate: Boolean = getConf(PREFER_SORTAGGREGATE)
+
   def enableRadixSort: Boolean = getConf(RADIX_SORT_ENABLED)
 
   def defaultSizeInBytes: Long = getConf(DEFAULT_SIZE_IN_BYTES)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.plans.physical.Partitioning
 import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.execution.aggregate.HashAggregateExec
+import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, SortAggregateExec}
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec}
 import org.apache.spark.sql.execution.metric.SQLMetrics
 import org.apache.spark.sql.internal.SQLConf
@@ -38,7 +38,8 @@ trait CodegenSupport extends SparkPlan {
 
   /** Prefix used in the current operator's variable names. */
   private def variablePrefix: String = this match {
-    case _: HashAggregateExec => "agg"
+    case _: HashAggregateExec => "hagg"
+    case _: SortAggregateExec => "sagg"
     case _: BroadcastHashJoinExec => "bhj"
     case _: SortMergeJoinExec => "smj"
     case _: RDDScanExec => "rdd"

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala
@@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.streaming.{StateStoreRestoreExec, StateStoreSaveExec}
-import org.apache.spark.sql.internal.SQLConf
 
 /**
  * Utility functions used by the query planner to convert our plan to new aggregation code path.
@@ -35,41 +34,51 @@ object AggUtils {
       initialInputBufferOffset: Int = 0,
       resultExpressions: Seq[NamedExpression] = Nil,
       child: SparkPlan): SparkPlan = {
-    val useHash = HashAggregateExec.supportsAggregate(
-      aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes))
-    if (useHash) {
-      HashAggregateExec(
-        requiredChildDistributionExpressions = requiredChildDistributionExpressions,
-        groupingExpressions = groupingExpressions,
-        aggregateExpressions = aggregateExpressions,
-        aggregateAttributes = aggregateAttributes,
-        initialInputBufferOffset = initialInputBufferOffset,
-        resultExpressions = resultExpressions,
-        child = child)
-    } else {
-      val objectHashEnabled = child.sqlContext.conf.useObjectHashAggregation
-      val useObjectHash = ObjectHashAggregateExec.supportsAggregate(aggregateExpressions)
-
-      if (objectHashEnabled && useObjectHash) {
-        ObjectHashAggregateExec(
+    val hashAggregateOption = {
+      val preferSortAggregate = child.sqlContext.conf.preferSortAggregate
+      val useHash = HashAggregateExec.supportsAggregate(
+        aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes))
+      if (preferSortAggregate) {
+        None
+      } else if (useHash) {
+        val agg = HashAggregateExec(
           requiredChildDistributionExpressions = requiredChildDistributionExpressions,
           groupingExpressions = groupingExpressions,
           aggregateExpressions = aggregateExpressions,
           aggregateAttributes = aggregateAttributes,
           initialInputBufferOffset = initialInputBufferOffset,
           resultExpressions = resultExpressions,
           child = child)
+        Some(agg)
       } else {
-        SortAggregateExec(
-          requiredChildDistributionExpressions = requiredChildDistributionExpressions,
-          groupingExpressions = groupingExpressions,
-          aggregateExpressions = aggregateExpressions,
-          aggregateAttributes = aggregateAttributes,
-          initialInputBufferOffset = initialInputBufferOffset,
-          resultExpressions = resultExpressions,
-          child = child)
+        val objectHashEnabled = child.sqlContext.conf.useObjectHashAggregation
+        val useObjectHash = ObjectHashAggregateExec.supportsAggregate(aggregateExpressions)
+
+        if (objectHashEnabled && useObjectHash) {
+          val agg = ObjectHashAggregateExec(
+            requiredChildDistributionExpressions = requiredChildDistributionExpressions,
+            groupingExpressions = groupingExpressions,
+            aggregateExpressions = aggregateExpressions,
+            aggregateAttributes = aggregateAttributes,
+            initialInputBufferOffset = initialInputBufferOffset,
+            resultExpressions = resultExpressions,
+            child = child)
+          Some(agg)
+        } else {
+          None
+        }
       }
     }
+    hashAggregateOption.getOrElse {
+      SortAggregateExec(
+        requiredChildDistributionExpressions = requiredChildDistributionExpressions,
+        groupingExpressions = groupingExpressions,
+        aggregateExpressions = aggregateExpressions,
+        aggregateAttributes = aggregateAttributes,
+        initialInputBufferOffset = initialInputBufferOffset,
+        resultExpressions = resultExpressions,
+        child = child)
+    }
   }
 
   def planAggregateWithoutDistinct(

diff --git a/...core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenHelper.scala b/...core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenHelper.scala
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.aggregate
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate._
+import org.apache.spark.sql.catalyst.expressions.codegen._
+import org.apache.spark.sql.execution.CodegenSupport
+import org.apache.spark.sql.types.StructType
+
+trait AggregateCodegenHelper {
+  self: AggregateExec with CodegenSupport =>
+
+  protected val groupingAttributes = groupingExpressions.map(_.toAttribute)
+  protected val groupingKeySchema = StructType.fromAttributes(groupingAttributes)
+  protected val bufferSchema = StructType.fromAttributes(aggregateBufferAttributes)
+
+  protected lazy val declFunctions =
+    aggregateExpressions.map(_.aggregateFunction.asInstanceOf[DeclarativeAggregate])
+
+  protected var bufVars: Seq[ExprCode] = _
+
+  override def usedInputs: AttributeSet = inputSet
+
+  protected def generateBufVarsInitCode(ctx: CodegenContext): String = {
+    // Create an (isNull, value) pair of mutable state variables for each initial buffer value
+    val initExpr = declFunctions.flatMap(f => f.initialValues)
+    bufVars = initExpr.map { e =>
+      val isNull = ctx.freshName("bufIsNull")
+      val value = ctx.freshName("bufValue")
+      ctx.addMutableState("boolean", isNull, "")
+      ctx.addMutableState(ctx.javaType(e.dataType), value, "")
+      // The initial value expression should not access any input column
+      val ev = e.genCode(ctx)
+      val initVars = s"""
+         | $isNull = ${ev.isNull};
+         | $value = ${ev.value};
+       """.stripMargin
+      ExprCode(ev.code + initVars, isNull, value)
+    }
+    evaluateVariables(bufVars)
+  }
+
+  protected def generateBufVarsEvalCode(ctx: CodegenContext): String = {
+    val initAgg = ctx.freshName("initAgg")
+    ctx.addMutableState("boolean", initAgg, s"$initAgg = false;")
+
+    val initBufVar = generateBufVarsInitCode(ctx)
+
+    // Generate the output variables; how they are computed depends on the aggregate modes
+    val (resultVars, genResult) = if (modes.contains(Final) || modes.contains(Complete)) {
+      // Final/Complete: evaluate each function's result expression over the buffer variables
+      ctx.currentVars = bufVars
+      val aggResults = declFunctions.map(_.evaluateExpression).map { e =>
+        BindReferences.bindReference(e, aggregateBufferAttributes).genCode(ctx)
+      }
+      val evaluateAggResults = evaluateVariables(aggResults)
+      // then evaluate the result expressions over those aggregate results
+      ctx.currentVars = aggResults
+      val resultVars = resultExpressions.map { e =>
+        BindReferences.bindReference(e, aggregateAttributes).genCode(ctx)
+      }
+      (resultVars, s"""
+        |$evaluateAggResults
+        |${evaluateVariables(resultVars)}
+       """.stripMargin)
+    } else if (modes.contains(Partial) || modes.contains(PartialMerge)) {
+      // Partial/PartialMerge: output the aggregation buffer directly
+      (bufVars, "")
+    } else {
+      // No aggregate function: the result expressions should be literals
+      val resultVars = resultExpressions.map(_.genCode(ctx))
+      (resultVars, evaluateVariables(resultVars))
+    }
+
+    val doAgg = ctx.freshName("doAggregateWithoutKey")
+    ctx.addNewFunction(doAgg,
+      s"""
+         | private void $doAgg() throws java.io.IOException {
+         |   // initialize aggregation buffer
+         |   $initBufVar
+         |
+         |   ${child.asInstanceOf[CodegenSupport].produce(ctx, this)}
+         | }
+       """.stripMargin)
+
+    val numOutput = metricTerm(ctx, "numOutputRows")
+    val aggTime = metricTerm(ctx, "aggTime")
+    val beforeAgg = ctx.freshName("beforeAgg")
+    s"""
+       | while (!$initAgg) {
+       |   $initAgg = true;
+       |   long $beforeAgg = System.nanoTime();
+       |   $doAgg();
+       |   $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000);
+       |
+       |   // output the result
+       |   ${genResult.trim}
+       |
+       |   $numOutput.add(1);
+       |   ${consume(ctx, resultVars).trim}
+       | }
+     """.stripMargin
+  }
+
+  protected def generateBufVarsUpdateCode(ctx: CodegenContext, input: Seq[ExprCode]): String = {
+    // Every aggregate function is cast to DeclarativeAggregate; other kinds are not handled
+    val functions = aggregateExpressions.map(_.aggregateFunction.asInstanceOf[DeclarativeAggregate])
+    val inputAttrs = functions.flatMap(_.aggBufferAttributes) ++ child.output
+    val updateExpr = aggregateExpressions.flatMap { e =>
+      e.mode match {
+        case Partial | Complete =>
+          e.aggregateFunction.asInstanceOf[DeclarativeAggregate].updateExpressions
+        case PartialMerge | Final =>
+          e.aggregateFunction.asInstanceOf[DeclarativeAggregate].mergeExpressions
+      }
+    }
+    ctx.currentVars = bufVars ++ input
+    val boundUpdateExpr = updateExpr.map(BindReferences.bindReference(_, inputAttrs))
+    val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr)
+    val effectiveCodes = subExprs.codes.mkString("\n")
+    val aggVals = ctx.withSubExprEliminationExprs(subExprs.states) {
+      boundUpdateExpr.map(_.genCode(ctx))
+    }
+    // The buffer should be updated atomically: assign only after every new value is evaluated
+    val updates = aggVals.zipWithIndex.map { case (ev, i) =>
+      s"""
+         | ${bufVars(i).isNull} = ${ev.isNull};
+         | ${bufVars(i).value} = ${ev.value};
+       """.stripMargin
+    }
+    s"""
+       | // do aggregate
+       | // common sub-expressions
+       | $effectiveCodes
+       | // evaluate aggregate function
+       | ${evaluateVariables(aggVals)}
+       | // update aggregation buffer
+       | ${updates.mkString("\n").trim}
+     """.stripMargin
+  }
+
+  /**
+   * Builds the joiner for key and buffer rows; called by generated Java code, so it is public.
+   */
+  def createUnsafeJoiner(): UnsafeRowJoiner = {
+    GenerateUnsafeRowJoiner.create(groupingKeySchema, bufferSchema)
+  }
+
+  /**
+   * Generates the output code for one grouping key (`keyTerm`) and its buffer (`bufferTerm`).
+   */
+  protected def generateResultCode(
+      ctx: CodegenContext,
+      keyTerm: String,
+      bufferTerm: String,
+      self: String): String = {
+    if (modes.contains(Final) || modes.contains(Complete)) {
+      // Final/Complete: generate output using resultExpressions
+      ctx.currentVars = null
+      ctx.INPUT_ROW = keyTerm
+      val keyVars = groupingExpressions.zipWithIndex.map { case (e, i) =>
+        BoundReference(i, e.dataType, e.nullable).genCode(ctx)
+      }
+      val evaluateKeyVars = evaluateVariables(keyVars)
+      ctx.INPUT_ROW = bufferTerm
+      val bufferVars = aggregateBufferAttributes.zipWithIndex.map { case (e, i) =>
+        BoundReference(i, e.dataType, e.nullable).genCode(ctx)
+      }
+      val evaluateBufferVars = evaluateVariables(bufferVars)
+      // evaluate the aggregation results from the buffer variables
+      ctx.currentVars = bufferVars
+      val aggResults = declFunctions.map(_.evaluateExpression).map { e =>
+        BindReferences.bindReference(e, aggregateBufferAttributes).genCode(ctx)
+      }
+      val evaluateAggResults = evaluateVariables(aggResults)
+      // generate the final result from the key variables and the aggregation results
+      ctx.currentVars = keyVars ++ aggResults
+      val inputAttrs = groupingAttributes ++ aggregateAttributes
+      val resultVars = resultExpressions.map { e =>
+        BindReferences.bindReference(e, inputAttrs).genCode(ctx)
+      }
+      s"""
+       $evaluateKeyVars
+       $evaluateBufferVars
+       $evaluateAggResults
+       ${consume(ctx, resultVars)}
+       """
+
+    } else if (modes.contains(Partial) || modes.contains(PartialMerge)) {
+      // Partial/PartialMerge: should be the last operator in a stage, so output UnsafeRow directly
+      val joinerTerm = ctx.freshName("unsafeRowJoiner")
+      ctx.addMutableState(classOf[UnsafeRowJoiner].getName, joinerTerm,
+        s"$joinerTerm = $self.createUnsafeJoiner();")
+      val resultRow = ctx.freshName("resultRow")
+      s"""
+       UnsafeRow $resultRow = $joinerTerm.join($keyTerm, $bufferTerm);
+       ${consume(ctx, null, resultRow)}
+       """
+
+    } else {
+      // No aggregate function: generate the result from the grouping key only
+      ctx.INPUT_ROW = keyTerm
+      ctx.currentVars = null
+      val eval = resultExpressions.map{ e =>
+        BindReferences.bindReference(e, groupingAttributes).genCode(ctx)
+      }
+      consume(ctx, eval)
+    }
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateExec.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.aggregate
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
+import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.UnaryExecNode
+
+/**
+ * Base class for aggregate implementations; declares the members they have in common.
+ */
+abstract class AggregateExec extends UnaryExecNode {
+
+  def requiredChildDistributionExpressions: Option[Seq[Expression]]
+  def groupingExpressions: Seq[NamedExpression]
+  def aggregateExpressions: Seq[AggregateExpression]
+  def aggregateAttributes: Seq[Attribute]
+  def initialInputBufferOffset: Int
+  def resultExpressions: Seq[NamedExpression]
+  def child: SparkPlan
+
+  // The distinct modes of all aggregate expressions
+  protected val modes = aggregateExpressions.map(_.mode).distinct
+
+  protected val aggregateBufferAttributes = {
+    aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes)
+  }
+}