@@ -871,6 +871,16 @@ object SQLConf {
.intConf
.createWithDefault(2)

val STREAMING_AGGREGATION_STATE_FORMAT_VERSION =
buildConf("spark.sql.streaming.aggregation.stateFormatVersion")
.internal()
.doc("State format version used by streaming aggregation operations in a streaming query. " +
"State between versions are tend to be incompatible, so state format version shouldn't " +
"be modified after running.")
.intConf
.checkValue(v => Set(1, 2).contains(v), "Valid versions are 1 and 2")
.createWithDefault(2)
Contributor:
If you intend to change the default to the new version, then you HAVE TO add a test that ensures that existing streaming aggregation checkpoints (generated in Spark 2.3.1 for example) will not fail to recover.

Similar to this test - https://github.com/apache/spark/blob/master/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala#L883

Contributor (author):
Nice suggestion. Will add the test.
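
For reference, such a test could look roughly like the following. This is only a sketch in the style of the linked FlatMapGroupsWithStateSuite test, written against a suite extending StreamTest; the resource path, query shape, and expected answers are placeholders that depend on the checkpoint actually committed to test resources (assumed imports: java.io.File, org.apache.commons.io.FileUtils, org.apache.spark.util.Utils, org.apache.spark.sql.functions.count).

test("recover from a Spark 2.3.1 streaming aggregation checkpoint") {
  // Hypothetical resource: a checkpoint directory generated by running this
  // exact query on Spark 2.3.1 and committed under test resources.
  val resourceUri = this.getClass.getResource(
    "/structured-streaming/checkpoint-version-2.3.1-streaming-aggregate/").toURI
  val checkpointDir = Utils.createTempDir().getCanonicalFile
  // Copy the checkpoint so the test never mutates the committed resource.
  FileUtils.copyDirectory(new File(resourceUri), checkpointDir)

  val inputData = MemoryStream[Int]
  val aggregated = inputData.toDF().groupBy($"value").agg(count("*"))
  inputData.addData(3, 3, 2)  // must match the data recorded in the checkpoint

  testStream(aggregated, OutputMode.Complete)(
    // Restarting from the old checkpoint must not fail, and must keep using
    // the state format version recorded there (the legacy version 1).
    StartStream(checkpointLocation = checkpointDir.getAbsolutePath),
    AddData(inputData, 3),
    CheckLastBatch((3, 3), (2, 1))
  )
}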


val UNSUPPORTED_OPERATION_CHECK_ENABLED =
buildConf("spark.sql.streaming.unsupportedOperationCheck")
.internal()
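In practice, a user or a test would pin the legacy format only when starting a brand-new query, e.g. (a minimal sketch; for an existing checkpoint the value recorded in the offset log takes precedence, see the OffsetSeqMetadata change below):

// Pin the legacy (version 1) state format for newly started queries.
spark.conf.set("spark.sql.streaming.aggregation.stateFormatVersion", 1)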
@@ -328,10 +328,13 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
"Streaming aggregation doesn't support group aggregate pandas UDF")
}

val stateVersion = conf.getConf(SQLConf.STREAMING_AGGREGATION_STATE_FORMAT_VERSION)

aggregate.AggUtils.planStreamingAggregation(
namedGroupingExpressions,
aggregateExpressions.map(expr => expr.asInstanceOf[AggregateExpression]),
rewrittenResultExpressions,
stateVersion,
planLater(child))

case _ => Nil
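For reference, this planning path is taken by any streaming aggregation, e.g. (a sketch, assuming a MemoryStream[String] named inputData):

val wordCounts = inputData.toDF()
  .groupBy($"value")
  .count()  // planned via AggUtils.planStreamingAggregation with stateVersion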
@@ -256,6 +256,7 @@ object AggUtils {
groupingExpressions: Seq[NamedExpression],
functionsWithoutDistinct: Seq[AggregateExpression],
resultExpressions: Seq[NamedExpression],
stateFormatVersion: Int,
child: SparkPlan): Seq[SparkPlan] = {

val groupingAttributes = groupingExpressions.map(_.toAttribute)
@@ -287,7 +288,8 @@
child = partialAggregate)
}

- val restored = StateStoreRestoreExec(groupingAttributes, None, partialMerged1)
+ val restored = StateStoreRestoreExec(groupingAttributes, None, stateFormatVersion,
+ partialMerged1)

val partialMerged2: SparkPlan = {
val aggregateExpressions = functionsWithoutDistinct.map(_.copy(mode = PartialMerge))
@@ -311,6 +313,7 @@
stateInfo = None,
outputMode = None,
eventTimeWatermark = None,
stateFormatVersion = stateFormatVersion,
partialMerged2)

val finalAndCompleteAggregate: SparkPlan = {
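Taken together, planStreamingAggregation now produces roughly the following operator tree (a sketch with abbreviated operator names; the exact shape depends on the query):

HashAggregate (final)
  StateStoreSaveExec [groupingAttributes], stateFormatVersion
    HashAggregate (PartialMerge)
      StateStoreRestoreExec [groupingAttributes], stateFormatVersion
        HashAggregate (PartialMerge)
          HashAggregate (Partial)
            child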
@@ -100,19 +100,21 @@ class IncrementalExecution(
val state = new Rule[SparkPlan] {

override def apply(plan: SparkPlan): SparkPlan = plan transform {
- case StateStoreSaveExec(keys, None, None, None,
+ case StateStoreSaveExec(keys, None, None, None, stateFormatVersion,
UnaryExecNode(agg,
- StateStoreRestoreExec(_, None, child))) =>
+ StateStoreRestoreExec(_, None, _, child))) =>
val aggStateInfo = nextStatefulOperationStateInfo
StateStoreSaveExec(
keys,
Some(aggStateInfo),
Some(outputMode),
Some(offsetSeqMetadata.batchWatermarkMs),
stateFormatVersion,
agg.withNewChildren(
StateStoreRestoreExec(
keys,
Some(aggStateInfo),
stateFormatVersion,
child) :: Nil))

case StreamingDeduplicateExec(keys, child, None, None) =>
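In other words, the rule fills in the placeholders that AggUtils planned with None (a sketch, names abbreviated):

// before: StateStoreSaveExec(keys, None, None, None, stateFormatVersion,
//           agg(StateStoreRestoreExec(keys, None, stateFormatVersion, child)))
// after:  StateStoreSaveExec(keys, Some(stateInfo), Some(outputMode),
//           Some(batchWatermarkMs), stateFormatVersion,
//           agg(StateStoreRestoreExec(keys, Some(stateInfo), stateFormatVersion, child)))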
@@ -22,7 +22,7 @@ import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.RuntimeConfig
- import org.apache.spark.sql.execution.streaming.state.FlatMapGroupsWithStateExecHelper
+ import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, StreamingAggregationStateManager}
import org.apache.spark.sql.internal.SQLConf.{FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, _}

/**
@@ -89,7 +89,7 @@ object OffsetSeqMetadata extends Logging {
private implicit val format = Serialization.formats(NoTypeHints)
private val relevantSQLConfs = Seq(
SHUFFLE_PARTITIONS, STATE_STORE_PROVIDER_CLASS, STREAMING_MULTIPLE_WATERMARK_POLICY,
- FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION)
+ FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, STREAMING_AGGREGATION_STATE_FORMAT_VERSION)

/**
* Default values of relevant configurations that are used for backward compatibility.
@@ -104,7 +104,9 @@ object OffsetSeqMetadata extends Logging {
private val relevantSQLConfDefaultValues = Map[String, String](
STREAMING_MULTIPLE_WATERMARK_POLICY.key -> MultipleWatermarkPolicy.DEFAULT_POLICY_NAME,
FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION.key ->
- FlatMapGroupsWithStateExecHelper.legacyVersion.toString
+ FlatMapGroupsWithStateExecHelper.legacyVersion.toString,
+ STREAMING_AGGREGATION_STATE_FORMAT_VERSION.key ->
+ StreamingAggregationStateManager.legacyVersion.toString
)

def apply(json: String): OffsetSeqMetadata = Serialization.read[OffsetSeqMetadata](json)
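For illustration, the offset log metadata might look like this before and after the change (a sketch with made-up values):

// Written by an older Spark, no entry for the new conf: on recovery the
// default from relevantSQLConfDefaultValues applies, i.e. legacy version 1.
{"batchWatermarkMs":0,"batchTimestampMs":1533784347136,
 "conf":{"spark.sql.shuffle.partitions":"200"}}

// Written after this change: the recorded version wins over the session conf.
{"batchWatermarkMs":0,"batchTimestampMs":1533784347136,
 "conf":{"spark.sql.shuffle.partitions":"200",
         "spark.sql.streaming.aggregation.stateFormatVersion":"2"}}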
@@ -20,8 +20,12 @@ package org.apache.spark.sql.execution.streaming
import scala.reflect.ClassTag

import org.apache.spark.TaskContext
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeRow}
import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateUnsafeProjection, GenerateUnsafeRowJoiner}
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType

@@ -81,4 +85,221 @@ package object state {
storeCoordinator)
}
}

/**
Contributor:
Ummm, why is this in the package object and not in a separate file? Is there any reason it has to be in the state package object when not all stateful operators require it, only streaming aggregation?

Contributor (author):
Maybe I misinterpreted your earlier suggestion. I thought you were suggesting moving it into the state package object. Will place it in a separate file.

* Base trait for a state manager intended to be used by streaming aggregations.
*/
sealed trait StreamingAggregationStateManager extends Serializable {

/**
* Extract the key columns from the input row and return a new row containing only those columns.
*
* @param row The input row.
* @return A row containing only the key columns.
Contributor:
nit: a lot of the @param and @return tags in the docs are a bit superfluous, as they just repeat what the main statement already says.

Contributor (author):
Will just remove all the @param and @return tags where they merely repeat the description.

*/
def getKey(row: InternalRow): UnsafeRow
Contributor:
nit: why is the input typed as InternalRow when everything else is UnsafeRow? Seems inconsistent.

Contributor (author):
getKey was basically an UnsafeProjection in statefulOperator, so it didn't necessarily require an UnsafeRow. I just followed that usage to make it less restrictive, but we know that in reality the row will always be an UnsafeRow, so it's OK to fix if it provides consistency.


/**
* Calculate the schema of the state value. The schema is mainly passed to the StateStoreRDD.
*
* @return A StructType representing the schema of the state value.
*/
def getStateValueSchema: StructType

/**
* Get the current value of a non-null key from the target state store.
*
* @param store The target StateStore instance.
* @param key The key whose associated value is to be returned.
* @return A non-null row if the key exists in the store, otherwise null.
*/
def get(store: StateStore, key: UnsafeRow): UnsafeRow

/**
* Put a new value for a non-null key into the target state store. Note that the key will be
* extracted from the input row, and will be the same as the result of getKey(row).
*
* @param store The target StateStore instance.
* @param row The input row.
*/
def put(store: StateStore, row: UnsafeRow): Unit

/**
* Commit all the updates that have been made to the target state store, and return the
* new version.
*
* @param store The target StateStore instance.
Contributor:
Superfluous; the main statement alone has all the information.

* @return The new state version.
*/
def commit(store: StateStore): Long

/**
* Remove a single non-null key from the target state store.
*
* @param store The target StateStore instance.
* @param key The key whose associated value is to be removed.
*/
def remove(store: StateStore, key: UnsafeRow): Unit

/**
* Return an iterator containing all the key-value pairs in the target state store.
Contributor:
super nit: some of these can be compressed into a single-line doc.

Contributor (author):
Will address.

*/
def iterator(store: StateStore): Iterator[UnsafeRowPair]

/**
* Return an iterator containing all the keys in the target state store.
*/
def keys(store: StateStore): Iterator[UnsafeRow]

/**
* Return an iterator containing all the values in the target state store.
*/
def values(store: StateStore): Iterator[UnsafeRow]
}

object StreamingAggregationStateManager extends Logging {
val supportedVersions = Seq(1, 2)
val legacyVersion = 1

def createStateManager(
keyExpressions: Seq[Attribute],
inputRowAttributes: Seq[Attribute],
stateFormatVersion: Int): StreamingAggregationStateManager = {
stateFormatVersion match {
case 1 => new StreamingAggregationStateManagerImplV1(keyExpressions, inputRowAttributes)
case 2 => new StreamingAggregationStateManagerImplV2(keyExpressions, inputRowAttributes)
case _ => throw new IllegalArgumentException(s"Version $stateFormatVersion is invalid")
}
}
}
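
To illustrate the intended usage from a stateful operator (a sketch; store, inputRow, keyExpressions, and inputRowAttributes are assumed to be provided by the surrounding operator):

val stateManager = StreamingAggregationStateManager.createStateManager(
  keyExpressions, inputRowAttributes, stateFormatVersion = 2)

val key = stateManager.getKey(inputRow)       // project out the key columns
stateManager.put(store, inputRow)             // V2 strips key columns from the stored value
val current = stateManager.get(store, key)    // V2 re-joins key and value on read
val newVersion = stateManager.commit(store)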

abstract class StreamingAggregationStateManagerBaseImpl(
protected val keyExpressions: Seq[Attribute],
protected val inputRowAttributes: Seq[Attribute]) extends StreamingAggregationStateManager {

@transient protected lazy val keyProjector =
GenerateUnsafeProjection.generate(keyExpressions, inputRowAttributes)

def getKey(row: InternalRow): UnsafeRow = keyProjector(row)

override def commit(store: StateStore): Long = store.commit()
Contributor:
This really does not need to be in this interface, as it is not customized and is unlikely to ever be customized across implementations.

Contributor (author):
This change is actually based on your review comment: always use the state manager and don't access the state store directly whenever possible. If your suggestion only applies to row operations, I can remove commit() from this interface.


override def remove(store: StateStore, key: UnsafeRow): Unit = store.remove(key)

override def keys(store: StateStore): Iterator[UnsafeRow] = {
// discard the values and skip converting them, to avoid unnecessary computation
store.getRange(None, None).map(_.key)
}
}

/**
* The implementation of StreamingAggregationStateManager for state version 1.
* In state version 1, the schemas of the key and the value in the state are as follows:
*
* - key: Same as the key expressions.
* - value: Same as the input row attributes; the value schema contains the key columns as well.
*
* This implementation only works when input row attributes contain all the key attributes.
Contributor:
Is there any particular reason for saying this? Can there be a situation where this is not true?

Contributor (author):
I intended the sentence as a requirement/precondition for possible future usage, but if you think we don't need to state it explicitly, I can remove it.

*
* @param keyExpressions The attributes of keys.
* @param inputRowAttributes The attributes of input row.
*/
class StreamingAggregationStateManagerImplV1(
keyExpressions: Seq[Attribute],
inputRowAttributes: Seq[Attribute])
extends StreamingAggregationStateManagerBaseImpl(keyExpressions, inputRowAttributes) {

override def getStateValueSchema: StructType = inputRowAttributes.toStructType

override def get(store: StateStore, key: UnsafeRow): UnsafeRow = {
store.get(key)
}

override def put(store: StateStore, row: UnsafeRow): Unit = {
store.put(getKey(row), row)
}

override def iterator(store: StateStore): Iterator[UnsafeRowPair] = {
store.iterator()
}

override def values(store: StateStore): Iterator[UnsafeRow] = {
store.iterator().map(_.value)
}
}
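
Concretely, for a query like df.groupBy("a").count(), V1 stores rows like (illustrative values):

// key:   [a = "foo"]
// value: [a = "foo", count = 42]   // the key column is duplicated in the value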

/**
* The implementation of StreamingAggregationStateManager for state version 2.
* In state version 2, the schemas of the key and the value in the state are as follows:
*
* - key: Same as the key expressions.
* - value: The input row attributes minus the key expressions.
*
* The value schema is changed to optimize memory/space usage in the state by removing the
* columns duplicated between the key and the value. Hence the key columns are excluded
* from the value schema.
*
* This implementation only works when input row attributes contain all the key attributes.
Contributor:
Same question as above.

*
* @param keyExpressions The attributes of keys.
* @param inputRowAttributes The attributes of input row.
*/
class StreamingAggregationStateManagerImplV2(
keyExpressions: Seq[Attribute],
inputRowAttributes: Seq[Attribute])
extends StreamingAggregationStateManagerBaseImpl(keyExpressions, inputRowAttributes) {

private val valueExpressions: Seq[Attribute] = inputRowAttributes.diff(keyExpressions)
private val keyValueJoinedExpressions: Seq[Attribute] = keyExpressions ++ valueExpressions
private val needToProjectToRestoreValue: Boolean =
Contributor:
add docs on what this means (that, if the fields in the joined row are not in the expected order, an additional projection is used)

Contributor (author):
Will add.

keyValueJoinedExpressions != inputRowAttributes

@transient private lazy val valueProjector =
GenerateUnsafeProjection.generate(valueExpressions, inputRowAttributes)

@transient private lazy val joiner =
GenerateUnsafeRowJoiner.create(StructType.fromAttributes(keyExpressions),
StructType.fromAttributes(valueExpressions))
@transient private lazy val restoreValueProjector = GenerateUnsafeProjection.generate(
keyValueJoinedExpressions, inputRowAttributes)
Contributor:
Are you sure this is right??

def generate(expressions: InType, inputSchema: Seq[Attribute])

So the 2nd param is the input schema of the rows the projection is applied to. This projection is applied to the joined rows, which have the schema keyValueJoinedExpressions. So I think these two should be flipped.

Contributor:
I am wondering why this does not fail any test. Is it because needToProjectToRestoreValue is always false?

Contributor (author):
My bad, you're right. Will fix. Btw, needToProjectToRestoreValue is always false unless the sequence of key and value columns gets mixed up.


override def getStateValueSchema: StructType = valueExpressions.toStructType

override def get(store: StateStore, key: UnsafeRow): UnsafeRow = {
val savedState = store.get(key)
if (savedState == null) {
return savedState
}

val joinedRow = joiner.join(key, savedState)
Contributor:
Can't you dedup this code with the restoreOriginRow method?

Contributor (author):
Missed that spot. Will leverage restoreOriginRow.

if (needToProjectToRestoreValue) {
restoreValueProjector(joinedRow)
} else {
joinedRow
}
}

override def put(store: StateStore, row: UnsafeRow): Unit = {
val key = keyProjector(row)
val value = valueProjector(row)
store.put(key, value)
}

override def iterator(store: StateStore): Iterator[UnsafeRowPair] = {
store.iterator().map(rowPair => new UnsafeRowPair(rowPair.key, restoreOriginRow(rowPair)))
}

override def values(store: StateStore): Iterator[UnsafeRow] = {
store.iterator().map(rowPair => restoreOriginRow(rowPair))
}

private def restoreOriginRow(rowPair: UnsafeRowPair): UnsafeRow = {
Contributor:
rename to restoreOriginalRow

Contributor (author):
Will rename.

val joinedRow = joiner.join(rowPair.key, rowPair.value)
if (needToProjectToRestoreValue) {
restoreValueProjector(joinedRow)
} else {
joinedRow
}
}
}

}
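
For the same df.groupBy("a").count() example, V2 stores rows like (illustrative values):

// key:   [a = "foo"]
// value: [count = 42]   // key columns are stripped before store.put()
//
// On read, get()/iterator() rebuild the original row with the joiner:
//   joiner.join([a = "foo"], [count = 42])  ==>  [a = "foo", count = 42]
// and restoreValueProjector reorders the columns only when the joined order
// differs from inputRowAttributes (i.e. needToProjectToRestoreValue).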