[SPARK-19876][SS][WIP] OneTime Trigger Executor #17219
Changes from 11 commits
New file (79 added lines) introducing the `OffsetCommitLog` used to record committed batch ids:

```scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.streaming

import java.io.{InputStream, OutputStream}
import java.nio.charset.StandardCharsets._

import scala.io.{Source => IOSource}

import org.apache.spark.sql.SparkSession

/**
 * Used to write log files that represent commit points in structured streaming.
 * A log file will be written immediately after the successful completion of a
 * batch, and before processing the next batch. Here is an execution summary:
 * - trigger batch 1
 * - obtain batch 1 offsets and write to offset log
 * - process batch 1
 * - write batch 1 to commit log
 * - trigger batch 2
 * - obtain batch 2 offsets and write to offset log
 * - process batch 2
 * - write batch 2 to commit log
 * ....
 *
 * The current format of the commit log is:
 * line 1: version
 * line 2: metadata (optional json string)
 */
class OffsetCommitLog(sparkSession: SparkSession, path: String)
  extends HDFSMetadataLog[Option[String]](sparkSession, path) {

  override protected def deserialize(in: InputStream): Option[String] = {
    // called inside a try-finally where the underlying stream is closed in the caller
    val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
    if (!lines.hasNext) {
      throw new IllegalStateException("Incomplete log file")
    }
    val version = lines.next().trim.toInt
    if (OffsetCommitLog.VERSION < version) {
      throw new IllegalStateException(s"Incompatible log file version ${version}")
    }
    // read metadata
    lines.next().trim match {
      case OffsetCommitLog.SERIALIZED_VOID => None
      case metadata => Some(metadata)
    }
  }

  override protected def serialize(metadata: Option[String], out: OutputStream): Unit = {
    // called inside a try-finally where the underlying stream is closed in the caller
    out.write(OffsetCommitLog.VERSION.toString.getBytes(UTF_8))
    out.write('\n')

    // write metadata or void
    out.write(metadata.getOrElse(OffsetCommitLog.SERIALIZED_VOID).getBytes(UTF_8))
  }
}

object OffsetCommitLog {
  private val VERSION = 1

  private val SERIALIZED_VOID = "{}"
}
```

Contributor (on `private val VERSION = 1`): Let's be consistent with other logs in writing "v1" for the version and not "1".
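As an aside, the two-line on-disk format described in the class doc (version on line 1, metadata JSON or the `{}` placeholder on line 2) can be illustrated with a self-contained round-trip sketch. The names below are hypothetical and only mirror the serialize/deserialize logic shown above; they are not part of the PR:

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets.UTF_8

import scala.io.{Source => IOSource}

// Standalone sketch (not PR code) of the commit log's two-line format:
// line 1 is the version, line 2 is either the metadata JSON or the "{}" placeholder.
object CommitLogFormatSketch {
  private val Version = 1
  private val SerializedVoid = "{}"

  // Mirrors OffsetCommitLog.serialize: version, newline, then metadata or "{}".
  def write(metadata: Option[String]): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    out.write(Version.toString.getBytes(UTF_8))
    out.write('\n')
    out.write(metadata.getOrElse(SerializedVoid).getBytes(UTF_8))
    out.toByteArray
  }

  // Mirrors OffsetCommitLog.deserialize: reject newer versions, map "{}" back to None.
  def read(bytes: Array[Byte]): Option[String] = {
    val lines = IOSource.fromInputStream(new ByteArrayInputStream(bytes), UTF_8.name()).getLines()
    require(lines.hasNext, "Incomplete log file")
    val version = lines.next().trim.toInt
    require(version <= Version, s"Incompatible log file version $version")
    lines.next().trim match {
      case SerializedVoid => None
      case metadata => Some(metadata)
    }
  }

  def main(args: Array[String]): Unit = {
    assert(read(write(None)).isEmpty)                        // "{}" maps back to no metadata
    assert(read(write(Some("""{"k":"v"}"""))).contains("""{"k":"v"}"""))
  }
}
```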
Changes to `StreamExecution`:

```diff
@@ -162,6 +162,7 @@ class StreamExecution(

   private val triggerExecutor = trigger match {
     case t: ProcessingTime => ProcessingTimeExecutor(t, triggerClock)
+    case _: OneTime => OneTimeExecutor()
   }

   /** Defines the internal state of execution */
```
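`OneTimeExecutor` itself is not part of this excerpt. As a rough sketch of the idea (the trait and signature below are assumptions for illustration, not the PR's code), a one-time executor simply fires the batch body once and returns, where `ProcessingTimeExecutor` would keep re-firing on a clock until the query stops:

```scala
// Sketch only: a trigger executor that invokes the batch body exactly once.
trait TriggerExecutorSketch {
  def execute(triggerHandler: () => Boolean): Unit
}

case class OneTimeExecutorSketch() extends TriggerExecutorSketch {
  // The handler's Boolean ("continue?") result is ignored: there is no next trigger.
  override def execute(triggerHandler: () => Boolean): Unit = triggerHandler()
}
```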
```diff
@@ -206,6 +207,12 @@ class StreamExecution(
    */
   val offsetLog = new OffsetSeqLog(sparkSession, checkpointFile("offsets"))

+  /**
+   * A log that records the committed batch ids. This is used to check if a batch was committed
+   * on restart, instead of (possibly) re-running the previous batch.
+   */
+  val commitLog = new OffsetCommitLog(sparkSession, checkpointFile("commits"))
+
   /** Whether all fields of the query have been initialized */
   private def isInitialized: Boolean = state.get != INITIALIZING
```
```diff
@@ -373,22 +380,66 @@ class StreamExecution(
    * - currentBatchId
    * - committedOffsets
    * - availableOffsets
+   * The basic structure of this method is as follows:
+   *
+   * Identify (from the offset log) the offsets used to run the last batch
+   * IF a last batch exists THEN
+   *   Set the next batch to that last batch
+   *   Check the commit log to see which batch was committed last
+   *   IF the last batch was committed THEN
+   *     Call getBatch using the last batch start and end offsets
+   *     Setup for a new batch i.e., start = last batch end, and identify new end
+   *     DONE
+   *   ELSE
+   *     Identify a brand new batch
+   *     DONE
    */
   private def populateStartOffsets(): Unit = {
     offsetLog.getLatest() match {
       case Some((batchId, nextOffsets)) =>
         logInfo(s"Resuming streaming query, starting with batch $batchId")
         currentBatchId = batchId
         availableOffsets = nextOffsets.toStreamProgress(sources)
         offsetSeqMetadata = nextOffsets.metadata.getOrElse(OffsetSeqMetadata())
-        logDebug(s"Found possibly unprocessed offsets $availableOffsets " +
-          s"at batch timestamp ${offsetSeqMetadata.batchTimestampMs}")
-
-        offsetLog.get(batchId - 1).foreach {
-          case lastOffsets =>
+        if (batchId > 0) {
+          // We have committed at least one batch
+          offsetLog.get(batchId - 1).foreach { lastOffsets =>
             committedOffsets = lastOffsets.toStreamProgress(sources)
             logDebug(s"Resuming with committed offsets: $committedOffsets")
+          }
         }
+        /* identify the current batch id: if commit log indicates we successfully processed the
+         * latest batch id in the offset log, then we can safely move to the next batch
+         * i.e., committedBatchId + 1
+         */
+        currentBatchId = commitLog.getLatest() match {
+          case Some((committedBatchId, _))
+            if batchId == committedBatchId => committedBatchId + 1
+          case _ => batchId
+        }
+        if (batchId < currentBatchId) {
+          /* The last batch was successfully committed, so we can safely process a
+           * new next batch but first:
+           * Make a call to getBatch using the offsets from the previous batch,
+           * because certain sources (e.g., KafkaSource) assume on restart the last
+           * batch will be executed before getOffset is called again.
+           */
+          availableOffsets.foreach {
+            case (source, end)
+              if committedOffsets.get(source).map(_ != end).getOrElse(true) =>
+              val start = committedOffsets.get(source)
+              logDebug(s"Initializing offset retrieval from $source " +
+                s"at start $start end $end")
+              source.getBatch(start, end)
+            case _ =>
+          }
+          // Move committed offsets to the last offsets of the last batch
+          offsetLog.get(currentBatchId - 1).foreach { lastOffsets =>
+            committedOffsets = lastOffsets.toStreamProgress(sources)
+          }
+          // Construct a new batch by recomputing availableOffsets
+          constructNextBatch()
+        }
+        logDebug(s"Resuming with committed offsets $committedOffsets " +
+          s"and available offsets $availableOffsets")
       case None => // We are starting this stream for the first time.
         logInfo(s"Starting new streaming query.")
         currentBatchId = 0
```

Contributor (on the new method scaladoc): Really like this explanation.

Contributor (on "Call getBatch using the last batch start and end offsets"): Add the reason regarding why we do this.

Contributor (on the removed `logDebug` of possibly unprocessed offsets): Why remove this debug log?

Contributor: Yeah, this does not make sense here any more. But please add similar logging of the recovered metadata later, where you have logged the start and available offsets.
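To make the recovery rule concrete, here is a small standalone sketch (not the PR's code; the helper name is hypothetical) of the batch-id selection that the commit log enables:

```scala
object RecoveryRuleSketch {
  // Re-run the latest batch from the offset log unless the commit log shows it completed,
  // in which case it is safe to move on to the next batch id.
  def nextBatchId(latestOffsetBatch: Long, latestCommittedBatch: Option[Long]): Long =
    latestCommittedBatch match {
      case Some(committed) if committed == latestOffsetBatch => committed + 1
      case _ => latestOffsetBatch
    }

  def main(args: Array[String]): Unit = {
    assert(nextBatchId(5, Some(5)) == 6) // batch 5 committed: safe to start batch 6
    assert(nextBatchId(5, Some(4)) == 5) // batch 5 only in the offset log: re-run it
    assert(nextBatchId(0, None) == 0)    // nothing committed yet: re-run batch 0
  }
}
```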
```diff
@@ -559,6 +610,8 @@ class StreamExecution(
       reportTimeTaken("addBatch") {
         sink.addBatch(currentBatchId, nextBatch)
       }
+      logDebug(s"Commit log write ${currentBatchId}")
+      commitLog.add(currentBatchId, None)

       awaitBatchLock.lock()
       try {
```
Changes to the `Trigger` API (adds the `OneTime` trigger):

```diff
@@ -36,6 +36,51 @@ import org.apache.spark.unsafe.types.CalendarInterval
 @InterfaceStability.Evolving
 sealed trait Trigger

+/**
+ * :: Experimental ::
+ * A trigger that runs a query once then terminates.
+ *
+ * Scala Example:
+ * {{{
+ *   df.write.trigger(OneTime)
+ * }}}
+ *
+ * Java Example:
+ * {{{
+ *   df.write.trigger(OneTime.create())
+ * }}}
+ *
+ * @since 2.2.0
+ */
+@Experimental
+@InterfaceStability.Evolving
+case class OneTime() extends Trigger
+
+/**
+ * :: Experimental ::
+ * Used to create [[OneTime]] triggers for [[StreamingQuery]]s.
+ *
+ * @since 2.2.0
+ */
+@Experimental
+@InterfaceStability.Evolving
+object OneTime {
+
+  /**
+   * Create a [[OneTime]] trigger.
+   *
+   * Example:
+   * {{{
+   *   df.write.trigger(OneTime.create())
+   * }}}
+   *
+   * @since 2.2.0
+   */
+  def create(): OneTime = {
+    apply()
+  }
+}
+
 /**
  * :: Experimental ::
  * A trigger that runs a query periodically based on the processing time. If `interval` is 0,
```
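For context, a hedged end-to-end sketch of how a query might be run with the new trigger. The `readStream`/`writeStream` calls are the existing Structured Streaming API; the import path for `OneTime` is assumed to match `ProcessingTime`, and the file paths are placeholders:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.OneTime
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// Sketch: process whatever input is available as a single batch, commit it, and stop,
// instead of leaving a continuously running streaming query.
object OneTimeTriggerSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("one-time-trigger-sketch").getOrCreate()

    val schema = StructType(StructField("value", StringType) :: Nil)
    val input = spark.readStream.schema(schema).json("/tmp/one-time-input")     // placeholder path

    val query = input.writeStream
      .format("parquet")
      .option("path", "/tmp/one-time-output")                                   // placeholder path
      .option("checkpointLocation", "/tmp/one-time-checkpoint")                 // placeholder path
      .trigger(OneTime())      // Scala callers; Java callers would use OneTime.create()
      .start()

    // With a one-time trigger the query terminates once its single batch is committed.
    query.awaitTermination()
  }
}
```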
Changes to `StreamingAggregationSuite`:

```diff
@@ -277,7 +277,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with BeforeAndAfte
         true
       },
       StartStream(ProcessingTime("10 seconds"), triggerClock = clock),
-      CheckLastBatch((20L, 1), (85L, 1)),
+      // The commit log should ensure that we do not run another batch
+      CheckLastBatch(),
       AssertOnQuery { q =>
         clock.getTimeMillis() == 90000L
       },
@@ -327,7 +328,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with BeforeAndAfte
         true
       },
       StartStream(ProcessingTime("10 day"), triggerClock = clock),
-      CheckLastBatch((20L, 1), (85L, 1)),
+      // Commit log should prevent batch from running again
+      CheckLastBatch(),

       // advance clock to 100 days, should retain keys >= 90
       AddData(inputData, 85L, 90L, 100L, 105L),
```
Changes to `StreamingQueryListenerSuite`:

```diff
@@ -109,8 +109,9 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter

       // Termination event generated with exception message when stopped with error
       StartStream(ProcessingTime(100), triggerClock = clock),
+      AdvanceManualClock(100), // advance clock to ensure completed initial trigger
       AddData(inputData, 0),
-      AdvanceManualClock(100),
+      AdvanceManualClock(100), // process bad data
       ExpectFailure[SparkException](),
       AssertOnQuery { query =>
         eventually(Timeout(streamingTimeout)) {
```

Review comment: Scala doc please.