[SPARK-19876][SS][WIP] OneTime Trigger Executor #17219
Changes from 11 commits
New file (79 added lines) introducing the `OffsetCommitLog` used to record committed batch ids:

```scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.streaming

import java.io.{InputStream, OutputStream}
import java.nio.charset.StandardCharsets._

import scala.io.{Source => IOSource}

import org.apache.spark.sql.SparkSession

/**
 * Used to write log files that represent commit points in structured streaming.
 * A log file will be written immediately after the successful completion of a
 * batch, and before processing the next batch. Here is an execution summary:
 * - trigger batch 1
 * - obtain batch 1 offsets and write to offset log
 * - process batch 1
 * - write batch 1 to commit log
 * - trigger batch 2
 * - obtain batch 2 offsets and write to offset log
 * - process batch 2
 * - write batch 2 to commit log
 * ....
 *
 * The current format of the commit log is:
 * line 1: version
 * line 2: metadata (optional json string)
 */
class OffsetCommitLog(sparkSession: SparkSession, path: String)
  extends HDFSMetadataLog[Option[String]](sparkSession, path) {

  override protected def deserialize(in: InputStream): Option[String] = {
    // called inside a try-finally where the underlying stream is closed in the caller
    val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines()
    if (!lines.hasNext) {
      throw new IllegalStateException("Incomplete log file")
    }
    val version = lines.next().trim.toInt
    if (OffsetCommitLog.VERSION < version) {
      throw new IllegalStateException(s"Incompatible log file version ${version}")
    }
    // read metadata
    lines.next().trim match {
      case OffsetCommitLog.SERIALIZED_VOID => None
      case metadata => Some(metadata)
    }
  }

  override protected def serialize(metadata: Option[String], out: OutputStream): Unit = {
    // called inside a try-finally where the underlying stream is closed in the caller
    out.write(OffsetCommitLog.VERSION.toString.getBytes(UTF_8))
    out.write('\n')

    // write metadata or void
    out.write(metadata.getOrElse(OffsetCommitLog.SERIALIZED_VOID).getBytes(UTF_8))
  }
}

object OffsetCommitLog {
  private val VERSION = 1

  private val SERIALIZED_VOID = "{}"
}
```

Contributor (on `private val VERSION = 1`): Let's be consistent with other logs in writing "v1" for the version and not "1".
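As an aside, the two-line on-disk format described in the class doc (version on line 1, metadata JSON or the `{}` placeholder on line 2) can be illustrated with a self-contained round-trip sketch. The names below are hypothetical and only mirror the serialize/deserialize logic shown above; they are not part of the PR:

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets.UTF_8

import scala.io.{Source => IOSource}

// Standalone sketch (not PR code) of the commit log's two-line format:
// line 1 is the version, line 2 is either the metadata JSON or the "{}" placeholder.
object CommitLogFormatSketch {
  private val Version = 1
  private val SerializedVoid = "{}"

  // Mirrors OffsetCommitLog.serialize: version, newline, then metadata or "{}".
  def write(metadata: Option[String]): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    out.write(Version.toString.getBytes(UTF_8))
    out.write('\n')
    out.write(metadata.getOrElse(SerializedVoid).getBytes(UTF_8))
    out.toByteArray
  }

  // Mirrors OffsetCommitLog.deserialize: reject newer versions, map "{}" back to None.
  def read(bytes: Array[Byte]): Option[String] = {
    val lines = IOSource.fromInputStream(new ByteArrayInputStream(bytes), UTF_8.name()).getLines()
    require(lines.hasNext, "Incomplete log file")
    val version = lines.next().trim.toInt
    require(version <= Version, s"Incompatible log file version $version")
    lines.next().trim match {
      case SerializedVoid => None
      case metadata => Some(metadata)
    }
  }

  def main(args: Array[String]): Unit = {
    assert(read(write(None)).isEmpty)                        // "{}" maps back to no metadata
    assert(read(write(Some("""{"k":"v"}"""))).contains("""{"k":"v"}"""))
  }
}
```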
Changes to `StreamExecution`:

```diff
@@ -162,6 +162,7 @@ class StreamExecution(

   private val triggerExecutor = trigger match {
     case t: ProcessingTime => ProcessingTimeExecutor(t, triggerClock)
+    case _: OneTime => OneTimeExecutor()
   }

   /** Defines the internal state of execution */
```
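`OneTimeExecutor` itself is not part of this excerpt. As a rough sketch of the idea (the trait and signature below are assumptions for illustration, not the PR's code), a one-time executor simply fires the batch body once and returns, where `ProcessingTimeExecutor` would keep re-firing on a clock until the query stops:

```scala
// Sketch only: a trigger executor that invokes the batch body exactly once.
trait TriggerExecutorSketch {
  def execute(triggerHandler: () => Boolean): Unit
}

case class OneTimeExecutorSketch() extends TriggerExecutorSketch {
  // The handler's Boolean ("continue?") result is ignored: there is no next trigger.
  override def execute(triggerHandler: () => Boolean): Unit = triggerHandler()
}
```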
```diff
@@ -206,6 +207,12 @@ class StreamExecution(
    */
   val offsetLog = new OffsetSeqLog(sparkSession, checkpointFile("offsets"))

+  /**
+   * A log that records the committed batch ids. This is used to check if a batch was committed
+   * on restart, instead of (possibly) re-running the previous batch.
+   */
+  val commitLog = new OffsetCommitLog(sparkSession, checkpointFile("commits"))
+
   /** Whether all fields of the query have been initialized */
   private def isInitialized: Boolean = state.get != INITIALIZING
```
```diff
@@ -373,22 +380,66 @@ class StreamExecution(
    * - currentBatchId
    * - committedOffsets
    * - availableOffsets
+   * The basic structure of this method is as follows:
+   *
+   * Identify (from the offset log) the offsets used to run the last batch
+   * IF a last batch exists THEN
+   *   Set the next batch to that last batch
+   *   Check the commit log to see which batch was committed last
+   *   IF the last batch was committed THEN
+   *     Call getBatch using the last batch start and end offsets
+   *     Setup for a new batch i.e., start = last batch end, and identify new end
+   *     DONE
+   *   ELSE
+   *     Identify a brand new batch
+   *     DONE
    */
   private def populateStartOffsets(): Unit = {
     offsetLog.getLatest() match {
       case Some((batchId, nextOffsets)) =>
         logInfo(s"Resuming streaming query, starting with batch $batchId")
         currentBatchId = batchId
         availableOffsets = nextOffsets.toStreamProgress(sources)
         offsetSeqMetadata = nextOffsets.metadata.getOrElse(OffsetSeqMetadata())
-        logDebug(s"Found possibly unprocessed offsets $availableOffsets " +
-          s"at batch timestamp ${offsetSeqMetadata.batchTimestampMs}")
-
-        offsetLog.get(batchId - 1).foreach {
-          case lastOffsets =>
+        if (batchId > 0) {
+          // We have committed at least one batch
+          offsetLog.get(batchId - 1).foreach { lastOffsets =>
             committedOffsets = lastOffsets.toStreamProgress(sources)
             logDebug(s"Resuming with committed offsets: $committedOffsets")
+          }
         }
+        /* identify the current batch id: if commit log indicates we successfully processed the
+         * latest batch id in the offset log, then we can safely move to the next batch
+         * i.e., committedBatchId + 1
+         */
+        currentBatchId = commitLog.getLatest() match {
+          case Some((committedBatchId, _))
+            if batchId == committedBatchId => committedBatchId + 1
+          case _ => batchId
+        }
+        if (batchId < currentBatchId) {
+          /* The last batch was successfully committed, so we can safely process a
+           * new next batch but first:
+           * Make a call to getBatch using the offsets from the previous batch,
+           * because certain sources (e.g., KafkaSource) assume on restart the last
+           * batch will be executed before getOffset is called again.
+           */
+          availableOffsets.foreach {
+            case (source, end)
+              if committedOffsets.get(source).map(_ != end).getOrElse(true) =>
+              val start = committedOffsets.get(source)
+              logDebug(s"Initializing offset retrieval from $source " +
+                s"at start $start end $end")
+              source.getBatch(start, end)
+            case _ =>
+          }
+          // Move committed offsets to the last offsets of the last batch
+          offsetLog.get(currentBatchId - 1).foreach { lastOffsets =>
+            committedOffsets = lastOffsets.toStreamProgress(sources)
+          }
+          // Construct a new batch by recomputing availableOffsets
+          constructNextBatch()
+        }
+        logDebug(s"Resuming with committed offsets $committedOffsets " +
+          s"and available offsets $availableOffsets")
       case None => // We are starting this stream for the first time.
         logInfo(s"Starting new streaming query.")
         currentBatchId = 0
```

Contributor (on the new method scaladoc): Really like this explanation.

Contributor (on "Call getBatch using the last batch start and end offsets"): Add the reason regarding why we do this.

Contributor (on the removed `logDebug` of possibly unprocessed offsets): Why remove this debug log?

Contributor: Yeah, this does not make sense here any more. But please add similar logging of the recovered metadata later, where you have logged the start and available offsets.
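To make the recovery rule concrete, here is a small standalone sketch (not the PR's code; the helper name is hypothetical) of the batch-id selection that the commit log enables:

```scala
object RecoveryRuleSketch {
  // Re-run the latest batch from the offset log unless the commit log shows it completed,
  // in which case it is safe to move on to the next batch id.
  def nextBatchId(latestOffsetBatch: Long, latestCommittedBatch: Option[Long]): Long =
    latestCommittedBatch match {
      case Some(committed) if committed == latestOffsetBatch => committed + 1
      case _ => latestOffsetBatch
    }

  def main(args: Array[String]): Unit = {
    assert(nextBatchId(5, Some(5)) == 6) // batch 5 committed: safe to start batch 6
    assert(nextBatchId(5, Some(4)) == 5) // batch 5 only in the offset log: re-run it
    assert(nextBatchId(0, None) == 0)    // nothing committed yet: re-run batch 0
  }
}
```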
```diff
@@ -559,6 +610,8 @@ class StreamExecution(
       reportTimeTaken("addBatch") {
         sink.addBatch(currentBatchId, nextBatch)
       }
+      logDebug(s"Commit log write ${currentBatchId}")
+      commitLog.add(currentBatchId, None)

       awaitBatchLock.lock()
       try {
```
Changes to the `Trigger` API (adds the `OneTime` trigger):

```diff
@@ -36,6 +36,51 @@ import org.apache.spark.unsafe.types.CalendarInterval
 @InterfaceStability.Evolving
 sealed trait Trigger

+/**
+ * :: Experimental ::
+ * A trigger that runs a query once then terminates.
+ *
+ * Scala Example:
+ * {{{
+ *   df.write.trigger(OneTime)
+ * }}}
+ *
+ * Java Example:
+ * {{{
+ *   df.write.trigger(OneTime.create())
+ * }}}
+ *
+ * @since 2.2.0
+ */
+@Experimental
+@InterfaceStability.Evolving
+case class OneTime() extends Trigger
+
+/**
+ * :: Experimental ::
+ * Used to create [[OneTime]] triggers for [[StreamingQuery]]s.
+ *
+ * @since 2.2.0
+ */
+@Experimental
+@InterfaceStability.Evolving
+object OneTime {
+
+  /**
+   * Create a [[OneTime]] trigger.
+   *
+   * Example:
+   * {{{
+   *   df.write.trigger(OneTime.create())
+   * }}}
+   *
+   * @since 2.2.0
+   */
+  def create(): OneTime = {
+    apply()
+  }
+}
+
 /**
  * :: Experimental ::
  * A trigger that runs a query periodically based on the processing time. If `interval` is 0,
```
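For context, a hedged end-to-end sketch of how a query might be run with the new trigger. The `readStream`/`writeStream` calls are the existing Structured Streaming API; the import path for `OneTime` is assumed to match `ProcessingTime`, and the file paths are placeholders:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.OneTime
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// Sketch: process whatever input is available as a single batch, commit it, and stop,
// instead of leaving a continuously running streaming query.
object OneTimeTriggerSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("one-time-trigger-sketch").getOrCreate()

    val schema = StructType(StructField("value", StringType) :: Nil)
    val input = spark.readStream.schema(schema).json("/tmp/one-time-input")     // placeholder path

    val query = input.writeStream
      .format("parquet")
      .option("path", "/tmp/one-time-output")                                   // placeholder path
      .option("checkpointLocation", "/tmp/one-time-checkpoint")                 // placeholder path
      .trigger(OneTime())      // Scala callers; Java callers would use OneTime.create()
      .start()

    // With a one-time trigger the query terminates once its single batch is committed.
    query.awaitTermination()
  }
}
```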
Changes to `StreamingAggregationSuite`:

```diff
@@ -277,7 +277,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with BeforeAndAfte
         true
       },
       StartStream(ProcessingTime("10 seconds"), triggerClock = clock),
-      CheckLastBatch((20L, 1), (85L, 1)),
+      // The commit log should ensure that we do not run another batch
+      CheckLastBatch(),
       AssertOnQuery { q =>
         clock.getTimeMillis() == 90000L
       },
@@ -327,7 +328,8 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with BeforeAndAfte
         true
       },
       StartStream(ProcessingTime("10 day"), triggerClock = clock),
-      CheckLastBatch((20L, 1), (85L, 1)),
+      // Commit log should prevent batch from running again
+      CheckLastBatch(),

       // advance clock to 100 days, should retain keys >= 90
       AddData(inputData, 85L, 90L, 100L, 105L),
```
Changes to `StreamingQueryListenerSuite`:

```diff
@@ -109,8 +109,9 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter

       // Termination event generated with exception message when stopped with error
       StartStream(ProcessingTime(100), triggerClock = clock),
+      AdvanceManualClock(100), // advance clock to ensure completed initial trigger
       AddData(inputData, 0),
-      AdvanceManualClock(100),
+      AdvanceManualClock(100), // process bad data
       ExpectFailure[SparkException](),
       AssertOnQuery { query =>
         eventually(Timeout(streamingTimeout)) {
```

Review comment: Scala doc please.