-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-31593][SS] Remove unnecessary streaming query progress update #28391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -389,22 +389,22 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { | |
| // Structured Streaming in Spark 2.0.0. Because we renamed the classes, | ||
| // SparkListenerApplicationEnd is the only valid event and it's the last event. We use it | ||
| // to verify that we can skip broken jsons generated by Structured Streaming. | ||
| testReplayListenerBusWithBorkenEventJsons("query-event-logs-version-2.0.0.txt", 1) | ||
| testReplayListenerBusWithBrokenEventJsons("query-event-logs-version-2.0.0.txt", 1) | ||
| } | ||
|
|
||
| testQuietly("ReplayListenerBus should ignore broken event jsons generated in 2_0_1") { | ||
| // query-event-logs-version-2.0.1.txt has all types of events generated by | ||
| // Structured Streaming in Spark 2.0.1. Because we renamed the classes, | ||
| // SparkListenerApplicationEnd is the only valid event and it's the last event. We use it | ||
| // to verify that we can skip broken jsons generated by Structured Streaming. | ||
| testReplayListenerBusWithBorkenEventJsons("query-event-logs-version-2.0.1.txt", 1) | ||
| testReplayListenerBusWithBrokenEventJsons("query-event-logs-version-2.0.1.txt", 1) | ||
| } | ||
|
|
||
| testQuietly("ReplayListenerBus should ignore broken event jsons generated in 2_0_2") { | ||
| // query-event-logs-version-2.0.2.txt has all types of events generated by | ||
| // Structured Streaming in Spark 2.0.2. SPARK-18516 refactored Structured Streaming query events | ||
| // in 2.1.0. This test is to verify we are able to load events generated by Spark 2.0.2. | ||
| testReplayListenerBusWithBorkenEventJsons("query-event-logs-version-2.0.2.txt", 5) | ||
| testReplayListenerBusWithBrokenEventJsons("query-event-logs-version-2.0.2.txt", 5) | ||
| } | ||
|
|
||
| test("listener propagates observable metrics") { | ||
|
|
@@ -433,9 +433,13 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { | |
| } | ||
|
|
||
| try { | ||
| val noDataProgressIntervalKey = SQLConf.STREAMING_NO_DATA_PROGRESS_EVENT_INTERVAL.key | ||
| spark.streams.addListener(listener) | ||
| testStream(df, OutputMode.Append)( | ||
| StartStream(Trigger.ProcessingTime(100), triggerClock = clock), | ||
| StartStream( | ||
| Trigger.ProcessingTime(100), | ||
| triggerClock = clock, | ||
| Map(noDataProgressIntervalKey -> "100")), | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Set `noDataProgressIntervalKey` to 100. After the manual clock is advanced at batch 3 (which has no data), an empty progress event will be reported. |
||
| // Batch 1 | ||
| AddData(inputData, 1, 2), | ||
| AdvanceManualClock(100), | ||
|
|
@@ -464,7 +468,49 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { | |
| } | ||
| } | ||
|
|
||
| private def testReplayListenerBusWithBorkenEventJsons( | ||
| test("SPARK-31593: remove unnecessary streaming query progress update") { | ||
| withSQLConf(SQLConf.STREAMING_NO_DATA_PROGRESS_EVENT_INTERVAL.key -> "100") { | ||
| @volatile var numProgressEvent = 0 | ||
| val listener = new StreamingQueryListener { | ||
| override def onQueryStarted(event: QueryStartedEvent): Unit = {} | ||
| override def onQueryProgress(event: QueryProgressEvent): Unit = { | ||
| numProgressEvent += 1 | ||
| } | ||
| override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {} | ||
| } | ||
| spark.streams.addListener(listener) | ||
|
|
||
| def checkProgressEvent(count: Int): StreamAction = { | ||
| AssertOnQuery { _ => | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add an inner function to simplify the test. |
||
| eventually(Timeout(streamingTimeout)) { | ||
| assert(numProgressEvent == count) | ||
| } | ||
| true | ||
| } | ||
| } | ||
|
|
||
| try { | ||
| val input = new MemoryStream[Int](0, sqlContext) | ||
| val clock = new StreamManualClock() | ||
| val result = input.toDF().select("value") | ||
| testStream(result)( | ||
| StartStream(trigger = Trigger.ProcessingTime(10), triggerClock = clock), | ||
| AddData(input, 10), | ||
| checkProgressEvent(1), | ||
| AdvanceManualClock(10), | ||
| checkProgressEvent(2), | ||
| AdvanceManualClock(90), | ||
| checkProgressEvent(2), | ||
| AdvanceManualClock(10), | ||
| checkProgressEvent(3) | ||
| ) | ||
| } finally { | ||
| spark.streams.removeListener(listener) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private def testReplayListenerBusWithBrokenEventJsons( | ||
| fileName: String, | ||
| expectedEventSize: Int): Unit = { | ||
| val input = getClass.getResourceAsStream(s"/structured-streaming/$fileName") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When `lastNoExecutionProgressEventTime` is reset to `Long.MinValue`,
the condition `now - noDataProgressEventInterval >= lastNoExecutionProgressEventTime` is always `true` when there is no new data. The progress reporter will then report an empty progress event.