-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-24309][CORE] AsyncEventQueue should stop on interrupt. #21356
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a689f52
0a44c06
4a1f657
09d55af
fc8a197
008d14d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,7 +34,8 @@ import org.apache.spark.util.Utils | |
| * Delivery will only begin when the `start()` method is called. The `stop()` method should be | ||
| * called when no more events need to be delivered. | ||
| */ | ||
| private class AsyncEventQueue(val name: String, conf: SparkConf, metrics: LiveListenerBusMetrics) | ||
| private class AsyncEventQueue(val name: String, conf: SparkConf, metrics: LiveListenerBusMetrics, | ||
| bus: LiveListenerBus) | ||
| extends SparkListenerBus | ||
| with Logging { | ||
|
|
||
|
|
@@ -97,6 +98,11 @@ private class AsyncEventQueue(val name: String, conf: SparkConf, metrics: LiveLi | |
| } catch { | ||
| case ie: InterruptedException => | ||
| logInfo(s"Stopping listener queue $name.", ie) | ||
| stopped.set(true) | ||
|
||
| bus.removeQueue(name) | ||
| // we're not going to process any more events in this queue, so might as well clear it | ||
| eventQueue.clear() | ||
| eventCount.set(0) | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -102,7 +102,7 @@ private[spark] class LiveListenerBus(conf: SparkConf) { | |
| queue.addListener(listener) | ||
|
|
||
| case None => | ||
| val newQueue = new AsyncEventQueue(queue, conf, metrics) | ||
| val newQueue = new AsyncEventQueue(queue, conf, metrics, this) | ||
| newQueue.addListener(listener) | ||
| if (started.get()) { | ||
| newQueue.start(sparkContext) | ||
|
|
@@ -111,6 +111,12 @@ private[spark] class LiveListenerBus(conf: SparkConf) { | |
| } | ||
| } | ||
|
|
||
| private[scheduler] def removeQueue(queue: String): Unit = synchronized { | ||
|
||
| queues.asScala.find(_.name == queue).foreach { q => | ||
| queues.remove(q) | ||
| } | ||
| } | ||
|
|
||
| def removeListener(listener: SparkListenerInterface): Unit = synchronized { | ||
| // Remove listener from all queues it was added to, and stop queues that have become empty. | ||
| queues.asScala | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -489,6 +489,38 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match | |
| assert(bus.findListenersByClass[BasicJobCounter]().isEmpty) | ||
| } | ||
|
|
||
| test("interrupt within listener is handled correctly") { | ||
| val conf = new SparkConf(false) | ||
| .set(LISTENER_BUS_EVENT_QUEUE_CAPACITY, 5) | ||
| val bus = new LiveListenerBus(conf) | ||
| val counter1 = new BasicJobCounter() | ||
| val counter2 = new BasicJobCounter() | ||
| val interruptingListener = new InterruptingListener | ||
| bus.addToSharedQueue(counter1) | ||
| bus.addToSharedQueue(interruptingListener) | ||
| bus.addToStatusQueue(counter2) | ||
| assert(bus.activeQueues() === Set(SHARED_QUEUE, APP_STATUS_QUEUE)) | ||
| assert(bus.findListenersByClass[BasicJobCounter]().size === 2) | ||
|
|
||
| bus.start(mockSparkContext, mockMetricsSystem) | ||
|
|
||
| // after we post one event, the shared queue should get stopped because of the interrupt | ||
|
||
| bus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) | ||
| bus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) | ||
| assert(bus.activeQueues() === Set(APP_STATUS_QUEUE)) | ||
| assert(bus.findListenersByClass[BasicJobCounter]().size === 1) | ||
| assert(counter2.count === 1) | ||
|
|
||
| // posting more events should be fine, they'll just get processed from the OK queue. | ||
| (0 until 5).foreach { _ => bus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) } | ||
| bus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) | ||
| assert(counter2.count === 6) | ||
|
|
||
| // Make sure stopping works -- this requires putting a poison pill in all active queues, which | ||
| // would fail if our interrupted queue was still active, as its queue would be full. | ||
| bus.stop() | ||
| } | ||
|
|
||
| /** | ||
| * Assert that the given list of numbers has an average that is greater than zero. | ||
| */ | ||
|
|
@@ -547,6 +579,14 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match | |
| override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { throw new Exception } | ||
| } | ||
|
|
||
| /** | ||
| * A simple listener that interrupts on job end. | ||
| */ | ||
| private class InterruptingListener extends SparkListener { | ||
| override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { | ||
| Thread.currentThread().interrupt() | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // These classes can't be declared inside of the SparkListenerSuite class because we don't want | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One parameter per line now that they no longer fit on a single line.