
Commit 19d5dd0

Merge github.com:apache/spark
2 parents f7f5bf0 + 0adc932

37 files changed: +88 -87 lines

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 2 additions & 2 deletions
@@ -933,7 +933,6 @@ class SparkContext(config: SparkConf) extends Logging {
   def stop() {
     postApplicationEnd()
     ui.stop()
-    eventLogger.foreach(_.stop())
     // Do this only if not stopped already - best case effort.
     // prevent NPE if stopped more than once.
     val dagSchedulerCopy = dagScheduler
@@ -942,13 +941,14 @@ class SparkContext(config: SparkConf) extends Logging {
       metadataCleaner.cancel()
       cleaner.foreach(_.stop())
       dagSchedulerCopy.stop()
-      listenerBus.stop()
       taskScheduler = null
       // TODO: Cache.stop()?
       env.stop()
       SparkEnv.set(null)
       ShuffleMapTask.clearCache()
       ResultTask.clearCache()
+      listenerBus.stop()
+      eventLogger.foreach(_.stop())
       logInfo("Successfully stopped SparkContext")
     } else {
       logInfo("SparkContext already stopped")

core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala

Lines changed: 19 additions & 13 deletions
@@ -36,6 +36,22 @@ private[spark] class LiveListenerBus extends SparkListenerBus with Logging {
   private val eventQueue = new LinkedBlockingQueue[SparkListenerEvent](EVENT_QUEUE_CAPACITY)
   private var queueFullErrorMessageLogged = false
   private var started = false
+  private val listenerThread = new Thread("SparkListenerBus") {
+    setDaemon(true)
+    override def run() {
+      while (true) {
+        val event = eventQueue.take
+        if (event == SparkListenerShutdown) {
+          // Get out of the while loop and shutdown the daemon thread
+          return
+        }
+        postToAll(event)
+      }
+    }
+  }
+
+  // Exposed for testing
+  @volatile private[spark] var stopCalled = false
 
   /**
    * Start sending events to attached listeners.
@@ -48,20 +64,8 @@ private[spark] class LiveListenerBus extends SparkListenerBus with Logging {
     if (started) {
       throw new IllegalStateException("Listener bus already started!")
     }
+    listenerThread.start()
     started = true
-    new Thread("SparkListenerBus") {
-      setDaemon(true)
-      override def run() {
-        while (true) {
-          val event = eventQueue.take
-          if (event == SparkListenerShutdown) {
-            // Get out of the while loop and shutdown the daemon thread
-            return
-          }
-          postToAll(event)
-        }
-      }
-    }.start()
   }
 
   def post(event: SparkListenerEvent, blocking: Boolean = false) {
@@ -98,9 +102,11 @@ private[spark] class LiveListenerBus extends SparkListenerBus with Logging {
   }
 
   def stop() {
+    stopCalled = true
     if (!started) {
       throw new IllegalStateException("Attempted to stop a listener bus that has not yet started!")
     }
     post(SparkListenerShutdown)
+    listenerThread.join()
   }
 }
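
For context: the hunks above replace the anonymous listener thread started inside start() with a named listenerThread field, so that stop() can post the SparkListenerShutdown sentinel and then join() the thread, returning only after every event already queued has been handled. Below is a minimal, self-contained sketch of that sentinel-and-join pattern; the names (EventBusSketch, EventBus, Message, Shutdown) are illustrative and are not part of Spark's API.

import java.util.concurrent.LinkedBlockingQueue

// Illustrative sketch of the drain-on-stop pattern used in the hunks above:
// post a sentinel event, then join the consumer thread so that stop() returns
// only after everything already in the queue has been processed.
object EventBusSketch {
  sealed trait Event
  case class Message(body: String) extends Event
  case object Shutdown extends Event

  class EventBus(handle: String => Unit) {
    private val queue = new LinkedBlockingQueue[Event](1000)
    private val consumer = new Thread("event-bus-consumer") {
      setDaemon(true)
      override def run(): Unit = {
        while (true) {
          queue.take() match {
            case Shutdown => return          // sentinel: leave the loop, thread exits
            case Message(body) => handle(body)
          }
        }
      }
    }

    def start(): Unit = consumer.start()
    def post(event: Event): Unit = queue.put(event)

    def stop(): Unit = {
      queue.put(Shutdown)  // processed only after every event already in the queue
      consumer.join()      // block until the consumer has drained the queue
    }
  }

  def main(args: Array[String]): Unit = {
    val bus = new EventBus(body => println(s"handled: $body"))
    bus.start()
    (1 to 5).foreach(i => bus.post(Message(s"event $i")))
    bus.stop()              // returns only after all five events were handled
    println("bus stopped, queue drained")
  }
}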

core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala

Lines changed: 45 additions & 0 deletions
@@ -17,6 +17,8 @@
 
 package org.apache.spark.scheduler
 
+import java.util.concurrent.Semaphore
+
 import scala.collection.mutable
 
 import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite}
@@ -72,6 +74,49 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatc
     }
   }
 
+  test("bus.stop() waits for the event queue to completely drain") {
+    @volatile var drained = false
+
+    // Tells the listener to stop blocking
+    val listenerWait = new Semaphore(1)
+
+    // When stop has returned
+    val stopReturned = new Semaphore(1)
+
+    class BlockingListener extends SparkListener {
+      override def onJobEnd(jobEnd: SparkListenerJobEnd) = {
+        listenerWait.acquire()
+        drained = true
+      }
+    }
+
+    val bus = new LiveListenerBus
+    val blockingListener = new BlockingListener
+
+    bus.addListener(blockingListener)
+    bus.start()
+    bus.post(SparkListenerJobEnd(0, JobSucceeded))
+
+    // the queue should not drain immediately
+    assert(!drained)
+
+    new Thread("ListenerBusStopper") {
+      override def run() {
+        // stop() will block until notify() is called below
+        bus.stop()
+        stopReturned.release(1)
+      }
+    }.start()
+
+    while (!bus.stopCalled) {
+      Thread.sleep(10)
+    }
+
+    listenerWait.release()
+    stopReturned.acquire()
+    assert(drained)
+  }
+
   test("basic creation of StageInfo") {
     val listener = new SaveStageAndTaskInfo
     sc.addSparkListener(listener)

examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala

Lines changed: 1 addition & 1 deletion
@@ -73,6 +73,6 @@ object SparkHdfsLR {
     }
 
     println("Final w: " + w)
-    System.exit(0)
+    sc.stop()
   }
 }
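
The example now ends with sc.stop() instead of System.exit(0), which lets the SparkContext shutdown sequence (including the listener bus and event logger reordered earlier in this commit) run before the JVM exits. A minimal sketch of that pattern, assuming a local master and an illustrative application name:

import org.apache.spark.{SparkConf, SparkContext}

// Illustrative driver program (not part of the commit): end with sc.stop()
// rather than System.exit(0) so SparkContext can shut down cleanly.
object StopInsteadOfExit {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("StopInsteadOfExit").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val evens = sc.parallelize(1 to 100).filter(_ % 2 == 0).count()
    println("Number of even values: " + evens)
    sc.stop()  // clean shutdown; no explicit JVM exit needed
  }
}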

mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@ import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
- *
  * The Java stubs necessary for the Python mllib bindings.
  */
 @DeveloperApi

mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala

Lines changed: 0 additions & 1 deletion
@@ -29,7 +29,6 @@ import org.apache.spark.rdd.RDD
 
 /**
  * :: Experimental ::
- *
 * Model for Naive Bayes Classifiers.
 *
 * @param labels list of labels

mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala

Lines changed: 0 additions & 4 deletions
@@ -78,7 +78,6 @@ class KMeans private (
 
   /**
    * :: Experimental ::
-   *
    * Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm
    * this many times with random starting conditions (configured by the initialization mode), then
    * return the best clustering found over any run. Default: 1.
@@ -398,9 +397,6 @@ object KMeans {
     MLUtils.fastSquaredDistance(v1.vector, v1.norm, v2.vector, v2.norm)
   }
 
-  /**
-   * :: Experimental ::
-   */
   @Experimental
   def main(args: Array[String]) {
     if (args.length < 4) {

mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala

Lines changed: 0 additions & 1 deletion
@@ -34,7 +34,6 @@ case class MatrixEntry(i: Long, j: Long, value: Double)
 
 /**
  * :: Experimental ::
- *
 * Represents a matrix in coordinate format.
 *
 * @param entries matrix entries

mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala

Lines changed: 0 additions & 2 deletions
@@ -26,15 +26,13 @@ import org.apache.spark.mllib.linalg.SingularValueDecomposition
 
 /**
  * :: Experimental ::
- *
 * Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]].
 */
 @Experimental
 case class IndexedRow(index: Long, vector: Vector)
 
 /**
  * :: Experimental ::
- *
 * Represents a row-oriented [[org.apache.spark.mllib.linalg.distributed.DistributedMatrix]] with
 * indexed rows.
 *

mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@ import org.apache.spark.Logging
 
 /**
  * :: Experimental ::
- *
 * Represents a row-oriented distributed Matrix with no meaningful row indices.
 *
 * @param rows rows stored as an RDD[Vector]
