1717
1818package org .apache .spark
1919
20+ import java .lang .ref .{ReferenceQueue , WeakReference }
21+
2022import scala .collection .mutable .{ArrayBuffer , SynchronizedBuffer }
2123
22- import java .util .concurrent .{LinkedBlockingQueue , TimeUnit }
23- import org .apache .spark .storage .StorageLevel
24+ import org .apache .spark .rdd .RDD
2425
2526/** Listener used in tests to be notified when an item has been cleaned by the ContextCleaner. */
2627private [spark] trait CleanerListener {
@@ -34,20 +35,27 @@ private[spark] trait CleanerListener {
3435private [spark] class ContextCleaner (sc : SparkContext ) extends Logging {
3536
3637 /** Tasks the cleaning thread can run; each case class carries the id of the resource to clean. */
37- private sealed trait CleaningTask
38- private case class CleanRDD (rddId : Int ) extends CleaningTask
39- private case class CleanShuffle (shuffleId : Int ) extends CleaningTask
38+ private sealed trait CleanupTask
39+ private case class CleanRDD (rddId : Int ) extends CleanupTask
40+ private case class CleanShuffle (shuffleId : Int ) extends CleanupTask
4041 // TODO: add CleanBroadcast
4142
42- private val queue = new LinkedBlockingQueue [CleaningTask ]
43+ private val referenceBuffer = new ArrayBuffer [WeakReferenceWithCleanupTask ]
44+ with SynchronizedBuffer [WeakReferenceWithCleanupTask ] // appended to by registerForCleanup, removed from by keepCleaning; presumably keeps the reference objects themselves reachable so they can be enqueued -- TODO confirm
45+ private val referenceQueue = new ReferenceQueue [AnyRef ] // queue every WeakReferenceWithCleanupTask is created against; polled by keepCleaning
4346
44- protected val listeners = new ArrayBuffer [CleanerListener ]
47+ private val listeners = new ArrayBuffer [CleanerListener ]
4548 with SynchronizedBuffer [CleanerListener ]
4649
4750 private val cleaningThread = new Thread () { override def run () { keepCleaning() }} // runs the keepCleaning() loop; start() marks it as a daemon
4851
52+ private val REF_QUEUE_POLL_TIMEOUT = 100 // milliseconds; the timeout handed to referenceQueue.remove(...) in keepCleaning
53+
4954 @ volatile private var stopped = false
5055
56+ private class WeakReferenceWithCleanupTask (referent : AnyRef , val task : CleanupTask )
57+ extends WeakReference (referent, referenceQueue) // weak reference registered with referenceQueue; `task` is what keepCleaning runs after dequeuing it
58+
5159 /** Start the cleaner */
5260 def start () {
5361 cleaningThread.setDaemon(true )
@@ -62,21 +70,27 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
6270 }
6371
6472 /**
65- * Schedule cleanup of RDD data. Do not perform any time or resource intensive
66- * computation in this function as this is called from a finalize() function.
73+ * Register an RDD for cleanup when it is garbage collected.
6774 */
68- def scheduleRDDCleanup (rddId : Int ) {
69- enqueue(CleanRDD (rddId))
70- logDebug(" Enqueued RDD " + rddId + " for cleaning up" )
75+ def registerRDDForCleanup (rdd : RDD [_]) {
76+ registerForCleanup(rdd, CleanRDD (rdd.id)) // the task stores only the Int id, not the RDD object
7177 }
7278
7379 /**
74- * Schedule cleanup of shuffle data. Do not perform any time or resource intensive
75- * computation in this function as this is called from a finalize() function.
80+ * Register a shuffle dependency for cleanup when it is garbage collected.
7681 */
77- def scheduleShuffleCleanup (shuffleId : Int ) {
78- enqueue(CleanShuffle (shuffleId))
79- logDebug(" Enqueued shuffle " + shuffleId + " for cleaning up" )
82+ def registerShuffleForCleanup (shuffleDependency : ShuffleDependency [_, _]) {
83+ registerForCleanup(shuffleDependency, CleanShuffle (shuffleDependency.shuffleId)) // the task stores only the shuffle id, not the dependency object
84+ }
85+
86+ /** Clean up the given RDD right away by running doCleanupRDD on its id. */
87+ def cleanupRDD (rdd : RDD [_]) {
88+ doCleanupRDD(rdd.id)
89+ }
90+
91+ /** Clean up the given shuffle right away by running doCleanupShuffle on its shuffle id. */
92+ def cleanupShuffle (shuffleDependency : ShuffleDependency [_, _]) {
93+ doCleanupShuffle(shuffleDependency.shuffleId)
8094 }
8195
8296 /** Attach a listener object to get information of when objects are cleaned. */
@@ -91,24 +105,23 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
91105 sc.persistentRdds.remove(rddId)
92106 }
93107
94- /**
95- * Enqueue a cleaning task. Do not perform any time or resource intensive
96- * computation in this function as this is called from a finalize() function.
97- */
98- private def enqueue (task : CleaningTask ) {
99- queue.put(task)
108+ /** Register an object for cleanup: wrap it in a weak reference tagged with `task` and store that reference in referenceBuffer. */
109+ private def registerForCleanup (objectForCleanup : AnyRef , task : CleanupTask ) {
110+ referenceBuffer += new WeakReferenceWithCleanupTask (objectForCleanup, task)
100111 }
101112
102113 /** Keep cleaning RDDs and shuffle data */
103114 private def keepCleaning () {
104115 while (! isStopped) {
105116 try {
106- val taskOpt = Option (queue.poll(100 , TimeUnit .MILLISECONDS ))
107- taskOpt.foreach { task =>
108- logDebug(" Got cleaning task " + taskOpt.get)
117+ val reference = Option (referenceQueue.remove(REF_QUEUE_POLL_TIMEOUT ))
118+ .map(_.asInstanceOf [WeakReferenceWithCleanupTask ])
119+ reference.map(_.task).foreach { task =>
120+ logDebug(" Got cleaning task " + task)
121+ referenceBuffer -= reference.get
109122 task match {
110- case CleanRDD (rddId) => doCleanRDD (rddId)
111- case CleanShuffle (shuffleId) => doCleanShuffle (shuffleId)
123+ case CleanRDD (rddId) => doCleanupRDD (rddId)
124+ case CleanShuffle (shuffleId) => doCleanupShuffle (shuffleId)
112125 }
113126 }
114127 } catch {
@@ -119,8 +132,8 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
119132 }
120133 }
121134
122- /** Perform RDD cleaning */
123- private def doCleanRDD (rddId : Int ) {
135+ /** Perform RDD cleanup. */
136+ private def doCleanupRDD (rddId : Int ) {
124137 try {
125138 logDebug(" Cleaning RDD " + rddId)
126139 unpersistRDD(rddId, false )
@@ -131,8 +144,8 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
131144 }
132145 }
133146
134- /** Perform shuffle cleaning */
135- private def doCleanShuffle (shuffleId : Int ) {
147+ /** Perform shuffle cleanup. */
148+ private def doCleanupShuffle (shuffleId : Int ) {
136149 try {
137150 logDebug(" Cleaning shuffle " + shuffleId)
138151 mapOutputTrackerMaster.unregisterShuffle(shuffleId)
@@ -144,7 +157,8 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
144157 }
145158 }
146159
147- private def mapOutputTrackerMaster = sc.env.mapOutputTracker.asInstanceOf [MapOutputTrackerMaster ]
160+ private def mapOutputTrackerMaster =
161+ sc.env.mapOutputTracker.asInstanceOf [MapOutputTrackerMaster ] // NOTE(review): unchecked downcast; assumes sc.env holds the master tracker -- confirm
148162
149163 private def blockManagerMaster = sc.env.blockManager.master
150164
0 commit comments