
Commit cb0a5a6

Fixed docs and styles.
1 parent a24fefc commit cb0a5a6

File tree

4 files changed: +23 -9 lines changed

core/src/main/scala/org/apache/spark/ContextCleaner.scala

Lines changed: 7 additions & 6 deletions
@@ -30,7 +30,7 @@ private[spark] trait CleanerListener {
 }
 
 /**
- * Cleans RDDs and shuffle data. This should be instantiated only on the driver.
+ * Cleans RDDs and shuffle data.
  */
 private[spark] class ContextCleaner(env: SparkEnv) extends Logging {
 
@@ -62,12 +62,13 @@ private[spark] class ContextCleaner(env: SparkEnv) extends Logging {
     cleaningThread.interrupt()
   }
 
-  /** Clean all data and metadata related to a RDD, including shuffle files and metadata */
+  /** Clean (unpersist) RDD data. */
   def cleanRDD(rdd: RDD[_]) {
     enqueue(CleanRDD(rdd.sparkContext, rdd.id))
     logDebug("Enqueued RDD " + rdd + " for cleaning up")
   }
 
+  /** Clean shuffle data. */
   def cleanShuffle(shuffleId: Int) {
     enqueue(CleanShuffle(shuffleId))
     logDebug("Enqueued shuffle " + shuffleId + " for cleaning up")
@@ -102,16 +103,16 @@ private[spark] class ContextCleaner(env: SparkEnv) extends Logging {
 
   /** Perform RDD cleaning */
   private def doCleanRDD(sc: SparkContext, rddId: Int) {
-    logDebug("Cleaning rdd "+ rddId)
+    logDebug("Cleaning rdd " + rddId)
     sc.env.blockManager.master.removeRdd(rddId, false)
     sc.persistentRdds.remove(rddId)
     listeners.foreach(_.rddCleaned(rddId))
-    logInfo("Cleaned rdd "+ rddId)
+    logInfo("Cleaned rdd " + rddId)
   }
 
   /** Perform shuffle cleaning */
   private def doCleanShuffle(shuffleId: Int) {
-    logDebug("Cleaning shuffle "+ shuffleId)
+    logDebug("Cleaning shuffle " + shuffleId)
     mapOutputTrackerMaster.unregisterShuffle(shuffleId)
     blockManager.master.removeShuffle(shuffleId)
     listeners.foreach(_.shuffleCleaned(shuffleId))
@@ -123,4 +124,4 @@ private[spark] class ContextCleaner(env: SparkEnv) extends Logging {
   private def blockManager = env.blockManager
 
   private def isStopped = synchronized { stopped }
-}
+}
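
Note for readers skimming the diff: cleanRDD and cleanShuffle only enqueue work, and a daemon cleaning thread later performs the actual cleanup via doCleanRDD and doCleanShuffle. Below is a minimal, self-contained sketch of that enqueue-and-drain pattern; SimpleCleaner and the task fields are simplified stand-ins, not Spark's actual API.

    import java.util.concurrent.LinkedBlockingQueue

    // Simplified stand-ins for the cleaning tasks named in the diff; the real
    // CleanRDD also carries a SparkContext, which this sketch omits.
    sealed trait CleaningTask
    case class CleanRDD(rddId: Int) extends CleaningTask
    case class CleanShuffle(shuffleId: Int) extends CleaningTask

    class SimpleCleaner {
      private val queue = new LinkedBlockingQueue[CleaningTask]()
      @volatile private var stopped = false

      // A daemon thread drains the queue, mirroring ContextCleaner's cleaningThread.
      private val cleaningThread = new Thread("cleaning-thread") {
        override def run(): Unit = {
          try {
            while (!stopped) {
              queue.take() match {
                case CleanRDD(id)     => println(s"Cleaning rdd $id")
                case CleanShuffle(id) => println(s"Cleaning shuffle $id")
              }
            }
          } catch {
            case _: InterruptedException => // stop() interrupts a blocked take()
          }
        }
      }
      cleaningThread.setDaemon(true)
      cleaningThread.start()

      // Callers never block on the cleanup itself, matching cleanRDD/cleanShuffle.
      def enqueue(task: CleaningTask): Unit = queue.put(task)

      def stop(): Unit = { stopped = true; cleaningThread.interrupt() }
    }

The design choice this mirrors: a caller pays only the cost of a queue insert, while the slower BlockManager calls run off the caller's thread.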

core/src/main/scala/org/apache/spark/MapOutputTracker.scala

Lines changed: 13 additions & 1 deletion
@@ -54,6 +54,11 @@ private[spark] class MapOutputTrackerMasterActor(tracker: MapOutputTrackerMaster
   }
 }
 
+/**
+ * Class that keeps track of the location of the map output of a stage.
+ * This is abstract because different versions of MapOutputTracker
+ * (driver and worker) use different HashMaps to store the metadata.
+ */
 private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging {
 
   private val timeout = AkkaUtils.askTimeout(conf)
@@ -181,6 +186,10 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
   }
 }
 
+/**
+ * MapOutputTracker for the workers. This uses BoundedHashMap to keep track of
+ * a limited number of the most recently used map output statuses.
+ */
 private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTracker(conf) {
 
   /**
@@ -192,7 +201,10 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr
   protected val mapStatuses = new BoundedHashMap[Int, Array[MapStatus]](MAX_MAP_STATUSES, true)
 }
 
-
+/**
+ * MapOutputTracker for the driver. This uses TimeStampedHashMap to keep track of map
+ * output information, so that old output information can be removed based on a TTL.
+ */
 private[spark] class MapOutputTrackerMaster(conf: SparkConf)
   extends MapOutputTracker(conf) {
 

core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala

Lines changed: 2 additions & 1 deletion
@@ -50,7 +50,8 @@ private[spark] class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = fa
   override def get(key: A): Option[B] = {
     val timeStampedValue = internalMap.get(key)
     if (updateTimeStampOnGet && timeStampedValue != null) {
-      internalJavaMap.replace(key, timeStampedValue, TimeStampedValue(currentTime, timeStampedValue.value))
+      internalJavaMap.replace(key, timeStampedValue,
+        TimeStampedValue(currentTime, timeStampedValue.value))
     }
     Option(timeStampedValue).map(_.value)
   }
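
The rewrapped call above is worth dwelling on: when updateTimeStampOnGet is set, every read re-stamps the entry, and it does so with the three-argument ConcurrentHashMap.replace, which only swaps if the stored entry is still the one just read, so a concurrent put is never clobbered. A self-contained sketch of that behaviour; TinyTimeStampedMap and its members are illustrative names, not Spark's.

    import java.util.concurrent.ConcurrentHashMap

    // Mirrors the case class used by TimeStampedHashMap in the diff.
    case class TimeStampedValue[B](timestamp: Long, value: B)

    class TinyTimeStampedMap[A, B](updateTimeStampOnGet: Boolean = false) {
      private val internalJavaMap = new ConcurrentHashMap[A, TimeStampedValue[B]]()

      def put(key: A, value: B): Unit =
        internalJavaMap.put(key, TimeStampedValue(System.currentTimeMillis(), value))

      def get(key: A): Option[B] = {
        val tsv = internalJavaMap.get(key)
        if (updateTimeStampOnGet && tsv != null) {
          // Conditional replace: only swaps if the entry still equals tsv, so
          // a concurrent update between get() and replace() wins rather than
          // being silently overwritten.
          internalJavaMap.replace(key, tsv,
            TimeStampedValue(System.currentTimeMillis(), tsv.value))
        }
        Option(tsv).map(_.value)
      }

      // Drop entries whose timestamp is older than the given threshold time.
      def clearOldValues(threshTime: Long): Unit = {
        val it = internalJavaMap.entrySet().iterator()
        while (it.hasNext) {
          if (it.next().getValue.timestamp < threshTime) it.remove()
        }
      }
    }

With updateTimeStampOnGet = true this gives TTL-by-last-access rather than TTL-by-insertion: clearOldValues only evicts keys that have not been read or written since threshTime.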

core/src/main/scala/org/apache/spark/util/TimeStampedWeakValueHashMap.scala

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ private[util] case class TimeStampedWeakValue[T](timestamp: Long, weakValue: Wea
  * A map that stores the timestamp of when a key was inserted along with the value,
  * while ensuring that the values are weakly referenced. If the value is garbage collected and
  * the weak reference is null, the get() operation treats the key as non-existent. However,
- * the key is actually not remmoved in the current implementation. Key-value pairs whose
+ * the key is actually not removed in the current implementation. Key-value pairs whose
  * timestamps are older than a particular threshold time can then be removed using the
  * clearOldValues method. It exposes a scala.collection.mutable.Map interface to allow it to be a
  * drop-in replacement for Scala HashMaps.
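
The corrected comment describes the map's one subtle behaviour: values are held through weak references, so once a value is garbage collected, get() returns None even though the key itself lingers until a clearOldValues-style sweep. A minimal sketch of that behaviour under those assumptions; TinyWeakValueMap is an illustrative name, not Spark's class.

    import java.lang.ref.WeakReference
    import scala.collection.mutable

    class TinyWeakValueMap[A, B <: AnyRef] {
      // Pairs each weak reference with an insertion timestamp, like the
      // TimeStampedWeakValue case class shown in the diff.
      private case class Entry(timestamp: Long, ref: WeakReference[B])
      private val internalMap = new mutable.HashMap[A, Entry]()

      def put(key: A, value: B): Unit =
        internalMap(key) = Entry(System.currentTimeMillis(), new WeakReference(value))

      // ref.get() returns null once the value has been collected, so Option(...)
      // turns a dead entry into None without actually removing the key.
      def get(key: A): Option[B] =
        internalMap.get(key).flatMap(entry => Option(entry.ref.get()))

      // Entries older than the threshold are removed here, keys included; this
      // is what eventually reclaims keys whose values died long ago.
      def clearOldValues(threshTime: Long): Unit =
        internalMap.retain((_, entry) => entry.timestamp >= threshTime)
    }

The timestamp lives alongside the WeakReference because the reference alone records nothing about age; the pairing is what lets clearOldValues evict by threshold time.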
