Merged
19 commits
b14bfc3
[SPARK-19993][SQL] Caching logical plans containing subquery expressi…
dilipbiswal Apr 12, 2017
b938438
[MINOR][DOCS] Fix spacings in Structured Streaming Programming Guide
dongjinleekr Apr 12, 2017
bca4259
[MINOR][DOCS] JSON APIs related documentation fixes
HyukjinKwon Apr 12, 2017
044f7ec
[SPARK-20298][SPARKR][MINOR] fixed spelling mistake "charactor"
bdwyer2 Apr 12, 2017
ffc57b0
[SPARK-20302][SQL] Short circuit cast when from and to types are stru…
rxin Apr 12, 2017
2e1fd46
[SPARK-20296][TRIVIAL][DOCS] Count distinct error message for streaming
jtoka Apr 12, 2017
ceaf77a
[SPARK-18692][BUILD][DOCS] Test Java 8 unidoc build on Jenkins
HyukjinKwon Apr 12, 2017
504e62e
[SPARK-20303][SQL] Rename createTempFunction to registerFunction
gatorsmile Apr 12, 2017
5408553
[SPARK-20304][SQL] AssertNotNull should not include path in string re…
rxin Apr 12, 2017
99a9473
[SPARK-19570][PYSPARK] Allow to disable hive in pyspark shell
zjffdu Apr 12, 2017
924c424
[SPARK-20301][FLAKY-TEST] Fix Hadoop Shell.runCommand flakiness in St…
brkyvz Apr 12, 2017
a7b430b
[SPARK-15354][FLAKY-TEST] TopologyAwareBlockReplicationPolicyBehavior…
cloud-fan Apr 13, 2017
c5f1cc3
[SPARK-20131][CORE] Don't use `this` lock in StandaloneSchedulerBacke…
zsxwing Apr 13, 2017
ec68d8f
[SPARK-20189][DSTREAM] Fix spark kinesis testcases to remove deprecat…
yashs360 Apr 13, 2017
095d1cb
[SPARK-20265][MLLIB] Improve Prefix'span pre-processing efficiency
Syrux Apr 13, 2017
a4293c2
[SPARK-20284][CORE] Make {Des,S}erializationStream extend Closeable
Apr 13, 2017
fbe4216
[SPARK-20233][SQL] Apply star-join filter heuristics to dynamic progr…
ioana-delaney Apr 13, 2017
8ddf0d2
[SPARK-20232][PYTHON] Improve combineByKey docs
Apr 13, 2017
7536e28
[SPARK-20038][SQL] FileFormatWriter.ExecuteWriteTask.releaseResources…
steveloughran Apr 13, 2017
10 changes: 5 additions & 5 deletions R/pkg/R/DataFrame.R
@@ -2818,14 +2818,14 @@ setMethod("write.df",
signature(df = "SparkDataFrame"),
function(df, path = NULL, source = NULL, mode = "error", ...) {
if (!is.null(path) && !is.character(path)) {
stop("path should be charactor, NULL or omitted.")
stop("path should be character, NULL or omitted.")
}
if (!is.null(source) && !is.character(source)) {
stop("source should be character, NULL or omitted. It is the datasource specified ",
"in 'spark.sql.sources.default' configuration by default.")
}
if (!is.character(mode)) {
stop("mode should be charactor or omitted. It is 'error' by default.")
stop("mode should be character or omitted. It is 'error' by default.")
}
if (is.null(source)) {
source <- getDefaultSqlSource()
@@ -3040,7 +3040,7 @@ setMethod("fillna",
signature(x = "SparkDataFrame"),
function(x, value, cols = NULL) {
if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
stop("value should be an integer, numeric, charactor or named list.")
stop("value should be an integer, numeric, character or named list.")
}

if (class(value) == "list") {
@@ -3052,7 +3052,7 @@ setMethod("fillna",
# Check each item in the named list is of valid type
lapply(value, function(v) {
if (!(class(v) %in% c("integer", "numeric", "character"))) {
stop("Each item in value should be an integer, numeric or charactor.")
stop("Each item in value should be an integer, numeric or character.")
}
})

@@ -3598,7 +3598,7 @@ setMethod("write.stream",
"in 'spark.sql.sources.default' configuration by default.")
}
if (!is.null(outputMode) && !is.character(outputMode)) {
stop("outputMode should be charactor or omitted.")
stop("outputMode should be character or omitted.")
}
if (is.null(source)) {
source <- getDefaultSqlSource()
2 changes: 1 addition & 1 deletion R/pkg/R/SQLContext.R
@@ -606,7 +606,7 @@ tableToDF <- function(tableName) {
#' @note read.df since 1.4.0
read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
if (!is.null(path) && !is.character(path)) {
stop("path should be charactor, NULL or omitted.")
stop("path should be character, NULL or omitted.")
}
if (!is.null(source) && !is.character(source)) {
stop("source should be character, NULL or omitted. It is the datasource specified ",
6 changes: 3 additions & 3 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2926,9 +2926,9 @@ test_that("Call DataFrameWriter.save() API in Java without path and check argume
paste("source should be character, NULL or omitted. It is the datasource specified",
"in 'spark.sql.sources.default' configuration by default."))
expect_error(write.df(df, path = c(3)),
"path should be charactor, NULL or omitted.")
"path should be character, NULL or omitted.")
expect_error(write.df(df, mode = TRUE),
"mode should be charactor or omitted. It is 'error' by default.")
"mode should be character or omitted. It is 'error' by default.")
})

test_that("Call DataFrameWriter.load() API in Java without path and check argument types", {
@@ -2947,7 +2947,7 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume

# Arguments checking in R side.
expect_error(read.df(path = c(3)),
"path should be charactor, NULL or omitted.")
"path should be character, NULL or omitted.")
expect_error(read.df(jsonPath, source = c(1, 2)),
paste("source should be character, NULL or omitted. It is the datasource specified",
"in 'spark.sql.sources.default' configuration by default."))
10 changes: 5 additions & 5 deletions core/src/main/scala/org/apache/spark/rpc/RpcEndpoint.scala
@@ -35,7 +35,7 @@ private[spark] trait RpcEnvFactory {
*
* The life-cycle of an endpoint is:
*
* constructor -> onStart -> receive* -> onStop
* {@code constructor -> onStart -> receive* -> onStop}
*
* Note: `receive` can be called concurrently. If you want `receive` to be thread-safe, please use
* [[ThreadSafeRpcEndpoint]]
@@ -63,16 +63,16 @@ private[spark] trait RpcEndpoint {
}

/**
* Process messages from [[RpcEndpointRef.send]] or [[RpcCallContext.reply)]]. If receiving a
* unmatched message, [[SparkException]] will be thrown and sent to `onError`.
* Process messages from `RpcEndpointRef.send` or `RpcCallContext.reply`. If receiving a
* unmatched message, `SparkException` will be thrown and sent to `onError`.
*/
def receive: PartialFunction[Any, Unit] = {
case _ => throw new SparkException(self + " does not implement 'receive'")
}

/**
* Process messages from [[RpcEndpointRef.ask]]. If receiving a unmatched message,
* [[SparkException]] will be thrown and sent to `onError`.
* Process messages from `RpcEndpointRef.ask`. If receiving a unmatched message,
* `SparkException` will be thrown and sent to `onError`.
*/
def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
case _ => context.sendFailure(new SparkException(self + " won't reply anything"))
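For readers skimming the scaladoc changes above: the endpoint contract they describe (constructor -> onStart -> receive* -> onStop, with receive handling one-way messages and receiveAndReply handling ask-style messages) can be illustrated by a minimal endpoint. This is a hedged sketch, not code from this PR; the class name and message handling are assumptions.

package org.apache.spark.rpc

// Minimal sketch of an RpcEndpoint following the documented life-cycle:
// constructor -> onStart -> receive* -> onStop. RpcEndpoint is private[spark],
// so the sketch has to live inside Spark's source tree; all names are illustrative.
private[spark] class EchoEndpoint(override val rpcEnv: RpcEnv) extends RpcEndpoint {

  override def onStart(): Unit = {
    // Invoked before the endpoint begins handling messages.
  }

  // Handles fire-and-forget messages sent via RpcEndpointRef.send.
  override def receive: PartialFunction[Any, Unit] = {
    case msg: String => // nothing to reply to for one-way messages
  }

  // Handles ask-style messages; unmatched messages are reported through onError.
  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
    case msg: String => context.reply(msg)
  }

  override def onStop(): Unit = {
    // Invoked once the endpoint will no longer receive messages.
  }
}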
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/rpc/RpcTimeout.scala
@@ -26,7 +26,7 @@ import org.apache.spark.SparkConf
import org.apache.spark.util.{ThreadUtils, Utils}

/**
* An exception thrown if RpcTimeout modifies a [[TimeoutException]].
* An exception thrown if RpcTimeout modifies a `TimeoutException`.
*/
private[rpc] class RpcTimeoutException(message: String, cause: TimeoutException)
extends TimeoutException(message) { initCause(cause) }
@@ -607,7 +607,7 @@ class DAGScheduler(
* @param resultHandler callback to pass each result to
* @param properties scheduler properties to attach to this job, e.g. fair scheduler pool name
*
* @throws Exception when the job fails
* @note Throws `Exception` when the job fails
*/
def runJob[T, U](
rdd: RDD[T],
@@ -644,7 +644,7 @@
*
* @param rdd target RDD to run tasks on
* @param func a function to run on each partition of the RDD
* @param evaluator [[ApproximateEvaluator]] to receive the partial results
* @param evaluator `ApproximateEvaluator` to receive the partial results
* @param callSite where in the user program this job was called
* @param timeout maximum time to wait for the job, in milliseconds
* @param properties scheduler properties to attach to this job, e.g. fair scheduler pool name
@@ -42,7 +42,7 @@ private[spark] trait ExternalClusterManager {

/**
* Create a scheduler backend for the given SparkContext and scheduler. This is
* called after task scheduler is created using [[ExternalClusterManager.createTaskScheduler()]].
* called after task scheduler is created using `ExternalClusterManager.createTaskScheduler()`.
* @param sc SparkContext
* @param masterURL the master URL
* @param scheduler TaskScheduler that will be used with the scheduler backend.
@@ -38,7 +38,7 @@ import org.apache.spark.util.{AccumulatorV2, ThreadUtils, Utils}

/**
* Schedules tasks for multiple types of clusters by acting through a SchedulerBackend.
* It can also work with a local setup by using a [[LocalSchedulerBackend]] and setting
* It can also work with a local setup by using a `LocalSchedulerBackend` and setting
* isLocal to true. It handles common logic, like determining a scheduling order across jobs, waking
* up to launch speculative tasks, etc.
*
@@ -704,12 +704,12 @@ private[spark] object TaskSchedulerImpl {
* Used to balance containers across hosts.
*
* Accepts a map of hosts to resource offers for that host, and returns a prioritized list of
* resource offers representing the order in which the offers should be used. The resource
* resource offers representing the order in which the offers should be used. The resource
* offers are ordered such that we'll allocate one container on each host before allocating a
* second container on any host, and so on, in order to reduce the damage if a host fails.
*
* For example, given <h1, [o1, o2, o3]>, <h2, [o4]>, <h1, [o5, o6]>, returns
* [o1, o5, o4, 02, o6, o3]
* For example, given {@literal <h1, [o1, o2, o3]>}, {@literal <h2, [o4]>} and
* {@literal <h3, [o5, o6]>}, returns {@literal [o1, o5, o4, o2, o6, o3]}.
*/
def prioritizeContainers[K, T] (map: HashMap[K, ArrayBuffer[T]]): List[T] = {
val _keyList = new ArrayBuffer[K](map.size)
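The prioritization that the reworded scaladoc above describes is a per-host round robin: one offer is taken from each host per pass until all offers are consumed. A stand-alone sketch of that idea follows; it is an assumed re-implementation for illustration, not the body of prioritizeContainers, and the host order within a pass depends on the map's iteration order in the real method.

import scala.collection.mutable

// Round-robin interleaving sketch: take one offer per host per pass, so one container
// is allocated on every host before a second container lands on any host.
def roundRobinOffers[K, T](offersByHost: Seq[(K, Seq[T])]): List[T] = {
  val queues = offersByHost.map { case (_, offers) => mutable.Queue(offers: _*) }
  val ordered = mutable.ListBuffer.empty[T]
  while (queues.exists(_.nonEmpty)) {
    queues.foreach { q =>
      if (q.nonEmpty) ordered += q.dequeue()
    }
  }
  ordered.toList
}

// With this host ordering, <h1, [o1, o2, o3]>, <h2, [o4]>, <h3, [o5, o6]> interleaves to
// [o1, o4, o5, o2, o6, o3]:
// roundRobinOffers(Seq("h1" -> Seq("o1", "o2", "o3"), "h2" -> Seq("o4"), "h3" -> Seq("o5", "o6")))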
@@ -18,6 +18,7 @@
package org.apache.spark.scheduler.cluster

import java.util.concurrent.Semaphore
import java.util.concurrent.atomic.AtomicBoolean

import scala.concurrent.Future

@@ -42,7 +43,7 @@ private[spark] class StandaloneSchedulerBackend(
with Logging {

private var client: StandaloneAppClient = null
private var stopping = false
private val stopping = new AtomicBoolean(false)
private val launcherBackend = new LauncherBackend() {
override protected def onStopRequest(): Unit = stop(SparkAppHandle.State.KILLED)
}
@@ -112,7 +113,7 @@ private[spark] class StandaloneSchedulerBackend(
launcherBackend.setState(SparkAppHandle.State.RUNNING)
}

override def stop(): Unit = synchronized {
override def stop(): Unit = {
stop(SparkAppHandle.State.FINISHED)
}

@@ -125,14 +126,14 @@

override def disconnected() {
notifyContext()
if (!stopping) {
if (!stopping.get) {
logWarning("Disconnected from Spark cluster! Waiting for reconnection...")
}
}

override def dead(reason: String) {
notifyContext()
if (!stopping) {
if (!stopping.get) {
launcherBackend.setState(SparkAppHandle.State.KILLED)
logError("Application has been killed. Reason: " + reason)
try {
@@ -206,20 +207,20 @@ private[spark] class StandaloneSchedulerBackend(
registrationBarrier.release()
}

private def stop(finalState: SparkAppHandle.State): Unit = synchronized {
try {
stopping = true

super.stop()
client.stop()
private def stop(finalState: SparkAppHandle.State): Unit = {
if (stopping.compareAndSet(false, true)) {
try {
super.stop()
client.stop()

val callback = shutdownCallback
if (callback != null) {
callback(this)
val callback = shutdownCallback
if (callback != null) {
callback(this)
}
} finally {
launcherBackend.setState(finalState)
launcherBackend.close()
}
} finally {
launcherBackend.setState(finalState)
launcherBackend.close()
}
}

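The change above swaps a synchronized var for an AtomicBoolean, so shutdown runs at most once and no longer needs the lock on `this` that caused the flakiness tracked in SPARK-20301/SPARK-20131. The pattern in isolation, with assumed class and helper names rather than the actual Spark code, is roughly:

import java.util.concurrent.atomic.AtomicBoolean

// One-shot shutdown sketch: compareAndSet lets only the first caller run the cleanup,
// so stop() is idempotent and safe to call from several threads without synchronized.
class OneShotStop(cleanup: () => Unit, onStopped: () => Unit) {
  private val stopping = new AtomicBoolean(false)

  def stop(): Unit = {
    if (stopping.compareAndSet(false, true)) {
      try {
        cleanup()    // e.g. stop the scheduler backend and the app client
      } finally {
        onStopped()  // e.g. report the final state to the launcher
      }
    }
    // Later callers see stopping == true and return immediately.
  }

  def isStopping: Boolean = stopping.get
}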
@@ -125,15 +125,15 @@ abstract class SerializerInstance {
* A stream for writing serialized objects.
*/
@DeveloperApi
abstract class SerializationStream {
abstract class SerializationStream extends Closeable {
/** The most general-purpose method to write an object. */
def writeObject[T: ClassTag](t: T): SerializationStream
/** Writes the object representing the key of a key-value pair. */
def writeKey[T: ClassTag](key: T): SerializationStream = writeObject(key)
/** Writes the object representing the value of a key-value pair. */
def writeValue[T: ClassTag](value: T): SerializationStream = writeObject(value)
def flush(): Unit
def close(): Unit
override def close(): Unit

def writeAll[T: ClassTag](iter: Iterator[T]): SerializationStream = {
while (iter.hasNext) {
@@ -149,14 +149,14 @@ abstract class SerializationStream {
* A stream for reading serialized objects.
*/
@DeveloperApi
abstract class DeserializationStream {
abstract class DeserializationStream extends Closeable {
/** The most general-purpose method to read an object. */
def readObject[T: ClassTag](): T
/** Reads the object representing the key of a key-value pair. */
def readKey[T: ClassTag](): T = readObject[T]()
/** Reads the object representing the value of a key-value pair. */
def readValue[T: ClassTag](): T = readObject[T]()
def close(): Unit
override def close(): Unit

/**
* Read the elements of this stream through an iterator. This can only be called once, as
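Because SerializationStream and DeserializationStream now extend java.io.Closeable, they work with any resource-management helper written against Closeable. A small helper to show the idea; the helper itself is an assumption for illustration, not a Spark API.

import java.io.Closeable

// Generic loan-pattern helper for Closeable resources; usable with the serialization
// streams above now that they extend Closeable. Illustrative only.
def withCloseable[C <: Closeable, T](resource: C)(body: C => T): T = {
  try {
    body(resource)
  } finally {
    resource.close()
  }
}

// Usage sketch, assuming a SerializerInstance `instance` and an OutputStream `out` in scope:
// withCloseable(instance.serializeStream(out)) { stream =>
//   stream.writeObject("payload")
//   stream.flush()
// }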
@@ -66,7 +66,7 @@ private[spark] trait BlockData {
/**
* Returns a Netty-friendly wrapper for the block's data.
*
* @see [[ManagedBuffer#convertToNetty()]]
* Please see `ManagedBuffer.convertToNetty()` for more details.
*/
def toNetty(): Object

4 changes: 2 additions & 2 deletions core/src/test/scala/org/apache/spark/AccumulatorSuite.scala
@@ -243,7 +243,7 @@ private[spark] object AccumulatorSuite {
import InternalAccumulator._

/**
* Create a long accumulator and register it to [[AccumulatorContext]].
* Create a long accumulator and register it to `AccumulatorContext`.
*/
def createLongAccum(
name: String,
@@ -258,7 +258,7 @@
}

/**
* Make an [[AccumulableInfo]] out of an [[Accumulable]] with the intent to use the
* Make an `AccumulableInfo` out of an [[Accumulable]] with the intent to use the
* info as an accumulator update.
*/
def makeInfo(a: AccumulatorV2[_, _]): AccumulableInfo = a.toInfo(Some(a.value), None)
@@ -27,7 +27,7 @@ import org.apache.spark.network.shuffle.{ExternalShuffleBlockHandler, ExternalSh
/**
* This suite creates an external shuffle server and routes all shuffle fetches through it.
* Note that failures in this suite may arise due to changes in Spark that invalidate expectations
* set up in [[ExternalShuffleBlockHandler]], such as changing the format of shuffle files or how
* set up in `ExternalShuffleBlockHandler`, such as changing the format of shuffle files or how
* we hash files into folders.
*/
class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll {
@@ -22,7 +22,7 @@ import org.scalatest.BeforeAndAfterAll
import org.scalatest.BeforeAndAfterEach
import org.scalatest.Suite

/** Manages a local `sc` {@link SparkContext} variable, correctly stopping it after each test. */
/** Manages a local `sc` `SparkContext` variable, correctly stopping it after each test. */
trait LocalSparkContext extends BeforeAndAfterEach with BeforeAndAfterAll { self: Suite =>

@transient var sc: SparkContext = _
@@ -95,12 +95,12 @@ abstract class SchedulerIntegrationSuite[T <: MockBackend: ClassTag] extends Spa
}

/**
* A map from partition -> results for all tasks of a job when you call this test framework's
* A map from partition to results for all tasks of a job when you call this test framework's
* [[submit]] method. Two important considerations:
*
* 1. If there is a job failure, results may or may not be empty. If any tasks succeed before
* the job has failed, they will get included in `results`. Instead, check for job failure by
* checking [[failure]]. (Also see [[assertDataStructuresEmpty()]])
* checking [[failure]]. (Also see `assertDataStructuresEmpty()`)
*
* 2. This only gets cleared between tests. So you'll need to do special handling if you submit
* more than one job in one test.
@@ -29,7 +29,7 @@ import org.apache.spark.serializer.KryoTest.RegistratorWithoutAutoReset
/**
* Tests to ensure that [[Serializer]] implementations obey the API contracts for methods that
* describe properties of the serialized stream, such as
* [[Serializer.supportsRelocationOfSerializedObjects]].
* `Serializer.supportsRelocationOfSerializedObjects`.
*/
class SerializerPropertiesSuite extends SparkFunSuite {

@@ -70,9 +70,18 @@ class RandomBlockReplicationPolicyBehavior extends SparkFunSuite
}
}

/**
* Returns a sequence of [[BlockManagerId]], whose rack is randomly picked from the given `racks`.
* Note that, each rack will be picked at least once from `racks`, if `count` is greater or equal
* to the number of `racks`.
*/
protected def generateBlockManagerIds(count: Int, racks: Seq[String]): Seq[BlockManagerId] = {
(1 to count).map{i =>
BlockManagerId(s"Exec-$i", s"Host-$i", 10000 + i, Some(racks(Random.nextInt(racks.size))))
val randomizedRacks: Seq[String] = Random.shuffle(
racks ++ racks.length.until(count).map(_ => racks(Random.nextInt(racks.length)))
)

(0 until count).map { i =>
BlockManagerId(s"Exec-$i", s"Host-$i", 10000 + i, Some(randomizedRacks(i)))
}
}
}
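The new scaladoc above promises that every rack is picked at least once when count is at least the number of racks. A hedged, test-style check of that property could read as follows; it assumes the helper is reachable from the test and that BlockManagerId exposes the rack through topologyInfo.

// Property sketch for generateBlockManagerIds (illustrative, not part of this diff):
// with count >= racks.size, each rack should be assigned to at least one BlockManagerId.
val racks = Seq("/rack-1", "/rack-2", "/rack-3")
val ids = generateBlockManagerIds(count = 10, racks = racks)
assert(ids.size == 10)
assert(ids.flatMap(_.topologyInfo).toSet == racks.toSet)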
15 changes: 15 additions & 0 deletions dev/run-tests.py
@@ -344,6 +344,19 @@ def build_spark_sbt(hadoop_version):
exec_sbt(profiles_and_goals)


def build_spark_unidoc_sbt(hadoop_version):
set_title_and_block("Building Unidoc API Documentation", "BLOCK_DOCUMENTATION")
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
sbt_goals = ["unidoc"]
profiles_and_goals = build_profiles + sbt_goals

print("[info] Building Spark unidoc (w/Hive 1.2.1) using SBT with these arguments: ",
" ".join(profiles_and_goals))

exec_sbt(profiles_and_goals)


def build_spark_assembly_sbt(hadoop_version):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
@@ -352,6 +365,8 @@ def build_spark_assembly_sbt(hadoop_version):
print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
" ".join(profiles_and_goals))
exec_sbt(profiles_and_goals)
# Make sure that Java and Scala API documentation can be generated
build_spark_unidoc_sbt(hadoop_version)


def build_apache_spark(build_tool, hadoop_version):