Merged
Changes from all commits (20 commits)
6f41c59
[SPARK-22690][ML] Imputer inherit HasOutputCols
zhengruifeng Dec 6, 2017
813c0f9
[SPARK-22704][SQL] Least and Greatest use less global variables
kiszk Dec 6, 2017
e98f964
[SPARK-22695][SQL] ScalaUDF should not use global variables
mgaido91 Dec 6, 2017
4286cba
[SPARK-22710] ConfigBuilder.fallbackConf should trigger onCreate func…
rxin Dec 6, 2017
51066b4
[SPARK-14228][CORE][YARN] Lost executor of RPC disassociated, and occ…
Dec 6, 2017
effca98
[SPARK-22720][SS] Make EventTimeWatermark Extend UnaryNode
gatorsmile Dec 6, 2017
9948b86
[SPARK-22516][SQL] Bump up Univocity version to 2.5.9
smurakozi Dec 6, 2017
f110a7f
[SPARK-22693][SQL] CreateNamedStruct and InSet should not use global …
mgaido91 Dec 6, 2017
8ae004b
[SPARK-22688][SQL] Upgrade Janino version to 3.0.8
kiszk Dec 7, 2017
d32337b
[SPARK-22721] BytesToBytesMap peak memory usage not accurate after re…
juliuszsompolski Dec 7, 2017
c1e5688
[SPARK-22672][SQL][TEST] Refactor ORC Tests
dongjoon-hyun Dec 7, 2017
e103adf
[SPARK-22703][SQL] make ColumnarRow an immutable view
cloud-fan Dec 7, 2017
ea2fbf4
[SPARK-22705][SQL] Case, Coalesce, and In use less global variables
kiszk Dec 7, 2017
2be4482
[SPARK-22452][SQL] Add getInt, getLong, getBoolean to DataSourceV2Opt…
skambha Dec 7, 2017
beb717f
[SPARK-22618][CORE] Catch exception in removeRDD to stop jobs from dying
Dec 7, 2017
dd59a4b
[SPARK-22712][SQL] Use `buildReaderWithPartitionValues` in native Orc…
dongjoon-hyun Dec 7, 2017
fc29446
[SPARK-22699][SQL] GenerateSafeProjection should not use global varia…
mgaido91 Dec 7, 2017
b790719
[SPARK-22696][SQL] objects functions should not use unneeded global v…
mgaido91 Dec 7, 2017
f41c0a9
[SPARK-22660][BUILD] Use position() and limit() to fix ambiguity issu…
kellyzly Dec 7, 2017
18b75d4
[SPARK-22719][SQL] Refactor ConstantPropagation
gengliangwang Dec 7, 2017
@@ -879,6 +879,7 @@ public LongArray getArray() {
* Reset this map to initialized state.
*/
public void reset() {
+ updatePeakMemoryUsed();
numKeys = 0;
numValues = 0;
freeArray(longArray);
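Note on the hunk above (SPARK-22721, BytesToBytesMap.reset()): reset() frees the map's long array, so if peak memory is only sampled lazily, anything allocated since the last sample vanishes from the recorded peak once reset() runs; calling updatePeakMemoryUsed() first captures it. A minimal Scala sketch of the capture-before-release pattern, with illustrative names rather than Spark's API:

    class TrackedBuffer {
      private var used: Long = 0L
      private var peak: Long = 0L

      private def updatePeak(): Unit = { peak = math.max(peak, used) }

      def allocate(bytes: Long): Unit = { used += bytes }

      def reset(): Unit = {
        updatePeak() // sample BEFORE releasing, as the fix above does
        used = 0L    // without the sample, this usage would never reach `peak`
      }

      def peakBytes: Long = { updatePeak(); peak }
    }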
@@ -99,7 +99,8 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
private def calcChecksum(block: ByteBuffer): Int = {
val adler = new Adler32()
if (block.hasArray) {
- adler.update(block.array, block.arrayOffset + block.position, block.limit - block.position)
+ adler.update(block.array, block.arrayOffset + block.position(), block.limit()
+   - block.position())
} else {
val bytes = new Array[Byte](block.remaining())
block.duplicate.get(bytes)
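Note on the .position()/.limit() parenthesization here and in the later hunks (SPARK-22660): java.nio.Buffer pairs a zero-arg getter position() with an overloaded setter position(int), and under Scala 2.12 the paren-less form can become ambiguous or mis-resolve; writing the empty parens pins the zero-arg getter. A sketch assuming only the JDK:

    import java.nio.ByteBuffer

    val buf: ByteBuffer = ByteBuffer.allocate(16)
    // position and limit each name both a getter and a setter overload on Buffer;
    // the explicit empty parens unambiguously select the zero-arg getters.
    val pos: Int = buf.position()
    val lim: Int = buf.limit()
    val remaining = lim - pos // same arithmetic as the checksum fix above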
@@ -165,11 +165,7 @@ private[spark] class CoarseGrainedExecutorBackend(
}

if (notifyDriver && driver.nonEmpty) {
- driver.get.ask[Boolean](
-   RemoveExecutor(executorId, new ExecutorLossReason(reason))
- ).failed.foreach(e =>
-   logWarning(s"Unable to notify the driver due to " + e.getMessage, e)
- )(ThreadUtils.sameThread)
+ driver.get.send(RemoveExecutor(executorId, new ExecutorLossReason(reason)))
}

System.exit(code)
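Note on the ask -> send switch (SPARK-14228): the executor is about to System.exit, so the driver's Boolean reply is useless; a one-way send drops the Future handling entirely. One-way messages are delivered to an endpoint's receive handler rather than receiveAndReply, which is why the scheduler-backend hunk below moves the RemoveExecutor case between the two. A hedged sketch of the two RPC shapes, using a hypothetical trait rather than Spark's RpcEndpointRef:

    import scala.concurrent.{ExecutionContext, Future}

    final case class RemoveExecutor(executorId: String, reason: String)

    trait EndpointRef {
      def ask[T](msg: Any)(implicit ec: ExecutionContext): Future[T] // request/reply
      def send(msg: Any): Unit                                       // fire-and-forget
    }

    def notifyDriver(driver: EndpointRef, executorId: String): Unit = {
      // Before: ask[Boolean](...) plus logging for a failed Future nobody waits on.
      // After: no reply is needed, so a one-way send is sufficient.
      driver.send(RemoveExecutor(executorId, "executor exiting"))
    }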
@@ -452,7 +452,7 @@ private[spark] class Executor(
// TODO: do not serialize value twice
val directResult = new DirectTaskResult(valueBytes, accumUpdates)
val serializedDirectResult = ser.serialize(directResult)
- val resultSize = serializedDirectResult.limit
+ val resultSize = serializedDirectResult.limit()

// directSend = sending directly back to the driver
val serializedResult: ByteBuffer = {
@@ -235,7 +235,9 @@ private[spark] case class ConfigBuilder(key: String) {
}

def fallbackConf[T](fallback: ConfigEntry[T]): ConfigEntry[T] = {
- new FallbackConfigEntry(key, _alternatives, _doc, _public, fallback)
+ val entry = new FallbackConfigEntry(key, _alternatives, _doc, _public, fallback)
+ _onCreate.foreach(_(entry))
+ entry
}

def regexConf: TypedConfigBuilder[Regex] = {
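Note on the hunk above (SPARK-22710): fallbackConf constructed its entry directly and was the only create path that skipped the builder's _onCreate callback, so onCreate listeners never saw fallback entries; the fix routes the new entry through the callback the way createWithDefault and friends do, and the new test at the bottom of ConfigEntrySuite covers all four paths. A simplified sketch of the invariant, with illustrative types:

    // Every construction path must funnel the finished entry through onCreate.
    class Builder[Entry](key: String) {
      private var onCreateFn: Option[Entry => Unit] = None

      def onCreate(f: Entry => Unit): Builder[Entry] = { onCreateFn = Some(f); this }

      // Shared exit point: the bug was one path returning an entry without this call.
      protected def finish(entry: Entry): Entry = { onCreateFn.foreach(_(entry)); entry }

      def fallbackConf(entry: Entry): Entry = finish(entry) // now consistent
    }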
@@ -95,6 +95,9 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
// The num of current max ExecutorId used to re-register appMaster
@volatile protected var currentExecutorIdCounter = 0

+ private val reviveThread =
+   ThreadUtils.newDaemonSingleThreadScheduledExecutor("driver-revive-thread")

class DriverEndpoint(override val rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)])
extends ThreadSafeRpcEndpoint with Logging {

@@ -103,9 +106,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp

protected val addressToExecutorId = new HashMap[RpcAddress, String]

- private val reviveThread =
-   ThreadUtils.newDaemonSingleThreadScheduledExecutor("driver-revive-thread")

override def onStart() {
// Periodically revive offers to allow delay scheduling to work
val reviveIntervalMs = conf.getTimeAsMs("spark.scheduler.revive.interval", "1s")
@@ -154,6 +154,13 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
executorDataMap.values.foreach { ed =>
ed.executorEndpoint.send(UpdateDelegationTokens(newDelegationTokens))
}

+ case RemoveExecutor(executorId, reason) =>
+   // We will remove the executor's state and cannot restore it. However, the connection
+   // between the driver and the executor may be still alive so that the executor won't exit
+   // automatically, so try to tell the executor to stop itself. See SPARK-13519.
+   executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor))
+   removeExecutor(executorId, reason)
}

override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
@@ -215,14 +222,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
}
context.reply(true)

- case RemoveExecutor(executorId, reason) =>
-   // We will remove the executor's state and cannot restore it. However, the connection
-   // between the driver and the executor may be still alive so that the executor won't exit
-   // automatically, so try to tell the executor to stop itself. See SPARK-13519.
-   executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor))
-   removeExecutor(executorId, reason)
-   context.reply(true)

case RemoveWorker(workerId, host, message) =>
removeWorker(workerId, host, message)
context.reply(true)
@@ -288,13 +287,13 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
private def launchTasks(tasks: Seq[Seq[TaskDescription]]) {
for (task <- tasks.flatten) {
val serializedTask = TaskDescription.encode(task)
- if (serializedTask.limit >= maxRpcMessageSize) {
+ if (serializedTask.limit() >= maxRpcMessageSize) {
scheduler.taskIdToTaskSetManager.get(task.taskId).foreach { taskSetMgr =>
try {
var msg = "Serialized task %s:%d was %d bytes, which exceeds max allowed: " +
"spark.rpc.message.maxSize (%d bytes). Consider increasing " +
"spark.rpc.message.maxSize or using broadcast variables for large values."
- msg = msg.format(task.taskId, task.index, serializedTask.limit, maxRpcMessageSize)
+ msg = msg.format(task.taskId, task.index, serializedTask.limit(), maxRpcMessageSize)
taskSetMgr.abort(msg)
} catch {
case e: Exception => logError("Exception in error callback", e)
@@ -373,10 +372,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp

shouldDisable
}

- override def onStop() {
-   reviveThread.shutdownNow()
- }
}

var driverEndpoint: RpcEndpointRef = null
@@ -417,6 +412,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
}

override def stop() {
+ reviveThread.shutdownNow()
stopExecutors()
try {
if (driverEndpoint != null) {
@@ -465,9 +461,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
* at once.
*/
protected def removeExecutor(executorId: String, reason: ExecutorLossReason): Unit = {
- // Only log the failure since we don't care about the result.
- driverEndpoint.ask[Boolean](RemoveExecutor(executorId, reason)).failed.foreach(t =>
-   logError(t.getMessage, t))(ThreadUtils.sameThread)
+ driverEndpoint.send(RemoveExecutor(executorId, reason))
}

protected def removeWorker(workerId: String, host: String, message: String): Unit = {
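Note on the scheduler-backend hunks above: besides moving RemoveExecutor handling from receiveAndReply to receive (matching the senders' switch to send), the PR lifts reviveThread out of the inner DriverEndpoint and shuts it down in the backend's own stop() rather than the endpoint's onStop(), so the scheduled executor's lifetime is tied to the component that creates it. A small sketch of that ownership pattern, assuming only java.util.concurrent:

    import java.util.concurrent.{Executors, ScheduledExecutorService, TimeUnit}

    class Backend {
      // Created by the backend, so the backend is responsible for shutdown.
      private val reviveThread: ScheduledExecutorService =
        Executors.newSingleThreadScheduledExecutor()

      def start(intervalMs: Long)(revive: () => Unit): Unit = {
        reviveThread.scheduleAtFixedRate(
          new Runnable { override def run(): Unit = revive() },
          intervalMs, intervalMs, TimeUnit.MILLISECONDS)
      }

      def stop(): Unit = reviveThread.shutdownNow() // mirrors the stop() change above
    }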
@@ -17,6 +17,7 @@

package org.apache.spark.storage

+ import java.io.IOException
import java.util.{HashMap => JHashMap}

import scala.collection.JavaConverters._
@@ -159,11 +160,16 @@ class BlockManagerMasterEndpoint(
// Ask the slaves to remove the RDD, and put the result in a sequence of Futures.
// The dispatcher is used as an implicit argument into the Future sequence construction.
val removeMsg = RemoveRdd(rddId)
- Future.sequence(
-   blockManagerInfo.values.map { bm =>
-     bm.slaveEndpoint.ask[Int](removeMsg)
-   }.toSeq
- )

+ val futures = blockManagerInfo.values.map { bm =>
+   bm.slaveEndpoint.ask[Int](removeMsg).recover {
+     case e: IOException =>
+       logWarning(s"Error trying to remove RDD $rddId", e)
+       0 // zero blocks were removed
+   }
+ }.toSeq
+
+ Future.sequence(futures)
}

private def removeShuffle(shuffleId: Int): Future[Seq[Boolean]] = {
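Note on the hunk above (SPARK-22618): Future.sequence is fail-fast, so a single slave endpoint throwing (for example an IOException from a dead block manager) used to fail the whole removeRdd future and could kill the calling job; recovering each per-endpoint future to 0 removed blocks keeps unpersist best-effort. The pattern in isolation:

    import java.io.IOException
    import scala.concurrent.ExecutionContext.Implicits.global
    import scala.concurrent.Future

    // Each element stands in for one slave's "blocks removed" reply.
    def removeAll(asks: Seq[Future[Int]]): Future[Seq[Int]] =
      Future.sequence(asks.map(_.recover { case _: IOException => 0 }))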
@@ -54,7 +54,7 @@ class ByteBufferInputStream(private var buffer: ByteBuffer)
override def skip(bytes: Long): Long = {
if (buffer != null) {
val amountToSkip = math.min(bytes, buffer.remaining).toInt
- buffer.position(buffer.position + amountToSkip)
+ buffer.position(buffer.position() + amountToSkip)
if (buffer.remaining() == 0) {
cleanUp()
}
@@ -65,7 +65,7 @@ private[spark] class ChunkedByteBuffer(var chunks: Array[ByteBuffer]) {
for (bytes <- getChunks()) {
while (bytes.remaining() > 0) {
val ioSize = Math.min(bytes.remaining(), bufferWriteChunkSize)
- bytes.limit(bytes.position + ioSize)
+ bytes.limit(bytes.position() + ioSize)
channel.write(bytes)
}
}
@@ -206,7 +206,7 @@ private[spark] class ChunkedByteBufferInputStream(
override def skip(bytes: Long): Long = {
if (currentChunk != null) {
val amountToSkip = math.min(bytes, currentChunk.remaining).toInt
- currentChunk.position(currentChunk.position + amountToSkip)
+ currentChunk.position(currentChunk.position() + amountToSkip)
if (currentChunk.remaining() == 0) {
if (chunks.hasNext) {
currentChunk = chunks.next()
@@ -288,4 +288,24 @@ class ConfigEntrySuite extends SparkFunSuite {
conf.remove(testKey("b"))
assert(conf.get(iConf) === 3)
}

test("onCreate") {
var onCreateCalled = false
ConfigBuilder(testKey("oc1")).onCreate(_ => onCreateCalled = true).intConf.createWithDefault(1)
assert(onCreateCalled)

onCreateCalled = false
ConfigBuilder(testKey("oc2")).onCreate(_ => onCreateCalled = true).intConf.createOptional
assert(onCreateCalled)

onCreateCalled = false
ConfigBuilder(testKey("oc3")).onCreate(_ => onCreateCalled = true).intConf
.createWithDefaultString("1.0")
assert(onCreateCalled)

val fallback = ConfigBuilder(testKey("oc4")).intConf.createWithDefault(1)
onCreateCalled = false
ConfigBuilder(testKey("oc5")).onCreate(_ => onCreateCalled = true).fallbackConf(fallback)
assert(onCreateCalled)
}
}
@@ -199,7 +199,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext {
def check[T: ClassTag](t: T) {
assert(ser.deserialize[T](ser.serialize(t)) === t)
// Check that very long ranges don't get written one element at a time
- assert(ser.serialize(t).limit < 100)
+ assert(ser.serialize(t).limit() < 100)
}
check(1 to 1000000)
check(1 to 1000000 by 2)
@@ -118,7 +118,7 @@ class DiskStoreSuite extends SparkFunSuite {
val chunks = chunkedByteBuffer.chunks
assert(chunks.size === 2)
for (chunk <- chunks) {
- assert(chunk.limit === 10 * 1024)
+ assert(chunk.limit() === 10 * 1024)
}

val e = intercept[IllegalArgumentException]{
6 changes: 3 additions & 3 deletions dev/deps/spark-deps-hadoop-2.6
@@ -35,7 +35,7 @@ commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
commons-codec-1.10.jar
commons-collections-3.2.2.jar
- commons-compiler-3.0.7.jar
+ commons-compiler-3.0.8.jar
commons-compress-1.4.1.jar
commons-configuration-1.6.jar
commons-crypto-1.0.0.jar
@@ -96,7 +96,7 @@ jackson-mapper-asl-1.9.13.jar
jackson-module-paranamer-2.7.9.jar
jackson-module-scala_2.11-2.6.7.1.jar
jackson-xc-1.9.13.jar
- janino-3.0.7.jar
+ janino-3.0.8.jar
java-xmlbuilder-1.1.jar
javassist-3.18.1-GA.jar
javax.annotation-api-1.2.jar
@@ -180,7 +180,7 @@ stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
- univocity-parsers-2.5.4.jar
+ univocity-parsers-2.5.9.jar
validation-api-1.1.0.Final.jar
xbean-asm5-shaded-4.4.jar
xercesImpl-2.9.1.jar
6 changes: 3 additions & 3 deletions dev/deps/spark-deps-hadoop-2.7
@@ -35,7 +35,7 @@ commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
commons-codec-1.10.jar
commons-collections-3.2.2.jar
- commons-compiler-3.0.7.jar
+ commons-compiler-3.0.8.jar
commons-compress-1.4.1.jar
commons-configuration-1.6.jar
commons-crypto-1.0.0.jar
@@ -96,7 +96,7 @@ jackson-mapper-asl-1.9.13.jar
jackson-module-paranamer-2.7.9.jar
jackson-module-scala_2.11-2.6.7.1.jar
jackson-xc-1.9.13.jar
- janino-3.0.7.jar
+ janino-3.0.8.jar
java-xmlbuilder-1.1.jar
javassist-3.18.1-GA.jar
javax.annotation-api-1.2.jar
@@ -181,7 +181,7 @@ stax-api-1.0.1.jar
stream-2.7.0.jar
stringtemplate-3.2.1.jar
super-csv-2.2.0.jar
- univocity-parsers-2.5.4.jar
+ univocity-parsers-2.5.9.jar
validation-api-1.1.0.Final.jar
xbean-asm5-shaded-4.4.jar
xercesImpl-2.9.1.jar
@@ -296,7 +296,9 @@ class KafkaTestUtils(withBrokerProps: Map[String, Object] = Map.empty) extends L
props.put("replica.socket.timeout.ms", "1500")
props.put("delete.topic.enable", "true")
props.put("offsets.topic.num.partitions", "1")
- props.putAll(withBrokerProps.asJava)
+ // Can not use properties.putAll(propsMap.asJava) in scala-2.12
+ // See https://github.com/scala/bug/issues/10418
+ withBrokerProps.foreach { case (k, v) => props.put(k, v) }
props
}

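Note on the hunk above: per the linked scala/bug#10418, scalac 2.12 reports putAll on java.util.Properties as an ambiguous overload on newer JDKs (the Hashtable and Map flavors of putAll collide as the compiler sees them), so the code copies entries one by one instead. The workaround in isolation:

    import java.util.Properties

    // Copy a Scala map into Properties without calling putAll
    // (avoids the overload ambiguity described in scala/bug#10418).
    def toProperties(m: Map[String, Object]): Properties = {
      val props = new Properties()
      m.foreach { case (k, v) => props.put(k, v) }
      props
    }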
14 changes: 2 additions & 12 deletions mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala
@@ -23,7 +23,7 @@ import org.apache.spark.SparkException
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
- import org.apache.spark.ml.param.shared.HasInputCols
+ import org.apache.spark.ml.param.shared.{HasInputCols, HasOutputCols}
import org.apache.spark.ml.util._
import org.apache.spark.sql.{DataFrame, Dataset, Row}
import org.apache.spark.sql.functions._
@@ -32,7 +32,7 @@ import org.apache.spark.sql.types._
/**
* Params for [[Imputer]] and [[ImputerModel]].
*/
- private[feature] trait ImputerParams extends Params with HasInputCols {
+ private[feature] trait ImputerParams extends Params with HasInputCols with HasOutputCols {

/**
* The imputation strategy. Currently only "mean" and "median" are supported.
@@ -63,16 +63,6 @@ private[feature] trait ImputerParams extends Params with HasInputCols {
/** @group getParam */
def getMissingValue: Double = $(missingValue)

- /**
-  * Param for output column names.
-  * @group param
-  */
- final val outputCols: StringArrayParam = new StringArrayParam(this, "outputCols",
-   "output column names")
-
- /** @group getParam */
- final def getOutputCols: Array[String] = $(outputCols)

/** Validates and transforms the input schema. */
protected def validateAndTransformSchema(schema: StructType): StructType = {
require($(inputCols).length == $(inputCols).distinct.length, s"inputCols contains" +
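Note on the Imputer hunks above (SPARK-22690): outputCols and its getter had been declared by hand inside ImputerParams; mixing in the shared ml.param.shared.HasOutputCols trait supplies the same param and getter, so the local duplicate is deleted and any other multi-output-column stage shares one definition. A simplified sketch of the shared-param idea (Spark's real traits are generated on top of its Params machinery):

    // Simplified stand-ins for Spark's Params machinery.
    trait Params

    trait HasOutputCols extends Params {
      // One canonical declaration of the param's storage and getter...
      protected var outputColsValue: Array[String] = Array.empty
      final def getOutputCols: Array[String] = outputColsValue
    }

    // ...so concrete param traits mix it in instead of redeclaring it.
    trait ImputerLikeParams extends Params with HasOutputCols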
2 changes: 1 addition & 1 deletion pom.xml
@@ -170,7 +170,7 @@
<!-- org.apache.commons/commons-lang3/-->
<commons-lang3.version>3.5</commons-lang3.version>
<datanucleus-core.version>3.2.10</datanucleus-core.version>
- <janino.version>3.0.7</janino.version>
+ <janino.version>3.0.8</janino.version>
<jersey.version>2.22.2</jersey.version>
<joda.version>2.9.3</joda.version>
<jodd.version>3.5.2</jodd.version>
@@ -282,7 +282,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
// No more deletion attempts of the executors.
// This is graceful termination and should not be detected as a failure.
verify(podOperations, times(1)).delete(resolvedPod)
- verify(driverEndpointRef, times(1)).ask[Boolean](
+ verify(driverEndpointRef, times(1)).send(
RemoveExecutor("1", ExecutorExited(
0,
exitCausedByApp = false,
@@ -318,7 +318,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
requestExecutorRunnable.getValue.run()
allocatorRunnable.getAllValues.asScala.last.run()
verify(podOperations, never()).delete(firstResolvedPod)
- verify(driverEndpointRef).ask[Boolean](
+ verify(driverEndpointRef).send(
RemoveExecutor("1", ExecutorExited(
1,
exitCausedByApp = true,
@@ -356,7 +356,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
val recreatedResolvedPod = expectPodCreationWithId(2, SECOND_EXECUTOR_POD)
allocatorRunnable.getValue.run()
verify(podOperations).delete(firstResolvedPod)
- verify(driverEndpointRef).ask[Boolean](
+ verify(driverEndpointRef).send(
RemoveExecutor("1", SlaveLost("Executor lost for unknown reasons.")))
}

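Note on the test changes above: since removeExecutor now uses a fire-and-forget send, the Kubernetes suite's expectations flip from verify(...).ask[Boolean](...) to verify(...).send(...), with identical message payloads. A minimal Mockito-style sketch of verifying a one-way message, using a hypothetical trait mirroring the endpoint-ref shape:

    import org.mockito.Mockito.{mock, times, verify}

    object SendVerificationSketch {
      trait EndpointRef { def send(msg: Any): Unit }
      final case class RemoveExecutor(executorId: String, reason: String)

      def main(args: Array[String]): Unit = {
        val ref = mock(classOf[EndpointRef])
        ref.send(RemoveExecutor("1", "Executor lost for unknown reasons."))
        // A one-way message leaves no Future to assert on; the mock interaction
        // itself is the observable effect.
        verify(ref, times(1)).send(RemoveExecutor("1", "Executor lost for unknown reasons."))
      }
    }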