@@ -47,4 +47,10 @@ private[spark] trait ExecutorAllocationClient {
* @return whether the request is acknowledged by the cluster manager.
*/
def killExecutor(executorId: String): Boolean = killExecutors(Seq(executorId))

/**
* Request that the cluster manager kill the specified executor and expect a new one to be allocated in its place.
* @return whether the request is acknowledged by the cluster manager.
*/
def expireExecutor(executorId: String): Boolean
Contributor: This is a confusing name. Without reading the javadocs, I have no idea what the difference between killExecutor and expireExecutor is.

}
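To make the difference the comment above asks about concrete, here is a minimal sketch of how a caller might use the two methods, inferred only from the doc comments in this diff. The ExecutorRecycler helper and its method names are hypothetical and not part of this change.

```scala
package org.apache.spark

// Hypothetical helper contrasting the two requests, based on the doc comments above.
private[spark] class ExecutorRecycler(client: ExecutorAllocationClient) {

  // killExecutor: ask the cluster manager to remove the executor outright;
  // no replacement is expected, so the application shrinks by one executor.
  def shrink(executorId: String): Boolean = client.killExecutor(executorId)

  // expireExecutor: ask the cluster manager to kill the executor but expect
  // a new one to be allocated in its place, keeping capacity steady.
  def recycle(executorId: String): Boolean = client.expireExecutor(executorId)
}
```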
@@ -132,7 +132,7 @@ private[spark] class HeartbeatReceiver(sc: SparkContext)
// Asynchronously kill the executor to avoid blocking the current thread
killExecutorThread.submit(new Runnable {
override def run(): Unit = Utils.tryLogNonFatalError {
sc.killExecutor(executorId)
sc.expireExecutor(executorId)
}
})
}
18 changes: 18 additions & 0 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1439,6 +1439,24 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationClient
@DeveloperApi
override def killExecutor(executorId: String): Boolean = super.killExecutor(executorId)

/**
* :: DeveloperApi ::
* Request that the cluster manager kill the specified executor and expect a new one to be allocated in its place.
* @return whether the request is acknowledged by the cluster manager.
*/
@DeveloperApi
override def expireExecutor(id: String): Boolean = {
Contributor: We don't expect users to call this, so we shouldn't expose it as a public API.
assert(supportDynamicAllocation,
"Expire executor is currently only supported in YARN mode")
schedulerBackend match {
case b: CoarseGrainedSchedulerBackend =>
b.expireExecutor(id)
case _ =>
logWarning("Expire executor is only supported in coarse-grained mode")
false
}
}

/** The version of Spark on which this application is running. */
def version: String = SPARK_VERSION

@@ -65,6 +65,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: RpcEnv)

// Executors we have requested the cluster manager to kill that have not died yet
private val executorsPendingToRemove = new HashSet[String]
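// Executors that have already been removed; checked so we do not issue further kill/expire requests for them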
private val actuallyLostExecutorsId = new HashSet[String]

class DriverEndpoint(override val rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)])
extends ThreadSafeRpcEndpoint with Logging {
@@ -220,6 +221,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: RpcEnv)
// This must be synchronized because variables mutated
// in this block are read when requesting executors
CoarseGrainedSchedulerBackend.this.synchronized {
actuallyLostExecutorsId += executorId
addressToExecutorId -= executorInfo.executorAddress
executorDataMap -= executorId
executorsPendingToRemove -= executorId
@@ -377,7 +379,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: RpcEnv)
logInfo(s"Requesting to kill executor(s) ${executorIds.mkString(", ")}")
val filteredExecutorIds = new ArrayBuffer[String]
executorIds.foreach { id =>
if (executorDataMap.contains(id)) {
if (executorDataMap.contains(id) && !actuallyLostExecutorsId.contains(id)
&& !executorsPendingToRemove.contains(id)) {
filteredExecutorIds += id
} else {
logWarning(s"Executor to kill $id does not exist!")
@@ -393,6 +396,23 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: RpcEnv)
doKillExecutors(filteredExecutorIds)
}

/**
* Request that the cluster manager kill the specified executor and expect a new one to be allocated in its place.
* @return whether the request is acknowledged by the cluster manager.
*/
def expireExecutor(id: String): Boolean = synchronized {
logInfo(s"Requesting to expired executor ${id}")
val filteredExecutorIds = new ArrayBuffer[String]
if (executorDataMap.contains(id) && !actuallyLostExecutorsId.contains(id)
&& !executorsPendingToRemove.contains(id)) {
filteredExecutorIds += id
} else {
logWarning(s"Executor to kill $id does not exist!")
}

doKillExecutors(filteredExecutorIds)
}
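Both killExecutors and expireExecutor now repeat the same three-part guard. A small sketch of how that check could be factored out, assuming the backend lock is held by callers; the isKillable name is hypothetical and not part of this diff:

```scala
// Hypothetical helper: the guard shared by killExecutors and expireExecutor.
// Callers must hold the CoarseGrainedSchedulerBackend lock, since all three
// collections are only mutated inside synchronized blocks.
private def isKillable(id: String): Boolean =
  executorDataMap.contains(id) &&
    !actuallyLostExecutorsId.contains(id) &&
    !executorsPendingToRemove.contains(id)
```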

/**
* Kill the given list of executors through the cluster manager.
* Return whether the kill request is acknowledged.