-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-1769] Executor loss causes NPE race condition #762
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
07d720c
0921ea0
05ad9e9
2189247
769be19
3f32981
383e739
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,8 +17,10 @@ | |
|
|
||
| package org.apache.spark.scheduler | ||
|
|
||
| import java.util.concurrent.{ConcurrentHashMap, LinkedBlockingQueue} | ||
|
|
||
| import scala.collection.JavaConverters._ | ||
| import scala.collection.mutable.ArrayBuffer | ||
| import scala.collection.mutable.HashMap | ||
|
|
||
| import org.apache.spark.Logging | ||
| import org.apache.spark.scheduler.SchedulingMode.SchedulingMode | ||
|
|
@@ -35,18 +37,15 @@ private[spark] class Pool( | |
| extends Schedulable | ||
| with Logging { | ||
|
|
||
| var schedulableQueue = new ArrayBuffer[Schedulable] | ||
| var schedulableNameToSchedulable = new HashMap[String, Schedulable] | ||
|
|
||
| val schedulableQueue = new LinkedBlockingQueue[Schedulable] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: ConcurrentLinkedQueue is a more suitable data structure for this type of access -- it provides a very efficient linked list, and it uses "add()" instead of "offer()". Also no blocking stuff. |
||
| val schedulableNameToSchedulable = new ConcurrentHashMap[String, Schedulable] | ||
| var weight = initWeight | ||
| var minShare = initMinShare | ||
| var runningTasks = 0 | ||
|
|
||
| var priority = 0 | ||
|
|
||
| // A pool's stage id is used to break the tie in scheduling. | ||
| var stageId = -1 | ||
|
|
||
| var name = poolName | ||
| var parent: Pool = null | ||
|
|
||
|
|
@@ -60,21 +59,21 @@ private[spark] class Pool( | |
| } | ||
|
|
||
| override def addSchedulable(schedulable: Schedulable) { | ||
| schedulableQueue += schedulable | ||
| schedulableNameToSchedulable(schedulable.name) = schedulable | ||
| schedulableQueue.offer(schedulable) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Could we add a require(schedulable != null) here? It would cause an NPE anyway; I think this would just make it clearer. It also makes our use of |
||
| schedulableNameToSchedulable.put(schedulable.name, schedulable) | ||
| schedulable.parent = this | ||
| } | ||
|
|
||
| override def removeSchedulable(schedulable: Schedulable) { | ||
| schedulableQueue -= schedulable | ||
| schedulableNameToSchedulable -= schedulable.name | ||
| schedulableQueue.remove(schedulable) | ||
| schedulableNameToSchedulable.remove(schedulable.name) | ||
| } | ||
|
|
||
| override def getSchedulableByName(schedulableName: String): Schedulable = { | ||
| if (schedulableNameToSchedulable.contains(schedulableName)) { | ||
| return schedulableNameToSchedulable(schedulableName) | ||
| if (schedulableNameToSchedulable.containsKey(schedulableName)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, man, good catch |
||
| return schedulableNameToSchedulable.get(schedulableName) | ||
| } | ||
| for (schedulable <- schedulableQueue) { | ||
| for (schedulable <- schedulableQueue.asScala) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think ".iterator" might work here and below, which would be a bit nicer since the documentation specifically provides guarantees about it.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, actually I think you just imported JavaConverters instead of JavaConversions. You probably don't need the explicit ".asScala" calls if you import JavaConversions instead.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that would be good |
||
| val sched = schedulable.getSchedulableByName(schedulableName) | ||
| if (sched != null) { | ||
| return sched | ||
|
|
@@ -84,22 +83,23 @@ private[spark] class Pool( | |
| } | ||
|
|
||
| override def executorLost(executorId: String, host: String) { | ||
| schedulableQueue.foreach(_.executorLost(executorId, host)) | ||
| schedulableQueue.asScala.foreach(_.executorLost(executorId, host)) | ||
| } | ||
|
|
||
| override def checkSpeculatableTasks(): Boolean = { | ||
| var shouldRevive = false | ||
| for (schedulable <- schedulableQueue) { | ||
| for (schedulable <- schedulableQueue.asScala) { | ||
| shouldRevive |= schedulable.checkSpeculatableTasks() | ||
| } | ||
| shouldRevive | ||
| } | ||
|
|
||
| override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { | ||
| override def getSortedTaskSetQueue: ArrayBuffer[TaskSetManager] = { | ||
| var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager] | ||
| val sortedSchedulableQueue = schedulableQueue.sortWith(taskSetSchedulingAlgorithm.comparator) | ||
| val sortedSchedulableQueue = | ||
| schedulableQueue.asScala.toArray.sortWith(taskSetSchedulingAlgorithm.comparator) | ||
| for (schedulable <- sortedSchedulableQueue) { | ||
| sortedTaskSetQueue ++= schedulable.getSortedTaskSetQueue() | ||
| sortedTaskSetQueue ++= schedulable.getSortedTaskSetQueue | ||
| } | ||
| sortedTaskSetQueue | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this is an API comment now, let's move the TODO to a // comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch; that would have been an embarrassing doc