@@ -37,16 +37,20 @@ private[spark] case class Heartbeat(
3737 taskMetrics : Array [(Long , TaskMetrics )], // taskId -> TaskMetrics
3838 blockManagerId : BlockManagerId )
3939
/**
 * Message that carries a TaskScheduler to the HeartbeatReceiver; on receipt the receiver
 * stores it in its `scheduler` field.
 */
40+ private [spark] case class RegisterTaskScheduler (scheduler : TaskScheduler )
41+
/** Message that makes the HeartbeatReceiver run `expireDeadHosts()`. */
4042private [spark] case object ExpireDeadHosts
4143
/**
 * Reply that the HeartbeatReceiver sends back to the sender of a Heartbeat.
 *
 * The receiver sets `reregisterBlockManager` from the negated result of
 * `scheduler.executorHeartbeatReceived`, or to `true` when it drops the heartbeat.
 */
4244private [spark] case class HeartbeatResponse (reregisterBlockManager : Boolean )
4345
4446/**
4547 * Lives in the driver to receive heartbeats from executors.
4648 */
47- private [spark] class HeartbeatReceiver (sc : SparkContext , scheduler : TaskScheduler )
49+ private [spark] class HeartbeatReceiver (sc : SparkContext )
4850 extends Actor with ActorLogReceive with Logging {
4951
52+ private var scheduler : TaskScheduler = null
53+
5054 // executor ID -> timestamp of when the last heartbeat from this executor was received
5155 private val executorLastSeen = new mutable.HashMap [String , Long ]
5256
@@ -71,12 +75,22 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, scheduler: TaskSchedule
7175 }
7276
/**
 * Handles the messages this actor understands.
 *
 *  - `RegisterTaskScheduler`: stores the supplied scheduler in the `scheduler` field.
 *  - `Heartbeat`: when a scheduler has been registered, forwards the heartbeat to
 *    `scheduler.executorHeartbeatReceived`, records the time the executor was last seen,
 *    and replies with a `HeartbeatResponse` whose `reregisterBlockManager` flag is the
 *    negation of the scheduler's result. When no scheduler has been registered yet, the
 *    heartbeat is dropped with a warning and the sender is told to re-register.
 *  - `ExpireDeadHosts`: runs `expireDeadHosts()`.
 */
override def receiveWithLogging: PartialFunction[Any, Unit] = {
  case RegisterTaskScheduler(scheduler) =>
    // The pattern variable shadows the field, hence the explicit `this.`.
    this.scheduler = scheduler
  case heartbeat @ Heartbeat(executorId, taskMetrics, blockManagerId) =>
    // The scheduler may only be used once it has been registered. The guard must be
    // `!= null`: testing `== null` here would dereference a null scheduler and would
    // drop every heartbeat after registration.
    if (scheduler != null) {
      val unknownExecutor = !scheduler.executorHeartbeatReceived(
        executorId, taskMetrics, blockManagerId)
      val response = HeartbeatResponse(reregisterBlockManager = unknownExecutor)
      executorLastSeen(executorId) = System.currentTimeMillis()
      sender ! response
    } else {
      // Because Executor will sleep several seconds then send the first "Heartbeat", this case
      // rarely happens. However, if it really happens, log it and ask the executor to register
      // itself again.
      logWarning(s"Dropping $heartbeat because TaskScheduler has not been ready yet")
      sender ! HeartbeatResponse(reregisterBlockManager = true)
    }
  case ExpireDeadHosts =>
    expireDeadHosts()
}
0 commit comments