From f5286978b8f70db318c30e730c7f8a7a63668361 Mon Sep 17 00:00:00 2001 From: Mohamadreza Rostami Date: Wed, 15 Dec 2021 17:16:08 +0330 Subject: [PATCH] [SPARK-37060][CORE] Handle driver status response from backup masters --- .../main/scala/org/apache/spark/deploy/Client.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index 7c5ab43a9e1b3..4e1810d9817df 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -190,13 +190,15 @@ private class ClientEndpoint( logDebug(s"State of driver $submittedDriverID is ${state.get}, " + s"continue monitoring driver status.") } - } - } - } else { - logError(s"ERROR: Cluster master did not recognize $submittedDriverID") - System.exit(-1) + } } + } else if (exception.exists(e => Utils.responseFromBackup(e.getMessage))) { + logDebug(s"The status response is reported from a backup spark instance. So, ignored.") + } else { + logError(s"ERROR: Cluster master did not recognize $submittedDriverID") + System.exit(-1) } + } override def receive: PartialFunction[Any, Unit] = { case SubmitDriverResponse(master, success, driverId, message) =>